diff --git a/.clang-format b/.clang-format index 443f90b774d..84552f330bb 100755 --- a/.clang-format +++ b/.clang-format @@ -1,3 +1,6 @@ +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + # The style used for all options not specifically set in the configuration. BasedOnStyle: LLVM diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000000..31fd1149349 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,70 @@ +# Copyright 2020-2021 The Khronos Group, Inc. +# SPDX-License-Identifier: Apache-2.0 + +name: CI + +on: + push: + branches: [ main ] + pull_request: + types: [ opened, synchronize, reopened ] + +jobs: + build: + name: "Build ${{ matrix.platform }}" + strategy: + matrix: + platform: [windows-latest, ubuntu-18.04, ubuntu-20.04, macos-latest] + env: + PARALLEL: -j 2 + + runs-on: "${{ matrix.platform }}" + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + with: + python-version: '3.x' + + - name: Pull glslang / SPIRV-Tools + shell: bash + working-directory: ${{github.workspace}} + run: ./checkout_glslang_spirv_tools.sh + + - name: Build glslang / SPIRV-Tools + shell: bash + working-directory: ${{github.workspace}} + run: ./build_glslang_spirv_tools.sh Release + + - name: Configure SPIRV-Cross + shell: bash + run: | + mkdir build + cd build + cmake .. -DSPIRV_CROSS_WERROR=ON -DSPIRV_CROSS_MISC_WARNINGS=ON -DSPIRV_CROSS_SHARED=ON -DCMAKE_INSTALL_PREFIX=output -DCMAKE_BUILD_TYPE=Release -DSPIRV_CROSS_ENABLE_TESTS=ON + + - name: Build SPIRV-Cross + shell: bash + working-directory: ${{github.workspace}}/build + run: | + cmake --build . --config Release + cmake --build . 
--config Release --target install + + - name: Test SPIRV-Cross + shell: bash + working-directory: ${{github.workspace}}/build + run: ctest --verbose -C Release + reuse: + name: "REUSE license check" + runs-on: ubuntu-latest + container: khronosgroup/docker-images:asciidoctor-spec + + steps: + - uses: actions/checkout@v2 + + # REUSE license checker + - name: license-check + run: | + reuse lint + + diff --git a/.gitignore b/.gitignore index abd71895838..8e91b32a386 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + *.o *.d *.txt @@ -18,3 +21,4 @@ *.vcxproj.user !CMakeLists.txt +!LICENSES/*.txt diff --git a/.reuse/dep5 b/.reuse/dep5 new file mode 100644 index 00000000000..9ed4191305b --- /dev/null +++ b/.reuse/dep5 @@ -0,0 +1,11 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: SPIRV-Cross +Source: https://github.com/KhronosGroup/SPIRV-Cross + +Files: shaders*/* reference/* tests-other/* +Copyright: 2016-2021 The Khronos Group, Inc. +License: Apache-2.0 + +Files: spirv.h spirv.hpp GLSL.std.450.h +Copyright: 2016-2021 The Khronos Group, Inc. 
+License: MIT diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 46b92c8fc8e..00000000000 --- a/.travis.yml +++ /dev/null @@ -1,72 +0,0 @@ -language: - - cpp - - python - -python: 3.7 - -matrix: - include: - - os: linux - dist: trusty - compiler: gcc - env: - - GENERATOR="Unix Makefiles" - - ARTIFACT=gcc-trusty-64bit - - os: linux - dist: trusty - compiler: clang - env: - - GENERATOR="Unix Makefiles" - - ARTIFACT=clang-trusty-64bit - - os: osx - compiler: clang - osx_image: xcode10 - env: - - GENERATOR="Unix Makefiles" - - ARTIFACT=clang-macos-64bit - - os: windows - before_install: - - choco install python3 - - export PATH="/c/Python37:/c/Python37/Scripts:$PATH" - env: - - GENERATOR="Visual Studio 15 2017" - - ARTIFACT=vs2017-32bit - - os: windows - before_install: - - choco install python3 - - export PATH="/c/Python37:/c/Python37/Scripts:$PATH" - env: - - GENERATOR="Visual Studio 15 2017 Win64" - - ARTIFACT=vs2017-64bit - -before_script: - - "./checkout_glslang_spirv_tools.sh" - -script: - - if [[ "$TRAVIS_OS_NAME" == "windows" ]]; then PYTHON3=$(which python); fi - - if [[ "$TRAVIS_OS_NAME" != "windows" ]]; then PYTHON3=$(which python3); fi - - "./build_glslang_spirv_tools.sh Release" - - mkdir build - - cd build - - cmake .. -DSPIRV_CROSS_SHARED=ON -DCMAKE_INSTALL_PREFIX=output -DCMAKE_BUILD_TYPE=Release -G "${GENERATOR}" -DPYTHON_EXECUTABLE:FILEPATH="${PYTHON3}" -DSPIRV_CROSS_ENABLE_TESTS=ON - - cmake --build . --config Release - - cmake --build . --config Release --target install - - ctest --verbose -C Release - - cd .. - -before_deploy: - - REV=${ARTIFACT}-$(git rev-parse --short=10 HEAD) - - cd build/output - - tar cf spirv-cross-${REV}.tar * - - gzip spirv-cross-${REV}.tar - - cd ../.. 
- - export FILE_TO_UPLOAD=build/output/spirv-cross-${REV}.tar.gz - -deploy: - provider: releases - api_key: - secure: c7YEOyzhE19TFo76UnbLWk/kikRQxsHsOxzkOqN6Q2aL8joNRw5kmcG84rGd+Rf6isX62cykCzA6qHkyJCv9QTIzcyXnLju17rLvgib7cXDcseaq8x4mFvet2yUxCglthDpFY2M2LB0Aqws71lPeYIrKXa6hCFEh8jO3AWxnaor7O3RYfNZylM9d33HgH6KLT3sDx/cukwBstmKeg7EG9OUnrSvairkPW0W2+jlq3SXPlq/WeVhf8hQs3Yg0BluExGbmLOwe9EaeUpeGuJMyHRxXypnToQv1/KwoScKpap5tYxdNWiwRGZ4lYcmKrjAYVvilTioh654oX5LQpn34mE/oe8Ko9AaATkSaoiisRFp6meWtnB39oFBoL5Yn15DqLQpRXPr1AJsnBXSGAac3aDBO1j4MIqTHmYlYlfRw3n2ZsBaFaTZnv++438SNQ54nkivyoDTIWjoOmYa9+K4mQc3415RDdQmjZTJM+lu+GAlMmNBTVbfNvrbU55Usu9Lo6BZJKKdUMvdBB78kJ5FHvcBlL+eMgmk1pABQY0IZROCt7NztHcv1UmAxoWNxveSFs5glydPNNjNS8bogc4dzBGYG0KMmILbBHihVbY2toA1M9CMdDHdp+LucfDMmzECmYSEmlx0h8win+Jjb74/qpOhaXuUZ0NnzVgCOyeUYuMQ= - file: "${FILE_TO_UPLOAD}" - skip_cleanup: true - on: - tags: true diff --git a/CMakeLists.txt b/CMakeLists.txt index aa33262ac8c..ccaedc90c8c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,5 @@ -# Copyright 2016 Google Inc. +# Copyright 2016-2021 Google Inc. +# SPDX-License-Identifier: Apache-2.0 OR MIT # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,11 +13,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 2.8) +# +# At your option, you may choose to accept this material under either: +# 1. The Apache License, Version 2.0, found at , or +# 2. The MIT License, found at . +# + +cmake_minimum_required(VERSION 3.0) set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_EXTENSIONS OFF) + +# Avoid a warning if parent project sets VERSION in project(). 
+if (${CMAKE_VERSION} VERSION_GREATER "3.0.1") + cmake_policy(SET CMP0048 NEW) +endif() + project(SPIRV-Cross LANGUAGES CXX C) enable_testing() +include(GNUInstallDirs) + option(SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS "Instead of throwing exceptions assert" OFF) option(SPIRV_CROSS_SHARED "Build the C API as a single shared library." OFF) option(SPIRV_CROSS_STATIC "Build the C and C++ API as static libraries." ON) @@ -39,6 +55,13 @@ option(SPIRV_CROSS_SANITIZE_UNDEFINED "Sanitize undefined" OFF) option(SPIRV_CROSS_NAMESPACE_OVERRIDE "" "Override the namespace used in the C++ API.") option(SPIRV_CROSS_FORCE_STL_TYPES "Force use of STL types instead of STL replacements in certain places. Might reduce performance." OFF) +option(SPIRV_CROSS_SKIP_INSTALL "Skips installation targets." OFF) + +option(SPIRV_CROSS_WERROR "Fail build on warnings." OFF) +option(SPIRV_CROSS_MISC_WARNINGS "Misc warnings useful for Travis runs." OFF) + +option(SPIRV_CROSS_FORCE_PIC "Force position-independent code for all targets." 
OFF) + if(${CMAKE_GENERATOR} MATCHES "Makefile") if(${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR}) message(FATAL_ERROR "Build out of tree to avoid overwriting Makefile") @@ -49,7 +72,7 @@ set(spirv-compiler-options "") set(spirv-compiler-defines "") set(spirv-cross-link-flags "") -message(STATUS "Finding Git version for SPIRV-Cross.") +message(STATUS "SPIRV-Cross: Finding Git version for SPIRV-Cross.") set(spirv-cross-build-version "unknown") find_package(Git) if (GIT_FOUND) @@ -60,24 +83,39 @@ if (GIT_FOUND) ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE ) - message(STATUS "Git hash: ${spirv-cross-build-version}") + message(STATUS "SPIRV-Cross: Git hash: ${spirv-cross-build-version}") else() - message(STATUS "Git not found, using unknown build version.") + message(STATUS "SPIRV-Cross: Git not found, using unknown build version.") endif() string(TIMESTAMP spirv-cross-timestamp) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/gitversion.in.h ${CMAKE_CURRENT_BINARY_DIR}/gitversion.h @ONLY) -if(SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS) +if (SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS) set(spirv-compiler-defines ${spirv-compiler-defines} SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS) + if (NOT MSVC) + set(spirv-compiler-options ${spirv-compiler-options} -fno-exceptions) + endif() endif() -if(SPIRV_CROSS_FORCE_STL_TYPES) +if (SPIRV_CROSS_FORCE_STL_TYPES) set(spirv-compiler-defines ${spirv-compiler-defines} SPIRV_CROSS_FORCE_STL_TYPES) endif() -if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")) - set(spirv-compiler-options ${spirv-compiler-options} -Wall -Wextra -Werror -Wshadow) +if (WIN32) + set(CMAKE_DEBUG_POSTFIX "d") +endif() + +if (CMAKE_COMPILER_IS_GNUCXX OR ((${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") AND NOT MSVC)) + set(spirv-compiler-options ${spirv-compiler-options} -Wall -Wextra -Wshadow -Wno-deprecated-declarations) + if (SPIRV_CROSS_MISC_WARNINGS) + if (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") + set(spirv-compiler-options 
${spirv-compiler-options} -Wshorten-64-to-32) + endif() + endif() + if (SPIRV_CROSS_WERROR) + set(spirv-compiler-options ${spirv-compiler-options} -Werror) + endif() if (SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS) set(spirv-compiler-options ${spirv-compiler-options} -fno-exceptions) @@ -103,7 +141,8 @@ if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")) set(spirv-cross-link-flags "${spirv-cross-link-flags} -fsanitize=thread") endif() elseif (MSVC) - set(spirv-compiler-options ${spirv-compiler-options} /wd4267) + # AppVeyor spuriously fails in debug build on older MSVC without /bigobj. + set(spirv-compiler-options ${spirv-compiler-options} /wd4267 /wd4996 $<$:/bigobj>) endif() macro(extract_headers out_abs file_list) @@ -130,9 +169,12 @@ macro(spirv_cross_add_library name config_name library_type) extract_headers(hdrs "${ARGN}") target_include_directories(${name} PUBLIC $ - $) + $) set_target_properties(${name} PROPERTIES PUBLIC_HEADERS "${hdrs}") + if (SPIRV_CROSS_FORCE_PIC) + set_target_properties(${name} PROPERTIES POSITION_INDEPENDENT_CODE ON) + endif() target_compile_options(${name} PRIVATE ${spirv-compiler-options}) target_compile_definitions(${name} PRIVATE ${spirv-compiler-defines}) if (SPIRV_CROSS_NAMESPACE_OVERRIDE) @@ -142,15 +184,18 @@ macro(spirv_cross_add_library name config_name library_type) target_compile_definitions(${name} PRIVATE SPIRV_CROSS_NAMESPACE_OVERRIDE=${SPIRV_CROSS_NAMESPACE_OVERRIDE}) endif() endif() - install(TARGETS ${name} + + if (NOT SPIRV_CROSS_SKIP_INSTALL) + install(TARGETS ${name} EXPORT ${config_name}Config - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib - PUBLIC_HEADER DESTINATION include/spirv_cross) - install(FILES ${hdrs} DESTINATION include/spirv_cross) - install(EXPORT ${config_name}Config DESTINATION share/${config_name}/cmake) - export(TARGETS ${name} FILE ${config_name}Config.cmake) + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION 
${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/spirv_cross) + install(FILES ${hdrs} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/spirv_cross) + install(EXPORT ${config_name}Config DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${config_name}/cmake) + export(TARGETS ${name} FILE ${config_name}Config.cmake) + endif() endmacro() set(spirv-cross-core-sources @@ -287,17 +332,18 @@ if (SPIRV_CROSS_STATIC) endif() set(spirv-cross-abi-major 0) -set(spirv-cross-abi-minor 16) +set(spirv-cross-abi-minor 54) set(spirv-cross-abi-patch 0) if (SPIRV_CROSS_SHARED) set(SPIRV_CROSS_VERSION ${spirv-cross-abi-major}.${spirv-cross-abi-minor}.${spirv-cross-abi-patch}) - set(SPIRV_CROSS_INSTALL_LIB_DIR ${CMAKE_INSTALL_PREFIX}/lib) - set(SPIRV_CROSS_INSTALL_INC_DIR ${CMAKE_INSTALL_PREFIX}/include/spirv_cross) - configure_file( + + if (NOT SPIRV_CROSS_SKIP_INSTALL) + configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/pkg-config/spirv-cross-c-shared.pc.in ${CMAKE_CURRENT_BINARY_DIR}/spirv-cross-c-shared.pc @ONLY) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/spirv-cross-c-shared.pc DESTINATION ${CMAKE_INSTALL_PREFIX}/share/pkgconfig) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/spirv-cross-c-shared.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) + endif() spirv_cross_add_library(spirv-cross-c-shared spirv_cross_c_shared SHARED ${spirv-cross-core-sources} @@ -395,7 +441,9 @@ if (SPIRV_CROSS_CLI) target_include_directories(spirv-cross PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_compile_definitions(spirv-cross PRIVATE ${spirv-compiler-defines} HAVE_SPIRV_CROSS_GIT_VERSION) set_target_properties(spirv-cross PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") - install(TARGETS spirv-cross RUNTIME DESTINATION bin) + if (NOT SPIRV_CROSS_SKIP_INSTALL) + install(TARGETS spirv-cross DESTINATION ${CMAKE_INSTALL_BINDIR}) + endif() target_link_libraries(spirv-cross PRIVATE spirv-cross-glsl spirv-cross-hlsl @@ -427,14 +475,14 @@ if 
(SPIRV_CROSS_CLI) if ((${spirv-cross-glslang} MATCHES "NOTFOUND") OR (${spirv-cross-spirv-as} MATCHES "NOTFOUND") OR (${spirv-cross-spirv-val} MATCHES "NOTFOUND") OR (${spirv-cross-spirv-opt} MATCHES "NOTFOUND")) set(SPIRV_CROSS_ENABLE_TESTS OFF) - message("Could not find glslang or SPIRV-Tools build under external/. Run ./checkout_glslang_spirv_tools.sh and ./build_glslang_spirv_tools.sh. Testing will be disabled.") + message("SPIRV-Cross: Testing will be disabled for SPIRV-Cross. Could not find glslang or SPIRV-Tools build under external/. To enable testing, run ./checkout_glslang_spirv_tools.sh and ./build_glslang_spirv_tools.sh first.") else() set(SPIRV_CROSS_ENABLE_TESTS ON) - message("Found glslang and SPIRV-Tools. Enabling test suite.") - message("Found glslangValidator in: ${spirv-cross-glslang}.") - message("Found spirv-as in: ${spirv-cross-spirv-as}.") - message("Found spirv-val in: ${spirv-cross-spirv-val}.") - message("Found spirv-opt in: ${spirv-cross-spirv-opt}.") + message("SPIRV-Cross: Found glslang and SPIRV-Tools. 
Enabling test suite.") + message("SPIRV-Cross: Found glslangValidator in: ${spirv-cross-glslang}.") + message("SPIRV-Cross: Found spirv-as in: ${spirv-cross-spirv-as}.") + message("SPIRV-Cross: Found spirv-val in: ${spirv-cross-spirv-val}.") + message("SPIRV-Cross: Found spirv-opt in: ${spirv-cross-spirv-opt}.") endif() set(spirv-cross-externals @@ -461,6 +509,18 @@ if (SPIRV_CROSS_CLI) target_link_libraries(spirv-cross-msl-resource-binding-test spirv-cross-c) set_target_properties(spirv-cross-msl-resource-binding-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + add_executable(spirv-cross-hlsl-resource-binding-test tests-other/hlsl_resource_bindings.cpp) + target_link_libraries(spirv-cross-hlsl-resource-binding-test spirv-cross-c) + set_target_properties(spirv-cross-hlsl-resource-binding-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + + add_executable(spirv-cross-msl-ycbcr-conversion-test tests-other/msl_ycbcr_conversion_test.cpp) + target_link_libraries(spirv-cross-msl-ycbcr-conversion-test spirv-cross-c) + set_target_properties(spirv-cross-msl-ycbcr-conversion-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + + add_executable(spirv-cross-typed-id-test tests-other/typed_id_test.cpp) + target_link_libraries(spirv-cross-typed-id-test spirv-cross-core) + set_target_properties(spirv-cross-typed-id-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")) target_compile_options(spirv-cross-c-api-test PRIVATE -std=c89 -Wall -Wextra) endif() @@ -475,6 +535,14 @@ if (SPIRV_CROSS_CLI) COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_constexpr_test.spv) add_test(NAME spirv-cross-msl-resource-binding-test COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_resource_binding.spv) + add_test(NAME spirv-cross-hlsl-resource-binding-test + COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/hlsl_resource_binding.spv) + add_test(NAME spirv-cross-msl-ycbcr-conversion-test + 
COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_ycbcr_conversion_test.spv) + add_test(NAME spirv-cross-msl-ycbcr-conversion-test-2 + COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_ycbcr_conversion_test_2.spv) + add_test(NAME spirv-cross-typed-id-test + COMMAND $) add_test(NAME spirv-cross-test COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --parallel ${spirv-cross-externals} @@ -525,9 +593,24 @@ if (SPIRV_CROSS_CLI) ${spirv-cross-externals} ${CMAKE_CURRENT_SOURCE_DIR}/shaders-reflection WORKING_DIRECTORY $) + add_test(NAME spirv-cross-test-ue4 + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --msl --parallel + ${spirv-cross-externals} + ${CMAKE_CURRENT_SOURCE_DIR}/shaders-ue4 + WORKING_DIRECTORY $) + add_test(NAME spirv-cross-test-ue4-opt + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --msl --opt --parallel + ${spirv-cross-externals} + ${CMAKE_CURRENT_SOURCE_DIR}/shaders-ue4 + WORKING_DIRECTORY $) + add_test(NAME spirv-cross-test-ue4-no-opt + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --msl --parallel + ${spirv-cross-externals} + ${CMAKE_CURRENT_SOURCE_DIR}/shaders-ue4-no-opt + WORKING_DIRECTORY $) endif() elseif(NOT ${PYTHONINTERP_FOUND}) - message(WARNING "Testing disabled. Could not find python3. If you have python3 installed try running " + message(WARNING "SPIRV-Cross: Testing disabled. Could not find python3. If you have python3 installed try running " "cmake with -DPYTHON_EXECUTABLE:FILEPATH=/path/to/python3 to help it find the executable") endif() endif() diff --git a/CODE_OF_CONDUCT.adoc b/CODE_OF_CONDUCT.adoc new file mode 100644 index 00000000000..ca5e08b930a --- /dev/null +++ b/CODE_OF_CONDUCT.adoc @@ -0,0 +1,11 @@ +// Copyright (c) 2016-2020 The Khronos Group Inc. +// +// SPDX-License-Identifier: CC-BY-4.0 + += Code of Conduct + +A reminder that this issue tracker is managed by the Khronos Group. 
+Interactions here should follow the +https://www.khronos.org/developers/code-of-conduct[Khronos Code of Conduct], +which prohibits aggressive or derogatory language. Please keep the +discussion friendly and civil. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index a11610bd300..00000000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1 +0,0 @@ -A reminder that this issue tracker is managed by the Khronos Group. Interactions here should follow the Khronos Code of Conduct (https://www.khronos.org/developers/code-of-conduct), which prohibits aggressive or derogatory language. Please keep the discussion friendly and civil. diff --git a/GLSL.std.450.h b/GLSL.std.450.h index 54cc00e9a88..2686fc4ea7e 100644 --- a/GLSL.std.450.h +++ b/GLSL.std.450.h @@ -1,27 +1,10 @@ /* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. + * Copyright 2014-2016,2021 The Khronos Group, Inc. + * SPDX-License-Identifier: MIT + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS + * STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND + * HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ */ #ifndef GLSLstd450_H diff --git a/LICENSES/Apache-2.0.txt b/LICENSES/Apache-2.0.txt new file mode 100644 index 00000000000..4ed90b95224 --- /dev/null +++ b/LICENSES/Apache-2.0.txt @@ -0,0 +1,208 @@ +Apache License + +Version 2.0, January 2004 + +http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, +AND DISTRIBUTION + + 1. Definitions. + + + +"License" shall mean the terms and conditions for use, reproduction, and distribution +as defined by Sections 1 through 9 of this document. + + + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + + + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct +or indirect, to cause the direction or management of such entity, whether +by contract or otherwise, or (ii) ownership of fifty percent (50%) or more +of the outstanding shares, or (iii) beneficial ownership of such entity. + + + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions +granted by this License. + + + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. 
+ + + +"Object" form shall mean any form resulting from mechanical transformation +or translation of a Source form, including but not limited to compiled object +code, generated documentation, and conversions to other media types. + + + +"Work" shall mean the work of authorship, whether in Source or Object form, +made available under the License, as indicated by a copyright notice that +is included in or attached to the work (an example is provided in the Appendix +below). + + + +"Derivative Works" shall mean any work, whether in Source or Object form, +that is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative +Works shall not include works that remain separable from, or merely link (or +bind by name) to the interfaces of, the Work and Derivative Works thereof. + + + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative +Works thereof, that is intentionally submitted to Licensor for inclusion in +the Work by the copyright owner or by an individual or Legal Entity authorized +to submit on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication +sent to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor +for the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." 
+ + + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently incorporated +within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this +License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable copyright license to reproduce, prepare +Derivative Works of, publicly display, publicly perform, sublicense, and distribute +the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, +each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) patent +license to make, have made, use, offer to sell, sell, import, and otherwise +transfer the Work, where such license applies only to those patent claims +licensable by such Contributor that are necessarily infringed by their Contribution(s) +alone or by combination of their Contribution(s) with the Work to which such +Contribution(s) was submitted. If You institute patent litigation against +any entity (including a cross-claim or counterclaim in a lawsuit) alleging +that the Work or a Contribution incorporated within the Work constitutes direct +or contributory patent infringement, then any patent licenses granted to You +under this License for that Work shall terminate as of the date such litigation +is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the Work or +Derivative Works thereof in any medium, with or without modifications, and +in Source or Object form, provided that You meet the following conditions: + +(a) You must give any other recipients of the Work or Derivative Works a copy +of this License; and + +(b) You must cause any modified files to carry prominent notices stating that +You changed the files; and + +(c) You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source +form of the Work, excluding those notices that do not pertain to any part +of the Derivative Works; and + +(d) If the Work includes a "NOTICE" text file as part of its distribution, +then any Derivative Works that You distribute must include a readable copy +of the attribution notices contained within such NOTICE file, excluding those +notices that do not pertain to any part of the Derivative Works, in at least +one of the following places: within a NOTICE text file distributed as part +of the Derivative Works; within the Source form or documentation, if provided +along with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents +of the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works +that You distribute, alongside or as an addendum to the NOTICE text from the +Work, provided that such additional attribution notices cannot be construed +as modifying the License. 
+ +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, +or distribution of Your modifications, or for any such Derivative Works as +a whole, provided Your use, reproduction, and distribution of the Work otherwise +complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any +Contribution intentionally submitted for inclusion in the Work by You to the +Licensor shall be under the terms and conditions of this License, without +any additional terms or conditions. Notwithstanding the above, nothing herein +shall supersede or modify the terms of any separate license agreement you +may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, +trademarks, service marks, or product names of the Licensor, except as required +for reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to +in writing, Licensor provides the Work (and each Contributor provides its +Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied, including, without limitation, any warranties +or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR +A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness +of using or redistributing the Work and assume any risks associated with Your +exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether +in tort (including negligence), contract, or otherwise, unless required by +applicable law (such as deliberate and grossly negligent acts) or agreed to +in writing, shall any Contributor be liable to You for damages, including +any direct, indirect, special, incidental, or consequential damages of any +character arising as a result of this License or out of the use or inability +to use the Work (including but not limited to damages for loss of goodwill, +work stoppage, computer failure or malfunction, or any and all other commercial +damages or losses), even if such Contributor has been advised of the possibility +of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work +or Derivative Works thereof, You may choose to offer, and charge a fee for, +acceptance of support, warranty, indemnity, or other liability obligations +and/or rights consistent with this License. However, in accepting such obligations, +You may act only on Your own behalf and on Your sole responsibility, not on +behalf of any other Contributor, and only if You agree to indemnify, defend, +and hold each Contributor harmless for any liability incurred by, or claims +asserted against, such Contributor by reason of your accepting any such warranty +or additional liability. END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own identifying +information. (Don't include the brackets!) The text should be enclosed in +the appropriate comment syntax for the file format. We also recommend that +a file or class name and description of purpose be included on the same "printed +page" as the copyright notice for easier identification within third-party +archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); + +you may not use this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software + +distributed under the License is distributed on an "AS IS" BASIS, + +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and + +limitations under the License. diff --git a/LICENSES/CC-BY-4.0.txt b/LICENSES/CC-BY-4.0.txt new file mode 100644 index 00000000000..3f92dfc5fdd --- /dev/null +++ b/LICENSES/CC-BY-4.0.txt @@ -0,0 +1,324 @@ +Creative Commons Attribution 4.0 International Creative Commons Corporation +("Creative Commons") is not a law firm and does not provide legal services +or legal advice. Distribution of Creative Commons public licenses does not +create a lawyer-client or other relationship. Creative Commons makes its licenses +and related information available on an "as-is" basis. Creative Commons gives +no warranties regarding its licenses, any material licensed under their terms +and conditions, or any related information. Creative Commons disclaims all +liability for damages resulting from their use to the fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and conditions +that creators and other rights holders may use to share original works of +authorship and other material subject to copyright and certain other rights +specified in the public license below. The following considerations are for +informational purposes only, are not exhaustive, and do not form part of our +licenses. 
+ +Considerations for licensors: Our public licenses are intended for use by +those authorized to give the public permission to use material in ways otherwise +restricted by copyright and certain other rights. Our licenses are irrevocable. +Licensors should read and understand the terms and conditions of the license +they choose before applying it. Licensors should also secure all rights necessary +before applying our licenses so that the public can reuse the material as +expected. Licensors should clearly mark any material not subject to the license. +This includes other CC-licensed material, or material used under an exception +or limitation to copyright. More considerations for licensors : wiki.creativecommons.org/Considerations_for_licensors + +Considerations for the public: By using one of our public licenses, a licensor +grants the public permission to use the licensed material under specified +terms and conditions. If the licensor's permission is not necessary for any +reason–for example, because of any applicable exception or limitation to copyright–then +that use is not regulated by the license. Our licenses grant only permissions +under copyright and certain other rights that a licensor has authority to +grant. Use of the licensed material may still be restricted for other reasons, +including because others have copyright or other rights in the material. A +licensor may make special requests, such as asking that all changes be marked +or described. Although not required by our licenses, you are encouraged to +respect those requests where reasonable. More considerations for the public +: wiki.creativecommons.org/Considerations_for_licensees Creative Commons Attribution +4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree to +be bound by the terms and conditions of this Creative Commons Attribution +4.0 International Public License ("Public License"). 
To the extent this Public +License may be interpreted as a contract, You are granted the Licensed Rights +in consideration of Your acceptance of these terms and conditions, and the +Licensor grants You such rights in consideration of benefits the Licensor +receives from making the Licensed Material available under these terms and +conditions. + +Section 1 – Definitions. + +a. Adapted Material means material subject to Copyright and Similar Rights +that is derived from or based upon the Licensed Material and in which the +Licensed Material is translated, altered, arranged, transformed, or otherwise +modified in a manner requiring permission under the Copyright and Similar +Rights held by the Licensor. For purposes of this Public License, where the +Licensed Material is a musical work, performance, or sound recording, Adapted +Material is always produced where the Licensed Material is synched in timed +relation with a moving image. + +b. Adapter's License means the license You apply to Your Copyright and Similar +Rights in Your contributions to Adapted Material in accordance with the terms +and conditions of this Public License. + +c. Copyright and Similar Rights means copyright and/or similar rights closely +related to copyright including, without limitation, performance, broadcast, +sound recording, and Sui Generis Database Rights, without regard to how the +rights are labeled or categorized. For purposes of this Public License, the +rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. + +d. Effective Technological Measures means those measures that, in the absence +of proper authority, may not be circumvented under laws fulfilling obligations +under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, +and/or similar international agreements. + +e. 
Exceptions and Limitations means fair use, fair dealing, and/or any other +exception or limitation to Copyright and Similar Rights that applies to Your +use of the Licensed Material. + +f. Licensed Material means the artistic or literary work, database, or other +material to which the Licensor applied this Public License. + +g. Licensed Rights means the rights granted to You subject to the terms and +conditions of this Public License, which are limited to all Copyright and +Similar Rights that apply to Your use of the Licensed Material and that the +Licensor has authority to license. + +h. Licensor means the individual(s) or entity(ies) granting rights under this +Public License. + +i. Share means to provide material to the public by any means or process that +requires permission under the Licensed Rights, such as reproduction, public +display, public performance, distribution, dissemination, communication, or +importation, and to make material available to the public including in ways +that members of the public may access the material from a place and at a time +individually chosen by them. + +j. Sui Generis Database Rights means rights other than copyright resulting +from Directive 96/9/EC of the European Parliament and of the Council of 11 +March 1996 on the legal protection of databases, as amended and/or succeeded, +as well as other essentially equivalent rights anywhere in the world. + +k. You means the individual or entity exercising the Licensed Rights under +this Public License. Your has a corresponding meaning. + +Section 2 – Scope. + + a. License grant. + +1. Subject to the terms and conditions of this Public License, the Licensor +hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, +irrevocable license to exercise the Licensed Rights in the Licensed Material +to: + + A. reproduce and Share the Licensed Material, in whole or in part; and + + B. produce, reproduce, and Share Adapted Material. + +2. Exceptions and Limitations. 
For the avoidance of doubt, where Exceptions +and Limitations apply to Your use, this Public License does not apply, and +You do not need to comply with its terms and conditions. + + 3. Term. The term of this Public License is specified in Section 6(a). + +4. Media and formats; technical modifications allowed. The Licensor authorizes +You to exercise the Licensed Rights in all media and formats whether now known +or hereafter created, and to make technical modifications necessary to do +so. The Licensor waives and/or agrees not to assert any right or authority +to forbid You from making technical modifications necessary to exercise the +Licensed Rights, including technical modifications necessary to circumvent +Effective Technological Measures. For purposes of this Public License, simply +making modifications authorized by this Section 2(a)(4) never produces Adapted +Material. + + 5. Downstream recipients. + +A. Offer from the Licensor – Licensed Material. Every recipient of the Licensed +Material automatically receives an offer from the Licensor to exercise the +Licensed Rights under the terms and conditions of this Public License. + +B. No downstream restrictions. You may not offer or impose any additional +or different terms or conditions on, or apply any Effective Technological +Measures to, the Licensed Material if doing so restricts exercise of the Licensed +Rights by any recipient of the Licensed Material. + +6. No endorsement. Nothing in this Public License constitutes or may be construed +as permission to assert or imply that You are, or that Your use of the Licensed +Material is, connected with, or sponsored, endorsed, or granted official status +by, the Licensor or others designated to receive attribution as provided in +Section 3(a)(1)(A)(i). + + b. Other rights. + +1. 
Moral rights, such as the right of integrity, are not licensed under this +Public License, nor are publicity, privacy, and/or other similar personality +rights; however, to the extent possible, the Licensor waives and/or agrees +not to assert any such rights held by the Licensor to the limited extent necessary +to allow You to exercise the Licensed Rights, but not otherwise. + +2. Patent and trademark rights are not licensed under this Public License. + +3. To the extent possible, the Licensor waives any right to collect royalties +from You for the exercise of the Licensed Rights, whether directly or through +a collecting society under any voluntary or waivable statutory or compulsory +licensing scheme. In all other cases the Licensor expressly reserves any right +to collect such royalties. + +Section 3 – License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the following +conditions. + + a. Attribution. + +1. If You Share the Licensed Material (including in modified form), You must: + +A. retain the following if it is supplied by the Licensor with the Licensed +Material: + +i. identification of the creator(s) of the Licensed Material and any others +designated to receive attribution, in any reasonable manner requested by the +Licensor (including by pseudonym if designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of warranties; + +v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; + +B. indicate if You modified the Licensed Material and retain an indication +of any previous modifications; and + +C. indicate the Licensed Material is licensed under this Public License, and +include the text of, or the URI or hyperlink to, this Public License. + +2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner +based on the medium, means, and context in which You Share the Licensed Material. 
+For example, it may be reasonable to satisfy the conditions by providing a +URI or hyperlink to a resource that includes the required information. + +3. If requested by the Licensor, You must remove any of the information required +by Section 3(a)(1)(A) to the extent reasonably practicable. + +4. If You Share Adapted Material You produce, the Adapter's License You apply +must not prevent recipients of the Adapted Material from complying with this +Public License. + +Section 4 – Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that apply to +Your use of the Licensed Material: + +a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, +reuse, reproduce, and Share all or a substantial portion of the contents of +the database; + +b. if You include all or a substantial portion of the database contents in +a database in which You have Sui Generis Database Rights, then the database +in which You have Sui Generis Database Rights (but not its individual contents) +is Adapted Material; and + +c. You must comply with the conditions in Section 3(a) if You Share all or +a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not replace +Your obligations under this Public License where the Licensed Rights include +other Copyright and Similar Rights. + +Section 5 – Disclaimer of Warranties and Limitation of Liability. + +a. Unless otherwise separately undertaken by the Licensor, to the extent possible, +the Licensor offers the Licensed Material as-is and as-available, and makes +no representations or warranties of any kind concerning the Licensed Material, +whether express, implied, statutory, or other. 
This includes, without limitation, +warranties of title, merchantability, fitness for a particular purpose, non-infringement, +absence of latent or other defects, accuracy, or the presence or absence of +errors, whether or not known or discoverable. Where disclaimers of warranties +are not allowed in full or in part, this disclaimer may not apply to You. + +b. To the extent possible, in no event will the Licensor be liable to You +on any legal theory (including, without limitation, negligence) or otherwise +for any direct, special, indirect, incidental, consequential, punitive, exemplary, +or other losses, costs, expenses, or damages arising out of this Public License +or use of the Licensed Material, even if the Licensor has been advised of +the possibility of such losses, costs, expenses, or damages. Where a limitation +of liability is not allowed in full or in part, this limitation may not apply +to You. + +c. The disclaimer of warranties and limitation of liability provided above +shall be interpreted in a manner that, to the extent possible, most closely +approximates an absolute disclaimer and waiver of all liability. + +Section 6 – Term and Termination. + +a. This Public License applies for the term of the Copyright and Similar Rights +licensed here. However, if You fail to comply with this Public License, then +Your rights under this Public License terminate automatically. + +b. Where Your right to use the Licensed Material has terminated under Section +6(a), it reinstates: + +1. automatically as of the date the violation is cured, provided it is cured +within 30 days of Your discovery of the violation; or + + 2. upon express reinstatement by the Licensor. + +c. For the avoidance of doubt, this Section 6(b) does not affect any right +the Licensor may have to seek remedies for Your violations of this Public +License. + +d. 
For the avoidance of doubt, the Licensor may also offer the Licensed Material +under separate terms or conditions or stop distributing the Licensed Material +at any time; however, doing so will not terminate this Public License. + + e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. + +Section 7 – Other Terms and Conditions. + +a. The Licensor shall not be bound by any additional or different terms or +conditions communicated by You unless expressly agreed. + +b. Any arrangements, understandings, or agreements regarding the Licensed +Material not stated herein are separate from and independent of the terms +and conditions of this Public License. + +Section 8 – Interpretation. + +a. For the avoidance of doubt, this Public License does not, and shall not +be interpreted to, reduce, limit, restrict, or impose conditions on any use +of the Licensed Material that could lawfully be made without permission under +this Public License. + +b. To the extent possible, if any provision of this Public License is deemed +unenforceable, it shall be automatically reformed to the minimum extent necessary +to make it enforceable. If the provision cannot be reformed, it shall be severed +from this Public License without affecting the enforceability of the remaining +terms and conditions. + +c. No term or condition of this Public License will be waived and no failure +to comply consented to unless expressly agreed to by the Licensor. + +d. Nothing in this Public License constitutes or may be interpreted as a limitation +upon, or waiver of, any privileges and immunities that apply to the Licensor +or You, including from the legal processes of any jurisdiction or authority. + +Creative Commons is not a party to its public licenses. Notwithstanding, Creative +Commons may elect to apply one of its public licenses to material it publishes +and in those instances will be considered the "Licensor." 
The text of the +Creative Commons public licenses is dedicated to the public domain under the +CC0 Public Domain Dedication. Except for the limited purpose of indicating +that material is shared under a Creative Commons public license or as otherwise +permitted by the Creative Commons policies published at creativecommons.org/policies, +Creative Commons does not authorize the use of the trademark "Creative Commons" +or any other trademark or logo of Creative Commons without its prior written +consent including, without limitation, in connection with any unauthorized +modifications to any of its public licenses or any other arrangements, understandings, +or agreements concerning use of licensed material. For the avoidance of doubt, +this paragraph does not form part of the public licenses. + +Creative Commons may be contacted at creativecommons.org. diff --git a/LICENSES/MIT.txt b/LICENSES/MIT.txt new file mode 100644 index 00000000000..204b93da48d --- /dev/null +++ b/LICENSES/MIT.txt @@ -0,0 +1,19 @@ +MIT License Copyright (c) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished +to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS +OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile index a006e81faa7..b44eb5e8c97 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + TARGET := spirv-cross SOURCES := $(wildcard spirv_*.cpp) @@ -10,7 +13,7 @@ STATIC_LIB := lib$(TARGET).a DEPS := $(OBJECTS:.o=.d) $(CLI_OBJECTS:.o=.d) -CXXFLAGS += -std=c++11 -Wall -Wextra -Wshadow +CXXFLAGS += -std=c++11 -Wall -Wextra -Wshadow -Wno-deprecated-declarations ifeq ($(DEBUG), 1) CXXFLAGS += -O0 -g diff --git a/Package.swift b/Package.swift new file mode 100644 index 00000000000..99e601936a4 --- /dev/null +++ b/Package.swift @@ -0,0 +1,72 @@ +// swift-tools-version:5.5 +// The swift-tools-version declares the minimum version of Swift required to build this package. + +// Copyright 2016-2021 The Khronos Group Inc. +// SPDX-License-Identifier: Apache-2.0 + +import PackageDescription + +let package = Package( + name: "SPIRV-Cross", + products: [ + // Products define the executables and libraries a package produces, and make them visible to other packages. + .library( + name: "SPIRV-Cross", + targets: ["SPIRV-Cross"]), + ], + dependencies: [ + // Dependencies declare other packages that this package depends on. + // .package(url: /* package url */, from: "1.0.0"), + ], + targets: [ + // Targets are the basic building blocks of a package. A target can define a module or a test suite. + // Targets can depend on other targets in this package, and on products in packages this package depends on. 
+ .target( + name: "SPIRV-Cross", + dependencies: [], + path: ".", + exclude: ["CMakeLists.txt", + "CODE_OF_CONDUCT.adoc", + "LICENSE", + "LICENSES", + "Makefile", + "README.md", + "appveyor.yml", + "build_glslang_spirv_tools.sh", + "checkout_glslang_spirv_tools.sh", + "cmake", + "format_all.sh", + "gn", + "main.cpp", + "pkg-config", + "reference", + "samples", + "shaders", + "shaders-hlsl", + "shaders-hlsl-no-opt", + "shaders-msl", + "shaders-msl-no-opt", + "shaders-no-opt", + "shaders-other", + "shaders-reflection", + "shaders-ue4", + "shaders-ue4-no-opt", + "test_shaders.py", + "test_shaders.sh", + "tests-other", + "update_test_shaders.sh"], + sources: ["spirv_cfg.cpp", + "spirv_cpp.cpp", + "spirv_cross.cpp", + "spirv_cross_c.cpp", + "spirv_cross_parsed_ir.cpp", + "spirv_cross_util.cpp", + "spirv_glsl.cpp", + "spirv_hlsl.cpp", + "spirv_msl.cpp", + "spirv_parser.cpp", + "spirv_reflect.cpp"], + publicHeadersPath: "."), + ], + cxxLanguageStandard: .cxx14 +) diff --git a/README.md b/README.md index 831c6ff00e2..3a89e6c2f8c 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,22 @@ + + # SPIRV-Cross SPIRV-Cross is a tool designed for parsing and converting SPIR-V to other shader languages. 
-[![Build Status](https://travis-ci.org/KhronosGroup/SPIRV-Cross.svg?branch=master)](https://travis-ci.org/KhronosGroup/SPIRV-Cross) -[![Build Status](https://ci.appveyor.com/api/projects/status/github/KhronosGroup/SPIRV-Cross?svg=true&branch=master)](https://ci.appveyor.com/project/HansKristian-Work/SPIRV-Cross) +[![CI](https://github.com/KhronosGroup/SPIRV-Cross/actions/workflows/main.yml/badge.svg)](https://github.com/KhronosGroup/SPIRV-Cross/actions/workflows/main.yml) +[![Build Status](https://ci.appveyor.com/api/projects/status/github/KhronosGroup/SPIRV-Cross?svg=true&branch=main)](https://ci.appveyor.com/project/HansKristian-Work/SPIRV-Cross) ## Features - Convert SPIR-V to readable, usable and efficient GLSL - Convert SPIR-V to readable, usable and efficient Metal Shading Language (MSL) - Convert SPIR-V to readable, usable and efficient HLSL + - Convert SPIR-V to a JSON reflection format - Convert SPIR-V to debuggable C++ [DEPRECATED] - - Convert SPIR-V to a JSON reflection format [EXPERIMENTAL] - Reflection API to simplify the creation of Vulkan pipeline layouts - Reflection API to modify and tweak OpDecorations - Supports "all" of vertex, fragment, tessellation, geometry and compute shaders. @@ -26,6 +31,10 @@ However, most missing features are expected to be "trivial" improvements at this SPIRV-Cross has been tested on Linux, iOS/OSX, Windows and Android. CMake is the main build system. +### NOTE: main branch rename + +On 2023-01-12, `master` was renamed to `main` as per Khronos policy. + ### Linux and macOS Building with CMake is recommended, as it is the only build system which is tested in continuous integration. @@ -52,6 +61,20 @@ The make and CMake build flavors offer the option to treat exceptions as asserti You can use `-DSPIRV_CROSS_STATIC=ON/OFF` `-DSPIRV_CROSS_SHARED=ON/OFF` `-DSPIRV_CROSS_CLI=ON/OFF` to control which modules are built (and installed). 
+### Installing SPIRV-Cross (vcpkg) + +Alternatively, you can build and install SPIRV-Cross using [vcpkg](https://github.com/Microsoft/vcpkg/) dependency manager: + +``` +git clone https://github.com/Microsoft/vcpkg.git +cd vcpkg +./bootstrap-vcpkg.sh +./vcpkg integrate install +./vcpkg install spirv-cross +``` + +The SPIRV-Cross port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. + ## Usage ### Using the C++ API @@ -99,7 +122,7 @@ int main() spirv_cross::CompilerGLSL::Options options; options.version = 310; options.es = true; - glsl.set_options(options); + glsl.set_common_options(options); // Compile to GLSL, ready to give to GL driver. std::string source = glsl.compile(); @@ -161,12 +184,12 @@ for (i = 0; i < count; i++) } // Modify options. -spvc_compiler_create_compiler_options(context, &options); +spvc_compiler_create_compiler_options(compiler_glsl, &options); spvc_compiler_options_set_uint(options, SPVC_COMPILER_OPTION_GLSL_VERSION, 330); spvc_compiler_options_set_bool(options, SPVC_COMPILER_OPTION_GLSL_ES, SPVC_FALSE); spvc_compiler_install_compiler_options(compiler_glsl, options); -spvc_compiler_compile(compiler, &result); +spvc_compiler_compile(compiler_glsl, &result); printf("Cross-compiled source: %s\n", result); // Frees all memory we allocated so far. @@ -321,7 +344,7 @@ compiler.set_name(varying_resource.base_type_id, "VertexFragmentLinkage"); ``` Some platform may require identical variable name for both vertex outputs and fragment inputs. (for example MacOSX) -to rename varaible base on location, please add +to rename variable base on location, please add ``` --rename-interface-variable ``` @@ -376,10 +399,28 @@ for (auto &remap : compiler->get_combined_image_samplers()) If your target is Vulkan GLSL, `--vulkan-semantics` will emit separate image samplers as you'd expect. 
The command line client calls `Compiler::build_combined_image_samplers` automatically, but if you're calling the library, you'll need to do this yourself. -#### Descriptor sets (Vulkan GLSL) for backends which do not support them (HLSL/GLSL/Metal) +#### Descriptor sets (Vulkan GLSL) for backends which do not support them (pre HLSL 5.1 / GLSL) Descriptor sets are unique to Vulkan, so make sure that descriptor set + binding is remapped to a flat binding scheme (set always 0), so that other APIs can make sense of the bindings. -This can be done with `Compiler::set_decoration(id, spv::DecorationDescriptorSet)`. +This can be done with `Compiler::set_decoration(id, spv::DecorationDescriptorSet)`. For other backends like MSL and HLSL, descriptor sets +can be used, with some minor caveats, see below. + +##### MSL 2.0+ + +Metal supports indirect argument buffers (--msl-argument-buffers). In this case, descriptor sets become argument buffers, +and bindings are mapped to [[id(N)]] within the argument buffer. One quirk is that arrays of resources consume multiple ids, +where Vulkan does not. This can be worked around either from shader authoring stage +or remapping bindings as needed to avoid the overlap. +There is also a rich API to declare remapping schemes which is intended to work like +the pipeline layout in Vulkan. See `CompilerMSL::add_msl_resource_binding`. Remapping combined image samplers for example +must be split into two bindings in MSL, so it's possible to declare an id for the texture and sampler binding separately. + +##### HLSL - SM 5.1+ + +In SM 5.1+, descriptor set bindings are interpreted as register spaces directly. In HLSL however, arrays of resources consume +multiple binding slots where Vulkan does not, so there might be overlap if the SPIR-V was not authored with this in mind. +This can be worked around either from shader authoring stage (don't assign overlapping bindings) +or remap bindings in SPIRV-Cross as needed to avoid the overlap. 
#### Linking by name for targets which do not support explicit locations (legacy GLSL/ESSL) @@ -402,6 +443,29 @@ Y-flipping of gl_Position and similar is also supported. The use of this is discouraged, because relying on vertex shader Y-flipping tends to get quite messy. To enable this, set `CompilerGLSL::Options.vertex.flip_vert_y` or `--flip-vert-y` in CLI. +#### Reserved identifiers + +When cross-compiling, certain identifiers are considered to be reserved by the implementation. +Code generated by SPIRV-Cross cannot emit these identifiers as they are reserved and used for various internal purposes, +and such variables will typically show up as `_RESERVED_IDENTIFIER_FIXUP_` +or some similar name to make it more obvious that an identifier has been renamed. + +Reflection output will follow the exact name specified in the SPIR-V module. It might not be a valid identifier in the C sense, +as it may contain non-alphanumeric/non-underscore characters. + +Reserved identifiers currently assumed by the implementation are (in pseudo-regex): + +- _$digit+, e.g. `_100`, `_2` +- _$digit+_.+, e.g. `_100_tmp`, `_2_foobar`. `_2Bar` is **not** reserved. +- gl_- prefix +- spv- prefix +- SPIRV_Cross prefix. This prefix is generally used for interface variables where app needs to provide data for workaround purposes. + This identifier will not be rewritten, but be aware of potential collisions. +- Double underscores (reserved by all target languages). + +Members of structs also have a reserved identifier: +- _m$digit+$END, e.g. `_m20` and `_m40` are reserved, but not `_m40Foobar`. + ## Contributing Contributions to SPIRV-Cross are welcome. See Testing and Licensing sections for details. @@ -416,9 +480,6 @@ All pull requests should ensure that test output does not change unexpectedly. T ``` ./checkout_glslang_spirv_tools.sh # Checks out glslang and SPIRV-Tools at a fixed revision which matches the reference output. 
- # NOTE: Some users have reported problems cloning from git:// paths. To use https:// instead pass in - # $ PROTOCOL=https ./checkout_glslang_spirv_tools.sh - # instead. ./build_glslang_spirv_tools.sh # Builds glslang and SPIRV-Tools. ./test_shaders.sh # Runs over all changes and makes sure that there are no deltas compared to reference files. ``` @@ -442,7 +503,7 @@ to update the reference files and include these changes as part of the pull requ Always make sure you are running the correct version of glslangValidator as well as SPIRV-Tools when updating reference files. See `checkout_glslang_spirv_tools.sh` which revisions are currently expected. The revisions change regularly. -In short, the master branch should always be able to run `./test_shaders.py shaders` and friends without failure. +In short, the main branch should always be able to run `./test_shaders.py shaders` and friends without failure. SPIRV-Cross uses Travis CI to test all pull requests, so it is not strictly needed to perform testing yourself if you have problems running it locally. A pull request which does not pass testing on Travis will not be accepted however. diff --git a/appveyor.yml b/appveyor.yml index 2f427f18044..cc2fade2716 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,3 +1,5 @@ +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 environment: matrix: diff --git a/build_glslang_spirv_tools.sh b/build_glslang_spirv_tools.sh index fb4f7de218c..f78640f35b9 100755 --- a/build_glslang_spirv_tools.sh +++ b/build_glslang_spirv_tools.sh @@ -1,4 +1,6 @@ #!/bin/bash +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 PROFILE=Release diff --git a/checkout_glslang_spirv_tools.sh b/checkout_glslang_spirv_tools.sh index 37b99a0e08f..88429395837 100755 --- a/checkout_glslang_spirv_tools.sh +++ b/checkout_glslang_spirv_tools.sh @@ -1,14 +1,11 @@ #!/bin/bash +# Copyright 2016-2021 The Khronos Group Inc. 
+# SPDX-License-Identifier: Apache-2.0 -GLSLANG_REV=e291f7a09f6733f6634fe077a228056fabee881e -SPIRV_TOOLS_REV=89fe836fe22c3e5c2a062ebeade012e2c2f0839b -SPIRV_HEADERS_REV=c4f8f65792d4bf2657ca751904c511bbcf2ac77b - -if [ -z $PROTOCOL ]; then - PROTOCOL=git -fi - -echo "Using protocol \"$PROTOCOL\" for checking out repositories. If this is problematic, try PROTOCOL=https $0." +GLSLANG_REV=06a7078ce74ab5c7801a165b8145859678831fb8 +SPIRV_TOOLS_REV=f62e121b0df5374d1f043d1fbda98467406af0b1 +SPIRV_HEADERS_REV=d13b52222c39a7e9a401b44646f0ca3a640fbd47 +PROTOCOL=https if [ -d external/glslang ]; then echo "Updating glslang to revision $GLSLANG_REV." @@ -41,7 +38,7 @@ fi if [ -d external/spirv-headers ]; then cd external/spirv-headers - git pull origin master + git fetch origin git checkout $SPIRV_HEADERS_REV cd ../.. else diff --git a/cmake/gitversion.in.h b/cmake/gitversion.in.h index 7135e283b23..bff73e964e8 100644 --- a/cmake/gitversion.in.h +++ b/cmake/gitversion.in.h @@ -1,3 +1,6 @@ +// Copyright 2016-2021 The Khronos Group Inc. +// SPDX-License-Identifier: Apache-2.0 + #ifndef SPIRV_CROSS_GIT_VERSION_H_ #define SPIRV_CROSS_GIT_VERSION_H_ diff --git a/format_all.sh b/format_all.sh index fcfffc57f86..001c3c5dd4b 100755 --- a/format_all.sh +++ b/format_all.sh @@ -1,4 +1,6 @@ #!/bin/bash +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 #for file in spirv_*.{cpp,hpp} include/spirv_cross/*.{hpp,h} samples/cpp/*.cpp main.cpp for file in spirv_*.{cpp,hpp} main.cpp diff --git a/gn/BUILD.gn b/gn/BUILD.gn new file mode 100644 index 00000000000..64d019eae50 --- /dev/null +++ b/gn/BUILD.gn @@ -0,0 +1,68 @@ +# Copyright (C) 2019 Google, Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +config("spirv_cross_public") { + include_dirs = [ ".." ] + + defines = [ "SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS" ] +} + +source_set("spirv_cross_sources") { + public_configs = [ ":spirv_cross_public" ] + + sources = [ + "../GLSL.std.450.h", + "../spirv.hpp", + "../spirv_cfg.cpp", + "../spirv_cfg.hpp", + "../spirv_common.hpp", + "../spirv_cross.cpp", + "../spirv_cross.hpp", + "../spirv_cross_containers.hpp", + "../spirv_cross_error_handling.hpp", + "../spirv_cross_parsed_ir.cpp", + "../spirv_cross_parsed_ir.hpp", + "../spirv_cross_util.cpp", + "../spirv_cross_util.hpp", + "../spirv_glsl.cpp", + "../spirv_glsl.hpp", + "../spirv_hlsl.cpp", + "../spirv_hlsl.hpp", + "../spirv_msl.cpp", + "../spirv_msl.hpp", + "../spirv_parser.cpp", + "../spirv_parser.hpp", + "../spirv_reflect.cpp", + "../spirv_reflect.hpp", + ] + + if (!is_win) { + cflags = [ "-fno-exceptions" ] + } + + if (is_clang) { + cflags_cc = [ + "-Wno-extra-semi", + "-Wno-ignored-qualifiers", + "-Wno-implicit-fallthrough", + "-Wno-inconsistent-missing-override", + "-Wno-missing-field-initializers", + "-Wno-newline-eof", + "-Wno-sign-compare", + "-Wno-unused-variable", + ] + } +} diff --git a/include/spirv_cross/barrier.hpp b/include/spirv_cross/barrier.hpp index bfcd2284317..4ca7f4d77cb 100644 --- a/include/spirv_cross/barrier.hpp +++ b/include/spirv_cross/barrier.hpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/include/spirv_cross/external_interface.h b/include/spirv_cross/external_interface.h index 1d26f1e1e44..949654f5bff 100644 --- a/include/spirv_cross/external_interface.h +++ b/include/spirv_cross/external_interface.h @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/spirv_cross/image.hpp b/include/spirv_cross/image.hpp index 73de894f886..a41ccdfbb40 100644 --- a/include/spirv_cross/image.hpp +++ b/include/spirv_cross/image.hpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/spirv_cross/internal_interface.hpp b/include/spirv_cross/internal_interface.hpp index e56223dfdbe..3ff7f8e258c 100644 --- a/include/spirv_cross/internal_interface.hpp +++ b/include/spirv_cross/internal_interface.hpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/spirv_cross/sampler.hpp b/include/spirv_cross/sampler.hpp index a95d489e2dc..02084809514 100644 --- a/include/spirv_cross/sampler.hpp +++ b/include/spirv_cross/sampler.hpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -85,7 +86,7 @@ struct sampler2DBase : spirv_cross_sampler_2d std::vector mips; spirv_cross_format format; spirv_cross_wrap wrap_s; - spirv_cross_format wrap_t; + spirv_cross_wrap wrap_t; spirv_cross_filter min_filter; spirv_cross_filter mag_filter; spirv_cross_mipfilter mip_filter; diff --git a/include/spirv_cross/thread_group.hpp b/include/spirv_cross/thread_group.hpp index 377f098b4fb..b2155815625 100644 --- a/include/spirv_cross/thread_group.hpp +++ b/include/spirv_cross/thread_group.hpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/main.cpp b/main.cpp index c441d235815..3605a54a209 100644 --- a/main.cpp +++ b/main.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #include "spirv_cpp.hpp" #include "spirv_cross_util.hpp" #include "spirv_glsl.hpp" @@ -31,12 +38,13 @@ #include #include -#ifdef HAVE_SPIRV_CROSS_GIT_VERSION -#include "gitversion.h" +#ifdef _WIN32 +#include +#include #endif -#ifdef _MSC_VER -#pragma warning(disable : 4996) +#ifdef HAVE_SPIRV_CROSS_GIT_VERSION +#include "gitversion.h" #endif using namespace spv; @@ -69,7 +77,7 @@ struct CLICallbacks struct CLIParser { CLIParser(CLICallbacks cbs_, int argc_, char *argv_[]) - : cbs(move(cbs_)) + : cbs(std::move(cbs_)) , argc(argc_) , argv(argv_) { @@ -140,6 +148,25 @@ struct CLIParser return uint32_t(val); } + uint32_t next_hex_uint() + { + if (!argc) + { + THROW("Tried to parse uint, but nothing left in arguments"); + } + + uint64_t val = stoul(*argv, nullptr, 16); + if (val > numeric_limits::max()) + { + THROW("next_uint() out of range"); + } + + argc--; + argv++; + + return uint32_t(val); + } + double next_double() { if (!argc) @@ -190,8 +217,35 @@ struct CLIParser bool ended_state = false; }; +#if defined(__clang__) || defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#elif defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4996) +#endif + +static vector read_spirv_file_stdin() +{ +#ifdef _WIN32 + setmode(fileno(stdin), O_BINARY); +#endif + + vector buffer; + uint32_t tmp[256]; + size_t ret; + + while ((ret = fread(tmp, sizeof(uint32_t), 256, stdin))) + buffer.insert(buffer.end(), tmp, tmp + ret); + + return buffer; +} + static vector read_spirv_file(const char *path) { + if (path[0] == '-' && path[1] == '\0') + return read_spirv_file_stdin(); + FILE *file = fopen(path, "rb"); if (!file) { @@ -225,6 +279,67 @@ static bool write_string_to_file(const char *path, const char *string) return true; } +#if defined(__clang__) || defined(__GNUC__) +#pragma GCC diagnostic pop +#elif defined(_MSC_VER) +#pragma warning(pop) +#endif + +static void print_resources(const Compiler &compiler, 
spv::StorageClass storage, + const SmallVector &resources) +{ + fprintf(stderr, "%s\n", storage == StorageClassInput ? "builtin inputs" : "builtin outputs"); + fprintf(stderr, "=============\n\n"); + for (auto &res : resources) + { + bool active = compiler.has_active_builtin(res.builtin, storage); + const char *basetype = "?"; + auto &type = compiler.get_type(res.value_type_id); + switch (type.basetype) + { + case SPIRType::Float: basetype = "float"; break; + case SPIRType::Int: basetype = "int"; break; + case SPIRType::UInt: basetype = "uint"; break; + default: break; + } + + uint32_t array_size = 0; + bool array_size_literal = false; + if (!type.array.empty()) + { + array_size = type.array.front(); + array_size_literal = type.array_size_literal.front(); + } + + string type_str = basetype; + if (type.vecsize > 1) + type_str += std::to_string(type.vecsize); + + if (array_size) + { + if (array_size_literal) + type_str += join("[", array_size, "]"); + else + type_str += join("[", array_size, " (spec constant ID)]"); + } + + string builtin_str; + switch (res.builtin) + { + case spv::BuiltInPosition: builtin_str = "Position"; break; + case spv::BuiltInPointSize: builtin_str = "PointSize"; break; + case spv::BuiltInCullDistance: builtin_str = "CullDistance"; break; + case spv::BuiltInClipDistance: builtin_str = "ClipDistance"; break; + case spv::BuiltInTessLevelInner: builtin_str = "TessLevelInner"; break; + case spv::BuiltInTessLevelOuter: builtin_str = "TessLevelOuter"; break; + default: builtin_str = string("builtin #") + to_string(res.builtin); + } + + fprintf(stderr, "Builtin %s (%s) (active: %s).\n", builtin_str.c_str(), type_str.c_str(), active ? 
"yes" : "no"); + } + fprintf(stderr, "=============\n\n"); +} + static void print_resources(const Compiler &compiler, const char *tag, const SmallVector &resources) { fprintf(stderr, "%s\n", tag); @@ -246,7 +361,7 @@ static void print_resources(const Compiler &compiler, const char *tag, const Sma compiler.get_decoration_bitset(type.self).get(DecorationBufferBlock); bool is_sized_block = is_block && (compiler.get_storage_class(res.id) == StorageClassUniform || compiler.get_storage_class(res.id) == StorageClassUniformConstant); - uint32_t fallback_id = !is_push_constant && is_block ? res.base_type_id : res.id; + ID fallback_id = !is_push_constant && is_block ? ID(res.base_type_id) : ID(res.id); uint32_t block_size = 0; uint32_t runtime_array_stride = 0; @@ -268,7 +383,7 @@ static void print_resources(const Compiler &compiler, const char *tag, const Sma for (auto arr : type.array) array = join("[", arr ? convert_to_string(arr) : "", "]") + array; - fprintf(stderr, " ID %03u : %s%s", res.id, + fprintf(stderr, " ID %03u : %s%s", uint32_t(res.id), !res.name.empty() ? 
res.name.c_str() : compiler.get_fallback_name(fallback_id).c_str(), array.c_str()); if (mask.get(DecorationLocation)) @@ -277,12 +392,20 @@ static void print_resources(const Compiler &compiler, const char *tag, const Sma fprintf(stderr, " (Set : %u)", compiler.get_decoration(res.id, DecorationDescriptorSet)); if (mask.get(DecorationBinding)) fprintf(stderr, " (Binding : %u)", compiler.get_decoration(res.id, DecorationBinding)); + if (static_cast(compiler).variable_is_depth_or_compare(res.id)) + fprintf(stderr, " (comparison)"); if (mask.get(DecorationInputAttachmentIndex)) fprintf(stderr, " (Attachment : %u)", compiler.get_decoration(res.id, DecorationInputAttachmentIndex)); if (mask.get(DecorationNonReadable)) fprintf(stderr, " writeonly"); if (mask.get(DecorationNonWritable)) fprintf(stderr, " readonly"); + if (mask.get(DecorationRestrict)) + fprintf(stderr, " restrict"); + if (mask.get(DecorationCoherent)) + fprintf(stderr, " coherent"); + if (mask.get(DecorationVolatile)) + fprintf(stderr, " volatile"); if (is_sized_block) { fprintf(stderr, " (BlockSize : %u bytes)", block_size); @@ -413,6 +536,9 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res print_resources(compiler, "push", res.push_constant_buffers); print_resources(compiler, "counters", res.atomic_counters); print_resources(compiler, "acceleration structures", res.acceleration_structures); + print_resources(compiler, "record buffers", res.shader_record_buffers); + print_resources(compiler, spv::StorageClassInput, res.builtin_inputs); + print_resources(compiler, spv::StorageClassOutput, res.builtin_outputs); } static void print_push_constant_resources(const Compiler &compiler, const SmallVector &res) @@ -442,7 +568,7 @@ static void print_spec_constants(const Compiler &compiler) fprintf(stderr, "Specialization constants\n"); fprintf(stderr, "==================\n\n"); for (auto &c : spec_constants) - fprintf(stderr, "ID: %u, Spec ID: %u\n", c.id, c.constant_id); + 
fprintf(stderr, "ID: %u, Spec ID: %u\n", uint32_t(c.id), c.constant_id); fprintf(stderr, "==================\n\n"); } @@ -487,6 +613,12 @@ struct InterfaceVariableRename string variable_name; }; +struct HLSLVertexAttributeRemapNamed +{ + std::string name; + std::string semantic; +}; + struct CLIArguments { const char *input = nullptr; @@ -513,12 +645,55 @@ struct CLIArguments bool msl_pad_fragment_output = false; bool msl_domain_lower_left = false; bool msl_argument_buffers = false; + uint32_t msl_argument_buffers_tier = 0; // Tier 1 bool msl_texture_buffer_native = false; + bool msl_framebuffer_fetch = false; + bool msl_invariant_float_math = false; + bool msl_emulate_cube_array = false; bool msl_multiview = false; + bool msl_multiview_layered_rendering = true; + bool msl_view_index_from_device_index = false; + bool msl_dispatch_base = false; + bool msl_decoration_binding = false; + bool msl_force_active_argument_buffer_resources = false; + bool msl_force_native_arrays = false; + bool msl_enable_frag_depth_builtin = true; + bool msl_enable_frag_stencil_ref_builtin = true; + uint32_t msl_enable_frag_output_mask = 0xffffffff; + bool msl_enable_clip_distance_user_varying = true; + bool msl_raw_buffer_tese_input = false; + bool msl_multi_patch_workgroup = false; + bool msl_vertex_for_tessellation = false; + uint32_t msl_additional_fixed_sample_mask = 0xffffffff; + bool msl_arrayed_subpass_input = false; + uint32_t msl_r32ui_linear_texture_alignment = 4; + uint32_t msl_r32ui_alignment_constant_id = 65535; + bool msl_texture_1d_as_2d = false; + bool msl_ios_use_simdgroup_functions = false; + bool msl_emulate_subgroups = false; + uint32_t msl_fixed_subgroup_size = 0; + bool msl_force_sample_rate_shading = false; + bool msl_manual_helper_invocation_updates = true; + bool msl_check_discarded_frag_stores = false; + const char *msl_combined_sampler_suffix = nullptr; bool glsl_emit_push_constant_as_ubo = false; bool glsl_emit_ubo_as_plain_uniforms = false; + bool 
glsl_force_flattened_io_blocks = false; + uint32_t glsl_ovr_multiview_view_count = 0; + SmallVector> glsl_ext_framebuffer_fetch; + bool glsl_ext_framebuffer_fetch_noncoherent = false; + bool vulkan_glsl_disable_ext_samplerless_texture_functions = false; bool emit_line_directives = false; + bool enable_storage_image_qualifier_deduction = true; + bool force_zero_initialized_variables = false; + bool relax_nan_checks = false; + uint32_t force_recompile_max_debug_iterations = 3; SmallVector msl_discrete_descriptor_sets; + SmallVector msl_device_argument_buffers; + SmallVector> msl_dynamic_buffers; + SmallVector> msl_inline_uniform_blocks; + SmallVector msl_shader_inputs; + SmallVector msl_shader_outputs; SmallVector pls_in; SmallVector pls_out; SmallVector remaps; @@ -526,6 +701,9 @@ struct CLIArguments SmallVector variable_type_remaps; SmallVector interface_variable_renames; SmallVector hlsl_attr_remap; + SmallVector hlsl_attr_remap_named; + SmallVector> masked_stage_outputs; + SmallVector masked_stage_builtins; string entry; string entry_stage; @@ -543,7 +721,17 @@ struct CLIArguments bool msl = false; bool hlsl = false; bool hlsl_compat = false; + bool hlsl_support_nonzero_base = false; + bool hlsl_base_vertex_index_explicit_binding = false; + uint32_t hlsl_base_vertex_index_register_index = 0; + uint32_t hlsl_base_vertex_index_register_space = 0; + + bool hlsl_force_storage_buffer_as_uav = false; + bool hlsl_nonwritable_uav_texture_as_srv = false; + bool hlsl_enable_16bit_types = false; + bool hlsl_flatten_matrix_vertex_input_semantics = false; + HLSLBindingFlags hlsl_binding_flags = 0; bool vulkan_semantics = false; bool flatten_multidimensional_arrays = false; bool use_420pack_extension = true; @@ -560,63 +748,276 @@ static void print_version() #endif } +static void print_help_backend() +{ + // clang-format off + fprintf(stderr, "\nSelect backend:\n" + "\tBy default, OpenGL-style GLSL is the target, with #version and GLSL/ESSL information inherited from the 
SPIR-V module if present.\n" + "\t[--vulkan-semantics] or [-V]:\n\t\tEmit Vulkan GLSL instead of plain GLSL. Makes use of Vulkan-only features to match SPIR-V.\n" + "\t[--msl]:\n\t\tEmit Metal Shading Language (MSL).\n" + "\t[--hlsl]:\n\t\tEmit HLSL.\n" + "\t[--reflect]:\n\t\tEmit JSON reflection.\n" + "\t[--cpp]:\n\t\tDEPRECATED. Emits C++ code.\n" + ); + // clang-format on +} + +static void print_help_glsl() +{ + // clang-format off + fprintf(stderr, "\nGLSL options:\n" + "\t[--es]:\n\t\tForce ESSL.\n" + "\t[--no-es]:\n\t\tForce desktop GLSL.\n" + "\t[--version ]:\n\t\tE.g. --version 450 will emit '#version 450' in shader.\n" + "\t\tCode generation will depend on the version used.\n" + "\t[--flatten-ubo]:\n\t\tEmit UBOs as plain uniform arrays which are suitable for use with glUniform4*v().\n" + "\t\tThis can be an optimization on GL implementations where this is faster or works around buggy driver implementations.\n" + "\t\tE.g.: uniform MyUBO { vec4 a; float b, c, d, e; }; will be emitted as uniform vec4 MyUBO[2];\n" + "\t\tCaveat: You cannot mix and match floating-point and integer in the same UBO with this option.\n" + "\t\tLegacy GLSL/ESSL (where this flattening makes sense) does not support bit-casting, which would have been the obvious workaround.\n" + "\t[--extension ext]:\n\t\tAdd #extension string of your choosing to GLSL output.\n" + "\t\tUseful if you use variable name remapping to something that requires an extension unknown to SPIRV-Cross.\n" + "\t[--remove-unused-variables]:\n\t\tDo not emit interface variables which are not statically accessed by the shader.\n" + "\t[--separate-shader-objects]:\n\t\tRedeclare gl_PerVertex blocks to be suitable for desktop GL separate shader objects.\n" + "\t[--glsl-emit-push-constant-as-ubo]:\n\t\tInstead of a plain uniform of struct for push constants, emit a UBO block instead.\n" + "\t[--glsl-emit-ubo-as-plain-uniforms]:\n\t\tInstead of emitting UBOs, emit them as plain uniform structs.\n" + 
"\t[--glsl-remap-ext-framebuffer-fetch input-attachment color-location]:\n\t\tRemaps an input attachment to use GL_EXT_shader_framebuffer_fetch.\n" + "\t\tgl_LastFragData[location] is read from. The attachment to read from must be declared as an output in the shader.\n" + "\t[--glsl-ext-framebuffer-fetch-noncoherent]:\n\t\tUses noncoherent qualifier for framebuffer fetch.\n" + "\t[--vulkan-glsl-disable-ext-samplerless-texture-functions]:\n\t\tDo not allow use of GL_EXT_samperless_texture_functions, even in Vulkan GLSL.\n" + "\t\tUse of texelFetch and similar might have to create dummy samplers to work around it.\n" + "\t[--combined-samplers-inherit-bindings]:\n\t\tInherit binding information from the textures when building combined image samplers from separate textures and samplers.\n" + "\t[--no-support-nonzero-baseinstance]:\n\t\tWhen using gl_InstanceIndex with desktop GL,\n" + "\t\tassume that base instance is always 0, and do not attempt to fix up gl_InstanceID to match Vulkan semantics.\n" + "\t[--pls-in format input-name]:\n\t\tRemaps a subpass input with name into a GL_EXT_pixel_local_storage input.\n" + "\t\tEntry in PLS block is ordered where first --pls-in marks the first entry. Can be called multiple times.\n" + "\t\tFormats allowed: r11f_g11f_b10f, r32f, rg16f, rg16, rgb10_a2, rgba8, rgba8i, rgba8ui, rg16i, rgb10_a2ui, rg16ui, r32ui.\n" + "\t\tRequires ESSL.\n" + "\t[--pls-out format output-name]:\n\t\tRemaps a color output with name into a GL_EXT_pixel_local_storage output.\n" + "\t\tEntry in PLS block is ordered where first --pls-output marks the first entry. 
Can be called multiple times.\n" + "\t\tFormats allowed: r11f_g11f_b10f, r32f, rg16f, rg16, rgb10_a2, rgba8, rgba8i, rgba8ui, rg16i, rgb10_a2ui, rg16ui, r32ui.\n" + "\t\tRequires ESSL.\n" + "\t[--remap source_name target_name components]:\n\t\tRemaps a variable to a different name with N components.\n" + "\t\tMain use case is to remap a subpass input to gl_LastFragDepthARM.\n" + "\t\tE.g.:\n" + "\t\tuniform subpassInput uDepth;\n" + "\t\t--remap uDepth gl_LastFragDepthARM 1 --extension GL_ARM_shader_framebuffer_fetch_depth_stencil\n" + "\t[--no-420pack-extension]:\n\t\tDo not make use of GL_ARB_shading_language_420pack in older GL targets to support layout(binding).\n" + "\t[--remap-variable-type ]:\n\t\tRemaps a variable type based on name.\n" + "\t\tPrimary use case is supporting external samplers in ESSL for video rendering on Android where you could remap a texture to a YUV one.\n" + "\t[--glsl-force-flattened-io-blocks]:\n\t\tAlways flatten I/O blocks and structs.\n" + "\t[--glsl-ovr-multiview-view-count count]:\n\t\tIn GL_OVR_multiview2, specify layout(num_views).\n" + ); + // clang-format on +} + +static void print_help_hlsl() +{ + // clang-format off + fprintf(stderr, "\nHLSL options:\n" + "\t[--shader-model]:\n\t\tEnables a specific shader model, e.g. 
--shader-model 50 for SM 5.0.\n" + "\t[--flatten-ubo]:\n\t\tEmit UBOs as plain uniform arrays.\n" + "\t\tE.g.: uniform MyUBO { vec4 a; float b, c, d, e; }; will be emitted as uniform float4 MyUBO[2];\n" + "\t\tCaveat: You cannot mix and match floating-point and integer in the same UBO with this option.\n" + "\t[--hlsl-enable-compat]:\n\t\tAllow point size and point coord to be used, even if they won't work as expected.\n" + "\t\tPointSize is ignored, and PointCoord returns (0.5, 0.5).\n" + "\t[--hlsl-support-nonzero-basevertex-baseinstance]:\n\t\tSupport base vertex and base instance by emitting a special cbuffer declared as:\n" + "\t\tcbuffer SPIRV_Cross_VertexInfo { int SPIRV_Cross_BaseVertex; int SPIRV_Cross_BaseInstance; };\n" + "\t[--hlsl-basevertex-baseinstance-binding ]:\n\t\tAssign a fixed binding to SPIRV_Cross_VertexInfo.\n" + "\t[--hlsl-auto-binding (push, cbv, srv, uav, sampler, all)]\n" + "\t\tDo not emit any : register(#) bindings for specific resource types, and rely on HLSL compiler to assign something.\n" + "\t[--hlsl-force-storage-buffer-as-uav]:\n\t\tAlways emit SSBOs as UAVs, even when marked as read-only.\n" + "\t\tNormally, SSBOs marked with NonWritable will be emitted as SRVs.\n" + "\t[--hlsl-nonwritable-uav-texture-as-srv]:\n\t\tEmit NonWritable storage images as SRV textures instead of UAV.\n" + "\t\tUsing this option messes with the type system. 
SPIRV-Cross cannot guarantee that this will work.\n" + "\t\tOne major problem area with this feature is function arguments, where we won't know if we're seeing a UAV or SRV.\n" + "\t\tShader must ensure that read/write state is consistent at all call sites.\n" + "\t[--set-hlsl-vertex-input-semantic ]:\n\t\tEmits a specific vertex input semantic for a given location.\n" + "\t\tOtherwise, TEXCOORD# is used as semantics, where # is location.\n" + "\t[--set-hlsl-named-vertex-input-semantic ]:\n\t\tEmits a specific vertex input semantic for a given name.\n" + "\t\tOpName reflection information must be intact.\n" + "\t[--hlsl-enable-16bit-types]:\n\t\tEnables native use of half/int16_t/uint16_t and ByteAddressBuffer interaction with these types. Requires SM 6.2.\n" + "\t[--hlsl-flatten-matrix-vertex-input-semantics]:\n\t\tEmits matrix vertex inputs with input semantics as if they were independent vectors, e.g. TEXCOORD{2,3,4} rather than matrix form TEXCOORD2_{0,1,2}.\n" + ); + // clang-format on +} + +static void print_help_msl() +{ + // clang-format off + fprintf(stderr, "\nMSL options:\n" + "\t[--msl-version ]:\n\t\tUses a specific MSL version, e.g. --msl-version 20100 for MSL 2.1.\n" + "\t[--msl-capture-output]:\n\t\tWrites geometry varyings to a buffer instead of as stage-outputs.\n" + "\t[--msl-swizzle-texture-samples]:\n\t\tWorks around lack of support for VkImageView component swizzles.\n" + "\t\tThis has a massive impact on performance and bloat. 
Do not use this unless you are absolutely forced to.\n" + "\t\tTo use this feature, the API side must pass down swizzle buffers.\n" + "\t\tShould only be used by translation layers as a last resort.\n" + "\t\tRecent Metal versions do not require this workaround.\n" + "\t[--msl-ios]:\n\t\tTarget iOS Metal instead of macOS Metal.\n" + "\t[--msl-pad-fragment-output]:\n\t\tAlways emit color outputs as 4-component variables.\n" + "\t\tIn Metal, the fragment shader must emit at least as many components as the render target format.\n" + "\t[--msl-domain-lower-left]:\n\t\tUse a lower-left tessellation domain.\n" + "\t[--msl-argument-buffers]:\n\t\tEmit Metal argument buffers instead of discrete resource bindings.\n" + "\t\tRequires MSL 2.0 to be enabled.\n" + "\t[--msl-argument-buffers-tier]:\n\t\tWhen using Metal argument buffers, indicate the Metal argument buffer tier level supported by the Metal platform.\n" + "\t\tUses same values as Metal MTLArgumentBuffersTier enumeration (0 = Tier1, 1 = Tier2).\n" + "\t\tSetting this value also enables msl-argument-buffers.\n" + "\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. 
Otherwise, it is emulated as a normal texture.\n" + "\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n" + "\t\tEmits [[color(N)]] inputs in fragment stage.\n" + "\t\tRequires an Apple GPU.\n" + "\t[--msl-emulate-cube-array]:\n\t\tEmulate cube arrays with 2D array and manual math.\n" + "\t[--msl-discrete-descriptor-set ]:\n\t\tWhen using argument buffers, forces a specific descriptor set to be implemented without argument buffers.\n" + "\t\tUseful for implementing push descriptors in emulation layers.\n" + "\t\tCan be used multiple times for each descriptor set in question.\n" + "\t[--msl-device-argument-buffer ]:\n\t\tUse device address space to hold indirect argument buffers instead of constant.\n" + "\t\tComes up when trying to support argument buffers which are larger than 64 KiB.\n" + "\t[--msl-multiview]:\n\t\tEnable SPV_KHR_multiview emulation.\n" + "\t[--msl-multiview-no-layered-rendering]:\n\t\tDon't set [[render_target_array_index]] in multiview shaders.\n" + "\t\tUseful for devices which don't support layered rendering. 
Only effective when --msl-multiview is enabled.\n" + "\t[--msl-view-index-from-device-index]:\n\t\tTreat the view index as the device index instead.\n" + "\t\tFor multi-GPU rendering.\n" + "\t[--msl-dispatch-base]:\n\t\tAdd support for vkCmdDispatchBase() or similar APIs.\n" + "\t\tOffsets the workgroup ID based on a buffer.\n" + "\t[--msl-dynamic-buffer ]:\n\t\tMarks a buffer as having dynamic offset.\n" + "\t\tThe offset is applied in the shader with pointer arithmetic.\n" + "\t\tUseful for argument buffers where it is non-trivial to apply dynamic offset otherwise.\n" + "\t[--msl-inline-uniform-block ]:\n\t\tIn argument buffers, mark an UBO as being an inline uniform block which is embedded into the argument buffer itself.\n" + "\t[--msl-decoration-binding]:\n\t\tUse SPIR-V bindings directly as MSL bindings.\n" + "\t\tThis does not work in the general case as there is no descriptor set support, and combined image samplers are split up.\n" + "\t\tHowever, if the shader author knows of binding limitations, this option will avoid the need for reflection on Metal side.\n" + "\t[--msl-force-active-argument-buffer-resources]:\n\t\tAlways emit resources which are part of argument buffers.\n" + "\t\tThis makes sure that similar shaders with same resource declarations can share the argument buffer as declaring an argument buffer implies an ABI.\n" + "\t[--msl-force-native-arrays]:\n\t\tRather than implementing array types as a templated value type ala std::array, use plain, native arrays.\n" + "\t\tThis will lead to worse code-gen, but can work around driver bugs on certain driver revisions of certain Intel-based Macbooks where template arrays break.\n" + "\t[--msl-disable-frag-depth-builtin]:\n\t\tDisables FragDepth output. Useful if pipeline does not enable depth, as pipeline creation might otherwise fail.\n" + "\t[--msl-disable-frag-stencil-ref-builtin]:\n\t\tDisable FragStencilRef output. 
Useful if pipeline does not enable stencil output, as pipeline creation might otherwise fail.\n" + "\t[--msl-enable-frag-output-mask ]:\n\t\tOnly selectively enable fragment outputs. Useful if pipeline does not enable fragment output for certain locations, as pipeline creation might otherwise fail.\n" + "\t[--msl-no-clip-distance-user-varying]:\n\t\tDo not emit user varyings to emulate gl_ClipDistance in fragment shaders.\n" + "\t[--msl-add-shader-input ]:\n\t\tSpecify the format of the shader input at .\n" + "\t\t can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, " + "or other-typed variable. is the vector length of the variable, which must be greater than or equal to that declared in the shader. can be 'vertex', " + "'primitive', or 'patch' to indicate a per-vertex, per-primitive, or per-patch variable.\n" + "\t\tUseful if shader stage interfaces don't match up, as pipeline creation might otherwise fail.\n" + "\t[--msl-add-shader-output ]:\n\t\tSpecify the format of the shader output at .\n" + "\t\t can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, " + "or other-typed variable. is the vector length of the variable, which must be greater than or equal to that declared in the shader. can be 'vertex', " + "'primitive', or 'patch' to indicate a per-vertex, per-primitive, or per-patch variable.\n" + "\t\tUseful if shader stage interfaces don't match up, as pipeline creation might otherwise fail.\n" + "\t[--msl-shader-input ]:\n\t\tSpecify the format of the shader input at .\n" + "\t\t can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, " + "or other-typed variable. 
is the vector length of the variable, which must be greater than or equal to that declared in the shader." + "\t\tEquivalent to --msl-add-shader-input with a rate of 'vertex'.\n" + "\t[--msl-shader-output ]:\n\t\tSpecify the format of the shader output at .\n" + "\t\t can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, " + "or other-typed variable. is the vector length of the variable, which must be greater than or equal to that declared in the shader." + "\t\tEquivalent to --msl-add-shader-output with a rate of 'vertex'.\n" + "\t[--msl-raw-buffer-tese-input]:\n\t\tUse raw buffers for tessellation evaluation input.\n" + "\t\tThis allows the use of nested structures and arrays.\n" + "\t\tIn a future version of SPIRV-Cross, this will become the default.\n" + "\t[--msl-multi-patch-workgroup]:\n\t\tUse the new style of tessellation control processing, where multiple patches are processed per workgroup.\n" + "\t\tThis should increase throughput by ensuring all the GPU's SIMD lanes are occupied, but it is not compatible with the old style.\n" + "\t\tIn addition, this style also passes input variables in buffers directly instead of using vertex attribute processing.\n" + "\t\tIn a future version of SPIRV-Cross, this will become the default.\n" + "\t[--msl-vertex-for-tessellation]:\n\t\tWhen handling a vertex shader, marks it as one that will be used with a new-style tessellation control shader.\n" + "\t\tThe vertex shader is output to MSL as a compute kernel which outputs vertices to the buffer in the order they are received, rather than in index order as with --msl-capture-output normally.\n" + "\t[--msl-additional-fixed-sample-mask ]:\n" + "\t\tSet an additional fixed sample mask. 
If the shader outputs a sample mask, then the final sample mask will be a bitwise AND of the two.\n" + "\t[--msl-arrayed-subpass-input]:\n\t\tAssume that images of dimension SubpassData have multiple layers. Layered input attachments are accessed relative to BuiltInLayer.\n" + "\t\tThis option has no effect if multiview is also enabled.\n" + "\t[--msl-r32ui-linear-texture-align ]:\n\t\tThe required alignment of linear textures of format MTLPixelFormatR32Uint.\n" + "\t\tThis is used to align the row stride for atomic accesses to such images.\n" + "\t[--msl-r32ui-linear-texture-align-constant-id ]:\n\t\tThe function constant ID to use for the linear texture alignment.\n" + "\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n" + "\t[--msl-texture-1d-as-2d]:\n\t\tEmit Image variables of dimension Dim1D as texture2d.\n" + "\t\tIn Metal, 1D textures do not support all features that 2D textures do. Use this option if your code relies on these features.\n" + "\t[--msl-ios-use-simdgroup-functions]:\n\t\tUse simd_*() functions for subgroup ops instead of quad_*().\n" + "\t\tRecent Apple GPUs support SIMD-groups larger than a quad. 
Use this option to take advantage of this support.\n" + "\t[--msl-emulate-subgroups]:\n\t\tAssume subgroups of size 1.\n" + "\t\tIntended for Vulkan Portability implementations where Metal support for SIMD-groups is insufficient for true subgroups.\n" + "\t[--msl-fixed-subgroup-size ]:\n\t\tAssign a constant to the SubgroupSize builtin.\n" + "\t\tIntended for Vulkan Portability implementations where VK_EXT_subgroup_size_control is not supported or disabled.\n" + "\t\tIf 0, assume variable subgroup size as actually exposed by Metal.\n" + "\t[--msl-force-sample-rate-shading]:\n\t\tForce fragment shaders to run per sample.\n" + "\t\tThis adds a [[sample_id]] parameter if none is already present.\n" + "\t[--msl-no-manual-helper-invocation-updates]:\n\t\tDo not manually update the HelperInvocation builtin when a fragment is discarded.\n" + "\t\tSome Metal devices have a bug where simd_is_helper_thread() does not return true\n" + "\t\tafter the fragment is discarded. This behavior is required by Vulkan and SPIR-V, however.\n" + "\t[--msl-check-discarded-frag-stores]:\n\t\tAdd additional checks to resource stores in a fragment shader.\n" + "\t\tSome Metal devices have a bug where stores to resources from a fragment shader\n" + "\t\tcontinue to execute, even when the fragment is discarded. These checks\n" + "\t\tprevent these stores from executing.\n" + "\t[--msl-combined-sampler-suffix ]:\n\t\tUses a custom suffix for combined samplers.\n"); + // clang-format on +} + +static void print_help_common() +{ + // clang-format off + fprintf(stderr, "\nCommon options:\n" + "\t[--entry name]:\n\t\tUse a specific entry point. 
By default, the first entry point in the module is used.\n" + "\t[--stage ]:\n\t\tForces use of a certain shader stage.\n" + "\t\tCan disambiguate the entry point if more than one entry point exists with same name, but different stage.\n" + "\t[--emit-line-directives]:\n\t\tIf SPIR-V has OpLine directives, aim to emit those accurately in output code as well.\n" + "\t[--rename-entry-point ]:\n\t\tRenames an entry point from what is declared in SPIR-V to code output.\n" + "\t\tMostly relevant for HLSL or MSL.\n" + "\t[--rename-interface-variable ]:\n\t\tRename an interface variable based on location decoration.\n" + "\t[--force-zero-initialized-variables]:\n\t\tForces temporary variables to be initialized to zero.\n" + "\t\tCan be useful in environments where compilers do not allow potentially uninitialized variables.\n" + "\t\tThis usually comes up with Phi temporaries.\n" + "\t[--fixup-clipspace]:\n\t\tFixup Z clip-space at the end of a vertex shader. The behavior is backend-dependent.\n" + "\t\tGLSL: Rewrites [0, w] Z range (D3D/Metal/Vulkan) to GL-style [-w, w].\n" + "\t\tHLSL/MSL: Rewrites [-w, w] Z range (GL) to D3D/Metal/Vulkan-style [0, w].\n" + "\t[--flip-vert-y]:\n\t\tInverts gl_Position.y (or equivalent) at the end of a vertex shader. This is equivalent to using negative viewport height.\n" + "\t[--mask-stage-output-location ]:\n" + "\t\tIf a stage output variable with matching location and component is active, optimize away the variable if applicable.\n" + "\t[--mask-stage-output-builtin ]:\n" + "\t\tIf a stage output variable with matching builtin is active, " + "optimize away the variable if it can affect cross-stage linking correctness.\n" + "\t[--relax-nan-checks]:\n\t\tRelax NaN checks for N{Clamp,Min,Max} and ordered vs. unordered compare instructions.\n" + ); + // clang-format on +} + +static void print_help_obscure() +{ + // clang-format off + fprintf(stderr, "\nObscure options:\n" + "\tThese options are not meant to be used on a regular basis. 
They have some occasional uses in the test suite.\n" + + "\t[--force-temporary]:\n\t\tAggressively emit temporary expressions instead of forwarding expressions. Very rarely used and under-tested.\n" + "\t[--revision]:\n\t\tPrints build timestamp and Git commit information (updated when cmake is configured).\n" + "\t[--iterations iter]:\n\t\tRecompiles the same shader over and over, benchmarking related.\n" + "\t[--disable-storage-image-qualifier-deduction]:\n\t\tIf storage images are received without any nonwritable or nonreadable information,\n""" + "\t\tdo not attempt to analyze usage, and always emit read/write state.\n" + "\t[--flatten-multidimensional-arrays]:\n\t\tDo not support multi-dimensional arrays and flatten them to one dimension.\n" + "\t[--cpp-interface-name ]:\n\t\tEmit a specific class name in C++ codegen.\n" + "\t[--force-recompile-max-debug-iterations ]:\n\t\tAllow compilation loop to run for N loops.\n" + "\t\tCan be used to triage workarounds, but should not be used as a crutch, since it masks an implementation bug.\n" + ); + // clang-format on +} + static void print_help() { print_version(); - fprintf(stderr, "Usage: spirv-cross\n" - "\t[--output ]\n" - "\t[SPIR-V file]\n" - "\t[--es]\n" - "\t[--no-es]\n" - "\t[--version ]\n" - "\t[--dump-resources]\n" - "\t[--help]\n" - "\t[--revision]\n" - "\t[--force-temporary]\n" - "\t[--vulkan-semantics]\n" - "\t[--flatten-ubo]\n" - "\t[--fixup-clipspace]\n" - "\t[--flip-vert-y]\n" - "\t[--iterations iter]\n" - "\t[--cpp]\n" - "\t[--cpp-interface-name ]\n" - "\t[--glsl-emit-push-constant-as-ubo]\n" - "\t[--glsl-emit-ubo-as-plain-uniforms]\n" - "\t[--msl]\n" - "\t[--msl-version ]\n" - "\t[--msl-capture-output]\n" - "\t[--msl-swizzle-texture-samples]\n" - "\t[--msl-ios]\n" - "\t[--msl-pad-fragment-output]\n" - "\t[--msl-domain-lower-left]\n" - "\t[--msl-argument-buffers]\n" - "\t[--msl-texture-buffer-native]\n" - "\t[--msl-discrete-descriptor-set ]\n" - "\t[--msl-multiview]\n" - "\t[--hlsl]\n" - 
"\t[--reflect]\n" - "\t[--shader-model]\n" - "\t[--hlsl-enable-compat]\n" - "\t[--hlsl-support-nonzero-basevertex-baseinstance]\n" - "\t[--separate-shader-objects]\n" - "\t[--pls-in format input-name]\n" - "\t[--pls-out format output-name]\n" - "\t[--remap source_name target_name components]\n" - "\t[--extension ext]\n" - "\t[--entry name]\n" - "\t[--stage ]\n" - "\t[--remove-unused-variables]\n" - "\t[--flatten-multidimensional-arrays]\n" - "\t[--no-420pack-extension]\n" - "\t[--remap-variable-type ]\n" - "\t[--rename-interface-variable ]\n" - "\t[--set-hlsl-vertex-input-semantic ]\n" - "\t[--rename-entry-point ]\n" - "\t[--combined-samplers-inherit-bindings]\n" - "\t[--no-support-nonzero-baseinstance]\n" - "\t[--emit-line-directives]\n" - "\n"); + // clang-format off + fprintf(stderr, "Usage: spirv-cross <...>\n" + "\nBasic:\n" + "\t[SPIR-V file] (- is stdin)\n" + "\t[--output ]: If not provided, prints output to stdout.\n" + "\t[--dump-resources]:\n\t\tPrints a basic reflection of the SPIR-V module along with other output.\n" + "\t[--help]:\n\t\tPrints this help message.\n" + ); + // clang-format on + + print_help_backend(); + print_help_common(); + print_help_glsl(); + print_help_msl(); + print_help_hlsl(); + print_help_obscure(); } static bool remap_generic(Compiler &compiler, const SmallVector &resources, const Remap &remap) @@ -717,13 +1118,50 @@ static ExecutionModel stage_to_execution_model(const std::string &stage) return ExecutionModelTessellationEvaluation; else if (stage == "geom") return ExecutionModelGeometry; + else if (stage == "rgen") + return ExecutionModelRayGenerationKHR; + else if (stage == "rint") + return ExecutionModelIntersectionKHR; + else if (stage == "rahit") + return ExecutionModelAnyHitKHR; + else if (stage == "rchit") + return ExecutionModelClosestHitKHR; + else if (stage == "rmiss") + return ExecutionModelMissKHR; + else if (stage == "rcall") + return ExecutionModelCallableKHR; + else if (stage == "mesh") + return 
spv::ExecutionModelMeshEXT; + else if (stage == "task") + return spv::ExecutionModelTaskEXT; else SPIRV_CROSS_THROW("Invalid stage."); } +static HLSLBindingFlags hlsl_resource_type_to_flag(const std::string &arg) +{ + if (arg == "push") + return HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT; + else if (arg == "cbv") + return HLSL_BINDING_AUTO_CBV_BIT; + else if (arg == "srv") + return HLSL_BINDING_AUTO_SRV_BIT; + else if (arg == "uav") + return HLSL_BINDING_AUTO_UAV_BIT; + else if (arg == "sampler") + return HLSL_BINDING_AUTO_SAMPLER_BIT; + else if (arg == "all") + return HLSL_BINDING_AUTO_ALL; + else + { + fprintf(stderr, "Invalid resource type for --hlsl-auto-binding: %s\n", arg.c_str()); + return 0; + } +} + static string compile_iteration(const CLIArguments &args, std::vector spirv_file) { - Parser spirv_parser(move(spirv_file)); + Parser spirv_parser(std::move(spirv_file)); spirv_parser.parse(); unique_ptr compiler; @@ -732,13 +1170,13 @@ static string compile_iteration(const CLIArguments &args, std::vector if (args.cpp) { - compiler.reset(new CompilerCPP(move(spirv_parser.get_parsed_ir()))); + compiler.reset(new CompilerCPP(std::move(spirv_parser.get_parsed_ir()))); if (args.cpp_interface_name) static_cast(compiler.get())->set_interface_name(args.cpp_interface_name); } else if (args.msl) { - compiler.reset(new CompilerMSL(move(spirv_parser.get_parsed_ir()))); + compiler.reset(new CompilerMSL(std::move(spirv_parser.get_parsed_ir()))); auto *msl_comp = static_cast(compiler.get()); auto msl_opts = msl_comp->get_msl_options(); @@ -746,25 +1184,69 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_opts.msl_version = args.msl_version; msl_opts.capture_output_to_buffer = args.msl_capture_output_to_buffer; msl_opts.swizzle_texture_samples = args.msl_swizzle_texture_samples; + msl_opts.invariant_float_math = args.msl_invariant_float_math; if (args.msl_ios) + { msl_opts.platform = CompilerMSL::Options::iOS; + msl_opts.emulate_cube_array = 
args.msl_emulate_cube_array; + } + msl_opts.use_framebuffer_fetch_subpasses = args.msl_framebuffer_fetch; msl_opts.pad_fragment_output_components = args.msl_pad_fragment_output; msl_opts.tess_domain_origin_lower_left = args.msl_domain_lower_left; msl_opts.argument_buffers = args.msl_argument_buffers; + msl_opts.argument_buffers_tier = static_cast(args.msl_argument_buffers_tier); msl_opts.texture_buffer_native = args.msl_texture_buffer_native; msl_opts.multiview = args.msl_multiview; + msl_opts.multiview_layered_rendering = args.msl_multiview_layered_rendering; + msl_opts.view_index_from_device_index = args.msl_view_index_from_device_index; + msl_opts.dispatch_base = args.msl_dispatch_base; + msl_opts.enable_decoration_binding = args.msl_decoration_binding; + msl_opts.force_active_argument_buffer_resources = args.msl_force_active_argument_buffer_resources; + msl_opts.force_native_arrays = args.msl_force_native_arrays; + msl_opts.enable_frag_depth_builtin = args.msl_enable_frag_depth_builtin; + msl_opts.enable_frag_stencil_ref_builtin = args.msl_enable_frag_stencil_ref_builtin; + msl_opts.enable_frag_output_mask = args.msl_enable_frag_output_mask; + msl_opts.enable_clip_distance_user_varying = args.msl_enable_clip_distance_user_varying; + msl_opts.raw_buffer_tese_input = args.msl_raw_buffer_tese_input; + msl_opts.multi_patch_workgroup = args.msl_multi_patch_workgroup; + msl_opts.vertex_for_tessellation = args.msl_vertex_for_tessellation; + msl_opts.additional_fixed_sample_mask = args.msl_additional_fixed_sample_mask; + msl_opts.arrayed_subpass_input = args.msl_arrayed_subpass_input; + msl_opts.r32ui_linear_texture_alignment = args.msl_r32ui_linear_texture_alignment; + msl_opts.r32ui_alignment_constant_id = args.msl_r32ui_alignment_constant_id; + msl_opts.texture_1D_as_2D = args.msl_texture_1d_as_2d; + msl_opts.ios_use_simdgroup_functions = args.msl_ios_use_simdgroup_functions; + msl_opts.emulate_subgroups = args.msl_emulate_subgroups; + msl_opts.fixed_subgroup_size = 
args.msl_fixed_subgroup_size; + msl_opts.force_sample_rate_shading = args.msl_force_sample_rate_shading; + msl_opts.manual_helper_invocation_updates = args.msl_manual_helper_invocation_updates; + msl_opts.check_discarded_frag_stores = args.msl_check_discarded_frag_stores; + msl_opts.ios_support_base_vertex_instance = true; msl_comp->set_msl_options(msl_opts); for (auto &v : args.msl_discrete_descriptor_sets) msl_comp->add_discrete_descriptor_set(v); + for (auto &v : args.msl_device_argument_buffers) + msl_comp->set_argument_buffer_device_address_space(v, true); + uint32_t i = 0; + for (auto &v : args.msl_dynamic_buffers) + msl_comp->add_dynamic_buffer(v.first, v.second, i++); + for (auto &v : args.msl_inline_uniform_blocks) + msl_comp->add_inline_uniform_block(v.first, v.second); + for (auto &v : args.msl_shader_inputs) + msl_comp->add_msl_shader_input(v); + for (auto &v : args.msl_shader_outputs) + msl_comp->add_msl_shader_output(v); + if (args.msl_combined_sampler_suffix) + msl_comp->set_combined_sampler_suffix(args.msl_combined_sampler_suffix); } else if (args.hlsl) - compiler.reset(new CompilerHLSL(move(spirv_parser.get_parsed_ir()))); + compiler.reset(new CompilerHLSL(std::move(spirv_parser.get_parsed_ir()))); else { combined_image_samplers = !args.vulkan_semantics; - if (!args.vulkan_semantics) + if (!args.vulkan_semantics || args.vulkan_glsl_disable_ext_samplerless_texture_functions) build_dummy_sampler = true; - compiler.reset(new CompilerGLSL(move(spirv_parser.get_parsed_ir()))); + compiler.reset(new CompilerGLSL(std::move(spirv_parser.get_parsed_ir()))); } if (!args.variable_type_remaps.empty()) @@ -775,9 +1257,14 @@ static string compile_iteration(const CLIArguments &args, std::vector out = remap.new_variable_type; }; - compiler->set_variable_type_remap_callback(move(remap_cb)); + compiler->set_variable_type_remap_callback(std::move(remap_cb)); } + for (auto &masked : args.masked_stage_outputs) + compiler->mask_stage_output_by_location(masked.first, 
masked.second); + for (auto &masked : args.masked_stage_builtins) + compiler->mask_stage_output_by_builtin(masked); + for (auto &rename : args.entry_point_rename) compiler->rename_entry_point(rename.old_name, rename.new_name, rename.execution_model); @@ -878,9 +1365,18 @@ static string compile_iteration(const CLIArguments &args, std::vector opts.vertex.support_nonzero_base_instance = args.support_nonzero_baseinstance; opts.emit_push_constant_as_uniform_buffer = args.glsl_emit_push_constant_as_ubo; opts.emit_uniform_buffer_as_plain_uniforms = args.glsl_emit_ubo_as_plain_uniforms; + opts.force_flattened_io_blocks = args.glsl_force_flattened_io_blocks; + opts.ovr_multiview_view_count = args.glsl_ovr_multiview_view_count; opts.emit_line_directives = args.emit_line_directives; + opts.enable_storage_image_qualifier_deduction = args.enable_storage_image_qualifier_deduction; + opts.force_zero_initialized_variables = args.force_zero_initialized_variables; + opts.relax_nan_checks = args.relax_nan_checks; + opts.force_recompile_max_debug_iterations = args.force_recompile_max_debug_iterations; compiler->set_common_options(opts); + for (auto &fetch : args.glsl_ext_framebuffer_fetch) + compiler->remap_ext_framebuffer_fetch(fetch.first, fetch.second, !args.glsl_ext_framebuffer_fetch_noncoherent); + // Set HLSL specific options. if (args.hlsl) { @@ -910,8 +1406,23 @@ static string compile_iteration(const CLIArguments &args, std::vector build_dummy_sampler = true; } + // If we're explicitly renaming, we probably want that name to be output. 
+ if (!args.entry_point_rename.empty()) + hlsl_opts.use_entry_point_name = true; + hlsl_opts.support_nonzero_base_vertex_base_instance = args.hlsl_support_nonzero_base; + hlsl_opts.force_storage_buffer_as_uav = args.hlsl_force_storage_buffer_as_uav; + hlsl_opts.nonwritable_uav_texture_as_srv = args.hlsl_nonwritable_uav_texture_as_srv; + hlsl_opts.enable_16bit_types = args.hlsl_enable_16bit_types; + hlsl_opts.flatten_matrix_vertex_input_semantics = args.hlsl_flatten_matrix_vertex_input_semantics; hlsl->set_hlsl_options(hlsl_opts); + hlsl->set_resource_binding_flags(args.hlsl_binding_flags); + if (args.hlsl_base_vertex_index_explicit_binding) + { + hlsl->set_hlsl_aux_buffer_binding(HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE, + args.hlsl_base_vertex_index_register_index, + args.hlsl_base_vertex_index_register_space); + } } if (build_dummy_sampler) @@ -930,7 +1441,7 @@ static string compile_iteration(const CLIArguments &args, std::vector { auto active = compiler->get_active_interface_variables(); res = compiler->get_shader_resources(active); - compiler->set_enabled_interface_variables(move(active)); + compiler->set_enabled_interface_variables(std::move(active)); } else res = compiler->get_shader_resources(); @@ -945,7 +1456,7 @@ static string compile_iteration(const CLIArguments &args, std::vector auto pls_inputs = remap_pls(args.pls_in, res.stage_inputs, &res.subpass_inputs); auto pls_outputs = remap_pls(args.pls_out, res.stage_outputs, nullptr); - compiler->remap_pixel_local_storage(move(pls_inputs), move(pls_outputs)); + compiler->remap_pixel_local_storage(std::move(pls_inputs), std::move(pls_outputs)); for (auto &ext : args.extensions) compiler->require_extension(ext); @@ -975,14 +1486,6 @@ static string compile_iteration(const CLIArguments &args, std::vector } } - if (args.dump_resources) - { - print_resources(*compiler, res); - print_push_constant_resources(*compiler, res.push_constant_buffers); - print_spec_constants(*compiler); - 
print_capabilities_and_extensions(*compiler); - } - if (combined_image_samplers) { compiler->build_combined_image_samplers(); @@ -1000,21 +1503,43 @@ static string compile_iteration(const CLIArguments &args, std::vector if (args.hlsl) { auto *hlsl_compiler = static_cast(compiler.get()); - uint32_t new_builtin = hlsl_compiler->remap_num_workgroups_builtin(); - if (new_builtin) - { - hlsl_compiler->set_decoration(new_builtin, DecorationDescriptorSet, 0); - hlsl_compiler->set_decoration(new_builtin, DecorationBinding, 0); - } + hlsl_compiler->remap_num_workgroups_builtin(); } if (args.hlsl) { for (auto &remap : args.hlsl_attr_remap) static_cast(compiler.get())->add_vertex_attribute_remap(remap); + + for (auto &named_remap : args.hlsl_attr_remap_named) + { + auto itr = std::find_if(res.stage_inputs.begin(), res.stage_inputs.end(), [&](const Resource &input_res) { + return input_res.name == named_remap.name; + }); + + if (itr != res.stage_inputs.end()) + { + HLSLVertexAttributeRemap remap = { + compiler->get_decoration(itr->id, DecorationLocation), + named_remap.semantic, + }; + static_cast(compiler.get())->add_vertex_attribute_remap(remap); + } + } } - return compiler->compile(); + auto ret = compiler->compile(); + + if (args.dump_resources) + { + compiler->update_active_builtins(); + print_resources(*compiler, res); + print_push_constant_resources(*compiler, res.push_constant_buffers); + print_spec_constants(*compiler); + print_capabilities_and_extensions(*compiler); + } + + return ret; } static int main_inner(int argc, char *argv[]) @@ -1055,12 +1580,44 @@ static int main_inner(int argc, char *argv[]) cbs.add("--metal", [&args](CLIParser &) { args.msl = true; }); // Legacy compatibility cbs.add("--glsl-emit-push-constant-as-ubo", [&args](CLIParser &) { args.glsl_emit_push_constant_as_ubo = true; }); cbs.add("--glsl-emit-ubo-as-plain-uniforms", [&args](CLIParser &) { args.glsl_emit_ubo_as_plain_uniforms = true; }); + cbs.add("--glsl-force-flattened-io-blocks", 
[&args](CLIParser &) { args.glsl_force_flattened_io_blocks = true; }); + cbs.add("--glsl-ovr-multiview-view-count", [&args](CLIParser &parser) { args.glsl_ovr_multiview_view_count = parser.next_uint(); }); + cbs.add("--glsl-remap-ext-framebuffer-fetch", [&args](CLIParser &parser) { + uint32_t input_index = parser.next_uint(); + uint32_t color_attachment = parser.next_uint(); + args.glsl_ext_framebuffer_fetch.push_back({ input_index, color_attachment }); + }); + cbs.add("--glsl-ext-framebuffer-fetch-noncoherent", [&args](CLIParser &) { + args.glsl_ext_framebuffer_fetch_noncoherent = true; + }); + cbs.add("--vulkan-glsl-disable-ext-samplerless-texture-functions", + [&args](CLIParser &) { args.vulkan_glsl_disable_ext_samplerless_texture_functions = true; }); + cbs.add("--disable-storage-image-qualifier-deduction", + [&args](CLIParser &) { args.enable_storage_image_qualifier_deduction = false; }); + cbs.add("--force-zero-initialized-variables", + [&args](CLIParser &) { args.force_zero_initialized_variables = true; }); cbs.add("--msl", [&args](CLIParser &) { args.msl = true; }); cbs.add("--hlsl", [&args](CLIParser &) { args.hlsl = true; }); cbs.add("--hlsl-enable-compat", [&args](CLIParser &) { args.hlsl_compat = true; }); cbs.add("--hlsl-support-nonzero-basevertex-baseinstance", [&args](CLIParser &) { args.hlsl_support_nonzero_base = true; }); + cbs.add("--hlsl-basevertex-baseinstance-binding", [&args](CLIParser &parser) { + args.hlsl_base_vertex_index_explicit_binding = true; + args.hlsl_base_vertex_index_register_index = parser.next_uint(); + args.hlsl_base_vertex_index_register_space = parser.next_uint(); + }); + cbs.add("--hlsl-auto-binding", [&args](CLIParser &parser) { + args.hlsl_binding_flags |= hlsl_resource_type_to_flag(parser.next_string()); + }); + cbs.add("--hlsl-force-storage-buffer-as-uav", + [&args](CLIParser &) { args.hlsl_force_storage_buffer_as_uav = true; }); + cbs.add("--hlsl-nonwritable-uav-texture-as-srv", + [&args](CLIParser &) { 
args.hlsl_nonwritable_uav_texture_as_srv = true; }); + cbs.add("--hlsl-enable-16bit-types", [&args](CLIParser &) { args.hlsl_enable_16bit_types = true; }); + cbs.add("--hlsl-flatten-matrix-vertex-input-semantics", + [&args](CLIParser &) { args.hlsl_flatten_matrix_vertex_input_semantics = true; }); cbs.add("--vulkan-semantics", [&args](CLIParser &) { args.vulkan_semantics = true; }); + cbs.add("-V", [&args](CLIParser &) { args.vulkan_semantics = true; }); cbs.add("--flatten-multidimensional-arrays", [&args](CLIParser &) { args.flatten_multidimensional_arrays = true; }); cbs.add("--no-420pack-extension", [&args](CLIParser &) { args.use_420pack_extension = false; }); cbs.add("--msl-capture-output", [&args](CLIParser &) { args.msl_capture_output_to_buffer = true; }); @@ -1069,16 +1626,163 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-pad-fragment-output", [&args](CLIParser &) { args.msl_pad_fragment_output = true; }); cbs.add("--msl-domain-lower-left", [&args](CLIParser &) { args.msl_domain_lower_left = true; }); cbs.add("--msl-argument-buffers", [&args](CLIParser &) { args.msl_argument_buffers = true; }); + cbs.add("--msl-argument-buffer-tier", [&args](CLIParser &parser) { + args.msl_argument_buffers_tier = parser.next_uint(); + args.msl_argument_buffers = true; + }); cbs.add("--msl-discrete-descriptor-set", [&args](CLIParser &parser) { args.msl_discrete_descriptor_sets.push_back(parser.next_uint()); }); + cbs.add("--msl-device-argument-buffer", + [&args](CLIParser &parser) { args.msl_device_argument_buffers.push_back(parser.next_uint()); }); cbs.add("--msl-texture-buffer-native", [&args](CLIParser &) { args.msl_texture_buffer_native = true; }); + cbs.add("--msl-framebuffer-fetch", [&args](CLIParser &) { args.msl_framebuffer_fetch = true; }); + cbs.add("--msl-invariant-float-math", [&args](CLIParser &) { args.msl_invariant_float_math = true; }); + cbs.add("--msl-emulate-cube-array", [&args](CLIParser &) { args.msl_emulate_cube_array = true; }); 
cbs.add("--msl-multiview", [&args](CLIParser &) { args.msl_multiview = true; }); + cbs.add("--msl-multiview-no-layered-rendering", + [&args](CLIParser &) { args.msl_multiview_layered_rendering = false; }); + cbs.add("--msl-view-index-from-device-index", + [&args](CLIParser &) { args.msl_view_index_from_device_index = true; }); + cbs.add("--msl-dispatch-base", [&args](CLIParser &) { args.msl_dispatch_base = true; }); + cbs.add("--msl-dynamic-buffer", [&args](CLIParser &parser) { + args.msl_argument_buffers = true; + // Make sure next_uint() is called in-order. + uint32_t desc_set = parser.next_uint(); + uint32_t binding = parser.next_uint(); + args.msl_dynamic_buffers.push_back(make_pair(desc_set, binding)); + }); + cbs.add("--msl-decoration-binding", [&args](CLIParser &) { args.msl_decoration_binding = true; }); + cbs.add("--msl-force-active-argument-buffer-resources", + [&args](CLIParser &) { args.msl_force_active_argument_buffer_resources = true; }); + cbs.add("--msl-inline-uniform-block", [&args](CLIParser &parser) { + args.msl_argument_buffers = true; + // Make sure next_uint() is called in-order. 
+ uint32_t desc_set = parser.next_uint(); + uint32_t binding = parser.next_uint(); + args.msl_inline_uniform_blocks.push_back(make_pair(desc_set, binding)); + }); + cbs.add("--msl-force-native-arrays", [&args](CLIParser &) { args.msl_force_native_arrays = true; }); + cbs.add("--msl-disable-frag-depth-builtin", [&args](CLIParser &) { args.msl_enable_frag_depth_builtin = false; }); + cbs.add("--msl-disable-frag-stencil-ref-builtin", + [&args](CLIParser &) { args.msl_enable_frag_stencil_ref_builtin = false; }); + cbs.add("--msl-enable-frag-output-mask", + [&args](CLIParser &parser) { args.msl_enable_frag_output_mask = parser.next_hex_uint(); }); + cbs.add("--msl-no-clip-distance-user-varying", + [&args](CLIParser &) { args.msl_enable_clip_distance_user_varying = false; }); + cbs.add("--msl-add-shader-input", [&args](CLIParser &parser) { + MSLShaderInterfaceVariable input; + // Make sure next_uint() is called in-order. + input.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + input.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + input.vecsize = parser.next_uint(); + const char *rate = parser.next_value_string("vertex"); + if (strcmp(rate, "primitive") == 0) + input.rate = MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE; + else if (strcmp(rate, "patch") == 0) + input.rate = MSL_SHADER_VARIABLE_RATE_PER_PATCH; + else + input.rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX; + args.msl_shader_inputs.push_back(input); + }); + cbs.add("--msl-add-shader-output", [&args](CLIParser &parser) { + MSLShaderInterfaceVariable output; + // Make sure next_uint() is called in-order. 
+ output.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + output.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + output.vecsize = parser.next_uint(); + const char *rate = parser.next_value_string("vertex"); + if (strcmp(rate, "primitive") == 0) + output.rate = MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE; + else if (strcmp(rate, "patch") == 0) + output.rate = MSL_SHADER_VARIABLE_RATE_PER_PATCH; + else + output.rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX; + args.msl_shader_outputs.push_back(output); + }); + cbs.add("--msl-shader-input", [&args](CLIParser &parser) { + MSLShaderInterfaceVariable input; + // Make sure next_uint() is called in-order. + input.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + input.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + input.vecsize = parser.next_uint(); + args.msl_shader_inputs.push_back(input); + }); + cbs.add("--msl-shader-output", [&args](CLIParser &parser) { + MSLShaderInterfaceVariable output; + // Make sure next_uint() is called in-order. 
+ output.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + output.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + output.vecsize = parser.next_uint(); + args.msl_shader_outputs.push_back(output); + }); + cbs.add("--msl-raw-buffer-tese-input", [&args](CLIParser &) { args.msl_raw_buffer_tese_input = true; }); + cbs.add("--msl-multi-patch-workgroup", [&args](CLIParser &) { args.msl_multi_patch_workgroup = true; }); + cbs.add("--msl-vertex-for-tessellation", [&args](CLIParser &) { args.msl_vertex_for_tessellation = true; }); + cbs.add("--msl-additional-fixed-sample-mask", + [&args](CLIParser &parser) { args.msl_additional_fixed_sample_mask = parser.next_hex_uint(); }); + cbs.add("--msl-arrayed-subpass-input", [&args](CLIParser &) { args.msl_arrayed_subpass_input = true; }); + cbs.add("--msl-r32ui-linear-texture-align", + [&args](CLIParser &parser) { args.msl_r32ui_linear_texture_alignment = parser.next_uint(); }); + cbs.add("--msl-r32ui-linear-texture-align-constant-id", + [&args](CLIParser &parser) { args.msl_r32ui_alignment_constant_id = parser.next_uint(); }); + cbs.add("--msl-texture-1d-as-2d", [&args](CLIParser &) { args.msl_texture_1d_as_2d = true; }); + cbs.add("--msl-ios-use-simdgroup-functions", [&args](CLIParser &) { args.msl_ios_use_simdgroup_functions = true; }); + cbs.add("--msl-emulate-subgroups", [&args](CLIParser &) { args.msl_emulate_subgroups = true; }); + cbs.add("--msl-fixed-subgroup-size", + [&args](CLIParser &parser) { args.msl_fixed_subgroup_size = parser.next_uint(); }); + cbs.add("--msl-force-sample-rate-shading", [&args](CLIParser &) { 
args.msl_force_sample_rate_shading = true; }); + cbs.add("--msl-no-manual-helper-invocation-updates", + [&args](CLIParser &) { args.msl_manual_helper_invocation_updates = false; }); + cbs.add("--msl-check-discarded-frag-stores", [&args](CLIParser &) { args.msl_check_discarded_frag_stores = true; }); + cbs.add("--msl-combined-sampler-suffix", [&args](CLIParser &parser) { + args.msl_combined_sampler_suffix = parser.next_string(); + }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); auto new_name = parser.next_string(); auto model = stage_to_execution_model(parser.next_string()); - args.entry_point_rename.push_back({ old_name, new_name, move(model) }); + args.entry_point_rename.push_back({ old_name, new_name, std::move(model) }); }); cbs.add("--entry", [&args](CLIParser &parser) { args.entry = parser.next_string(); }); cbs.add("--stage", [&args](CLIParser &parser) { args.entry_stage = parser.next_string(); }); @@ -1087,20 +1791,26 @@ static int main_inner(int argc, char *argv[]) HLSLVertexAttributeRemap remap; remap.location = parser.next_uint(); remap.semantic = parser.next_string(); - args.hlsl_attr_remap.push_back(move(remap)); + args.hlsl_attr_remap.push_back(std::move(remap)); + }); + cbs.add("--set-hlsl-named-vertex-input-semantic", [&args](CLIParser &parser) { + HLSLVertexAttributeRemapNamed remap; + remap.name = parser.next_string(); + remap.semantic = parser.next_string(); + args.hlsl_attr_remap_named.push_back(std::move(remap)); }); cbs.add("--remap", [&args](CLIParser &parser) { string src = parser.next_string(); string dst = parser.next_string(); uint32_t components = parser.next_uint(); - args.remaps.push_back({ move(src), move(dst), components }); + args.remaps.push_back({ std::move(src), std::move(dst), components }); }); cbs.add("--remap-variable-type", [&args](CLIParser &parser) { string var_name 
= parser.next_string(); string new_type = parser.next_string(); - args.variable_type_remaps.push_back({ move(var_name), move(new_type) }); + args.variable_type_remaps.push_back({ std::move(var_name), std::move(new_type) }); }); cbs.add("--rename-interface-variable", [&args](CLIParser &parser) { @@ -1113,18 +1823,18 @@ static int main_inner(int argc, char *argv[]) uint32_t loc = parser.next_uint(); string var_name = parser.next_string(); - args.interface_variable_renames.push_back({ cls, loc, move(var_name) }); + args.interface_variable_renames.push_back({ cls, loc, std::move(var_name) }); }); cbs.add("--pls-in", [&args](CLIParser &parser) { auto fmt = pls_format(parser.next_string()); auto name = parser.next_string(); - args.pls_in.push_back({ move(fmt), move(name) }); + args.pls_in.push_back({ std::move(fmt), std::move(name) }); }); cbs.add("--pls-out", [&args](CLIParser &parser) { auto fmt = pls_format(parser.next_string()); auto name = parser.next_string(); - args.pls_out.push_back({ move(fmt), move(name) }); + args.pls_out.push_back({ std::move(fmt), std::move(name) }); }); cbs.add("--shader-model", [&args](CLIParser &parser) { args.shader_model = parser.next_uint(); @@ -1142,10 +1852,42 @@ static int main_inner(int argc, char *argv[]) cbs.add("--no-support-nonzero-baseinstance", [&](CLIParser &) { args.support_nonzero_baseinstance = false; }); cbs.add("--emit-line-directives", [&args](CLIParser &) { args.emit_line_directives = true; }); + cbs.add("--mask-stage-output-location", [&](CLIParser &parser) { + uint32_t location = parser.next_uint(); + uint32_t component = parser.next_uint(); + args.masked_stage_outputs.push_back({ location, component }); + }); + + cbs.add("--mask-stage-output-builtin", [&](CLIParser &parser) { + BuiltIn masked_builtin = BuiltInMax; + std::string builtin = parser.next_string(); + if (builtin == "Position") + masked_builtin = BuiltInPosition; + else if (builtin == "PointSize") + masked_builtin = BuiltInPointSize; + else if (builtin == 
"CullDistance") + masked_builtin = BuiltInCullDistance; + else if (builtin == "ClipDistance") + masked_builtin = BuiltInClipDistance; + else + { + print_help(); + exit(EXIT_FAILURE); + } + args.masked_stage_builtins.push_back(masked_builtin); + }); + + cbs.add("--force-recompile-max-debug-iterations", [&](CLIParser &parser) { + args.force_recompile_max_debug_iterations = parser.next_uint(); + }); + + cbs.add("--relax-nan-checks", [&](CLIParser &) { args.relax_nan_checks = true; }); + cbs.default_handler = [&args](const char *value) { args.input = value; }; + cbs.add("-", [&args](CLIParser &) { args.input = "-"; }); cbs.error_handler = [] { print_help(); }; - CLIParser parser{ move(cbs), argc - 1, argv + 1 }; + CLIParser parser{ std::move(cbs), argc - 1, argv + 1 }; if (!parser.parse()) return EXIT_FAILURE; else if (parser.ended_state) @@ -1165,10 +1907,10 @@ static int main_inner(int argc, char *argv[]) // Special case reflection because it has little to do with the path followed by code-outputting compilers if (!args.reflect.empty()) { - Parser spirv_parser(move(spirv_file)); + Parser spirv_parser(std::move(spirv_file)); spirv_parser.parse(); - CompilerReflection compiler(move(spirv_parser.get_parsed_ir())); + CompilerReflection compiler(std::move(spirv_parser.get_parsed_ir())); compiler.set_format(args.reflect); auto json = compiler.compile(); if (args.output) @@ -1181,7 +1923,7 @@ static int main_inner(int argc, char *argv[]) string compiled_output; if (args.iterations == 1) - compiled_output = compile_iteration(args, move(spirv_file)); + compiled_output = compile_iteration(args, std::move(spirv_file)); else { for (unsigned i = 0; i < args.iterations; i++) diff --git a/pkg-config/spirv-cross-c-shared.pc.in b/pkg-config/spirv-cross-c-shared.pc.in index 823e4ce48bb..4fb8a0aee98 100644 --- a/pkg-config/spirv-cross-c-shared.pc.in +++ b/pkg-config/spirv-cross-c-shared.pc.in @@ -1,8 +1,11 @@ +# Copyright 2020-2021 Hans-Kristian Arntzen +# SPDX-License-Identifier: 
Apache-2.0 + prefix=@CMAKE_INSTALL_PREFIX@ -exec_prefix=@CMAKE_INSTALL_PREFIX@ -libdir=@SPIRV_CROSS_INSTALL_LIB_DIR@ -sharedlibdir=@SPIRV_CROSS_INSTALL_LIB_DIR@ -includedir=@SPIRV_CROSS_INSTALL_INC_DIR@ +exec_prefix=${prefix} +libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +sharedlibdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@/spirv_cross Name: spirv-cross-c-shared Description: C API for SPIRV-Cross diff --git a/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..da499c3b6da --- /dev/null +++ b/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,20 @@ +RWByteAddressBuffer _5 : register(u0); +RWByteAddressBuffer _6 : register(u1); + +void comp_main() +{ + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) < int4(_5.Load4(0)).x, int(_5.Load4(16).y) < int4(_5.Load4(0)).y, int(_5.Load4(16).z) < int4(_5.Load4(0)).z, int(_5.Load4(16).w) < int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) <= int4(_5.Load4(0)).x, int(_5.Load4(16).y) <= int4(_5.Load4(0)).y, int(_5.Load4(16).z) <= int4(_5.Load4(0)).z, int(_5.Load4(16).w) <= int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x < uint(int4(_5.Load4(0)).x), _5.Load4(16).y < uint(int4(_5.Load4(0)).y), _5.Load4(16).z < uint(int4(_5.Load4(0)).z), _5.Load4(16).w < uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x <= uint(int4(_5.Load4(0)).x), _5.Load4(16).y <= uint(int4(_5.Load4(0)).y), _5.Load4(16).z <= uint(int4(_5.Load4(0)).z), _5.Load4(16).w <= uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) > int4(_5.Load4(0)).x, int(_5.Load4(16).y) > int4(_5.Load4(0)).y, int(_5.Load4(16).z) > int4(_5.Load4(0)).z, int(_5.Load4(16).w) > int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) >= int4(_5.Load4(0)).x, int(_5.Load4(16).y) >= int4(_5.Load4(0)).y, int(_5.Load4(16).z) >= 
int4(_5.Load4(0)).z, int(_5.Load4(16).w) >= int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x > uint(int4(_5.Load4(0)).x), _5.Load4(16).y > uint(int4(_5.Load4(0)).y), _5.Load4(16).z > uint(int4(_5.Load4(0)).z), _5.Load4(16).w > uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x >= uint(int4(_5.Load4(0)).x), _5.Load4(16).y >= uint(int4(_5.Load4(0)).y), _5.Load4(16).z >= uint(int4(_5.Load4(0)).z), _5.Load4(16).w >= uint(int4(_5.Load4(0)).w)))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp b/reference/opt/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp index a12274c01c6..e184e03c5c1 100644 --- a/reference/opt/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp +++ b/reference/opt/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp @@ -4,22 +4,16 @@ struct A int b; }; -struct A_1 -{ - int a; - int b; -}; - RWByteAddressBuffer C1 : register(u1); cbuffer C2 : register(b2) { - A_1 C2_1_Data[1024] : packoffset(c0); + A C2_1_Data[1024] : packoffset(c0); }; RWByteAddressBuffer C3 : register(u0); cbuffer B : register(b3) { - A_1 C4_Data[1024] : packoffset(c0); + A C4_Data[1024] : packoffset(c0); }; diff --git a/reference/opt/shaders-hlsl/asm/comp/control-flow-hints.asm.comp b/reference/opt/shaders-hlsl/asm/comp/control-flow-hints.asm.comp index 142ef5efa8d..70df6baf0e7 100644 --- a/reference/opt/shaders-hlsl/asm/comp/control-flow-hints.asm.comp +++ b/reference/opt/shaders-hlsl/asm/comp/control-flow-hints.asm.comp @@ -3,18 +3,27 @@ RWByteAddressBuffer foo : register(u1); void comp_main() { - [unroll] - for (int _135 = 0; _135 < 16; ) - { - bar.Store4(_135 * 16 + 0, asuint(asfloat(foo.Load4(_135 * 16 + 0)))); - _135++; - continue; - } + bar.Store4(0, asuint(asfloat(foo.Load4(0)))); + bar.Store4(16, asuint(asfloat(foo.Load4(16)))); + bar.Store4(32, asuint(asfloat(foo.Load4(32)))); + bar.Store4(48, asuint(asfloat(foo.Load4(48)))); + 
bar.Store4(64, asuint(asfloat(foo.Load4(64)))); + bar.Store4(80, asuint(asfloat(foo.Load4(80)))); + bar.Store4(96, asuint(asfloat(foo.Load4(96)))); + bar.Store4(112, asuint(asfloat(foo.Load4(112)))); + bar.Store4(128, asuint(asfloat(foo.Load4(128)))); + bar.Store4(144, asuint(asfloat(foo.Load4(144)))); + bar.Store4(160, asuint(asfloat(foo.Load4(160)))); + bar.Store4(176, asuint(asfloat(foo.Load4(176)))); + bar.Store4(192, asuint(asfloat(foo.Load4(192)))); + bar.Store4(208, asuint(asfloat(foo.Load4(208)))); + bar.Store4(224, asuint(asfloat(foo.Load4(224)))); + bar.Store4(240, asuint(asfloat(foo.Load4(240)))); [loop] - for (int _136 = 0; _136 < 16; ) + for (int _137 = 0; _137 < 16; ) { - bar.Store4((15 - _136) * 16 + 0, asuint(asfloat(foo.Load4(_136 * 16 + 0)))); - _136++; + bar.Store4((15 - _137) * 16 + 0, asuint(asfloat(foo.Load4(_137 * 16 + 0)))); + _137++; continue; } [branch] diff --git a/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000000..9f51eff1354 --- /dev/null +++ b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,30 @@ +RWByteAddressBuffer _4 : register(u0); + +void comp_main() +{ + _4.Store(0, asuint(min(asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + _4.Store(0, asuint(max(asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, 
asuint(clamp(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(clamp(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(clamp(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + for (int _139 = 0; _139 < 2; ) + { + _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(56)), asfloat(_4.Load(60))))); + _139++; + continue; + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag b/reference/opt/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag index ed53720d946..2527d10fdc8 100644 --- a/reference/opt/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag +++ b/reference/opt/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag @@ -8,7 +8,7 @@ struct SPIRV_Cross_Output int2 Size : SV_Target0; }; -uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); @@ -19,7 +19,7 @@ void frag_main() { uint _19_dummy_parameter; uint _20_dummy_parameter; - Size = int2(SPIRV_Cross_textureSize(uTexture, uint(0), _19_dummy_parameter)) + int2(SPIRV_Cross_textureSize(uTexture, uint(1), _20_dummy_parameter)); + Size = int2(spvTextureSize(uTexture, uint(0), _19_dummy_parameter)) + int2(spvTextureSize(uTexture, uint(1), _20_dummy_parameter)); } SPIRV_Cross_Output main() diff --git a/reference/opt/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag b/reference/opt/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag index d20cf995acf..25dc6939e5c 100644 --- a/reference/opt/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag +++ b/reference/opt/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag @@ -7,7 +7,7 @@ struct SPIRV_Cross_Output void frag_main() { - FragColor = float3(asfloat(0x7f800000u), 
asfloat(0xff800000u), asfloat(0x7fc00000u)); + FragColor = float3(asfloat(0x7f800000u /* inf */), asfloat(0xff800000u /* -inf */), asfloat(0x7fc00000u /* nan */)); } SPIRV_Cross_Output main() diff --git a/reference/opt/shaders-hlsl/asm/frag/line-directive.line.asm.frag b/reference/opt/shaders-hlsl/asm/frag/line-directive.line.asm.frag index b596a8446ef..4a1cf2ee545 100644 --- a/reference/opt/shaders-hlsl/asm/frag/line-directive.line.asm.frag +++ b/reference/opt/shaders-hlsl/asm/frag/line-directive.line.asm.frag @@ -14,14 +14,12 @@ struct SPIRV_Cross_Output #line 8 "test.frag" void frag_main() { - float _80; #line 8 "test.frag" FragColor = 1.0f; #line 9 "test.frag" FragColor = 2.0f; #line 10 "test.frag" - _80 = vColor; - if (_80 < 0.0f) + if (vColor < 0.0f) { #line 12 "test.frag" FragColor = 3.0f; @@ -31,16 +29,19 @@ void frag_main() #line 16 "test.frag" FragColor = 4.0f; } - for (int _126 = 0; float(_126) < (40.0f + _80); ) +#line 19 "test.frag" + for (int _127 = 0; float(_127) < (40.0f + vColor); ) { #line 21 "test.frag" FragColor += 0.20000000298023223876953125f; #line 22 "test.frag" FragColor += 0.300000011920928955078125f; - _126 += (int(_80) + 5); +#line 19 "test.frag" + _127 += (int(vColor) + 5); continue; } - switch (int(_80)) +#line 25 "test.frag" + switch (int(vColor)) { case 0: { @@ -66,7 +67,8 @@ void frag_main() } for (;;) { - FragColor += (10.0f + _80); +#line 42 "test.frag" + FragColor += (10.0f + vColor); #line 43 "test.frag" if (FragColor < 100.0f) { @@ -76,6 +78,7 @@ void frag_main() break; } } +#line 48 "test.frag" } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag b/reference/opt/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag new file mode 100644 index 00000000000..5926eef7b40 --- /dev/null +++ b/reference/opt/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag @@ -0,0 +1,19 @@ +static float4 
FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = float4(18.0f, 52.0f, 1.0f, 1.0f); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag b/reference/opt/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag index 507bbe1d080..269cecb3022 100644 --- a/reference/opt/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag +++ b/reference/opt/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag @@ -55,6 +55,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.o_color = o_color; diff --git a/reference/opt/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag b/reference/opt/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag index 695d5fe9dfd..74c12945bfc 100644 --- a/reference/opt/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag +++ b/reference/opt/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag @@ -22,6 +22,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert b/reference/opt/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert index 2cebffffa85..3bccae3e0a5 100644 --- a/reference/opt/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert +++ b/reference/opt/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert @@ -8,10 +8,7 @@ static const int _20 = (_7 + 2); #endif static const uint _8 = SPIRV_CROSS_CONSTANT_ID_202; static const uint _25 = (_8 % 5u); -#ifndef 
SPIRV_CROSS_CONSTANT_ID_0 -#define SPIRV_CROSS_CONSTANT_ID_0 int4(20, 30, _20, _20) -#endif -static const int4 _30 = SPIRV_CROSS_CONSTANT_ID_0; +static const int4 _30 = int4(20, 30, _20, _20); static const int2 _32 = int2(_30.y, _30.x); static const int _33 = _30.y; @@ -28,9 +25,8 @@ void vert_main() { float4 _63 = 0.0f.xxxx; _63.y = float(_20); - float4 _66 = _63; - _66.z = float(_25); - float4 _52 = _66 + float4(_30); + _63.z = float(_25); + float4 _52 = _63 + float4(_30); float2 _56 = _52.xy + float2(_32); gl_Position = float4(_56.x, _56.y, _52.z, _52.w); _4 = _33; diff --git a/reference/opt/shaders-hlsl/comp/access-chain-load-composite.comp b/reference/opt/shaders-hlsl/comp/access-chain-load-composite.comp new file mode 100644 index 00000000000..778f62e83c8 --- /dev/null +++ b/reference/opt/shaders-hlsl/comp/access-chain-load-composite.comp @@ -0,0 +1,108 @@ +struct Baz +{ + float c; +}; + +struct Bar +{ + float d[2][4]; + Baz baz[2]; +}; + +struct Foo +{ + column_major float2x2 a; + float2 b; + Bar c[5]; +}; + +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _31 : register(u0); + +void comp_main() +{ + Foo _36; + _36.a = asfloat(uint2x2(_31.Load(0), _31.Load(8), _31.Load(4), _31.Load(12))); + _36.b = asfloat(_31.Load2(16)); + [unroll] + for (int _4ident = 0; _4ident < 5; _4ident++) + { + [unroll] + for (int _5ident = 0; _5ident < 2; _5ident++) + { + [unroll] + for (int _6ident = 0; _6ident < 4; _6ident++) + { + _36.c[_4ident].d[_5ident][_6ident] = asfloat(_31.Load(_6ident * 4 + _5ident * 16 + _4ident * 40 + 24)); + } + } + [unroll] + for (int _7ident = 0; _7ident < 2; _7ident++) + { + _36.c[_4ident].baz[_7ident].c = asfloat(_31.Load(_7ident * 4 + _4ident * 40 + 56)); + } + } + float2x2 _234 = float2x2(_36.a[0] + 1.0f.xx, _36.a[1] + 1.0f.xx); + _31.Store(224, asuint(_234[0].x)); + _31.Store(228, asuint(_234[1].x)); + _31.Store(232, asuint(_234[0].y)); + _31.Store(236, asuint(_234[1].y)); + _31.Store2(240, asuint(_36.b + 
2.0f.xx)); + _31.Store(248, asuint(_36.c[0].d[0][0])); + _31.Store(252, asuint(_36.c[0].d[0][1])); + _31.Store(256, asuint(_36.c[0].d[0][2])); + _31.Store(260, asuint(_36.c[0].d[0][3])); + _31.Store(264, asuint(_36.c[0].d[1][0])); + _31.Store(268, asuint(_36.c[0].d[1][1])); + _31.Store(272, asuint(_36.c[0].d[1][2])); + _31.Store(276, asuint(_36.c[0].d[1][3])); + _31.Store(280, asuint(_36.c[0].baz[0].c)); + _31.Store(284, asuint(_36.c[0].baz[1].c)); + _31.Store(288, asuint(_36.c[1].d[0][0])); + _31.Store(292, asuint(_36.c[1].d[0][1])); + _31.Store(296, asuint(_36.c[1].d[0][2])); + _31.Store(300, asuint(_36.c[1].d[0][3])); + _31.Store(304, asuint(_36.c[1].d[1][0])); + _31.Store(308, asuint(_36.c[1].d[1][1])); + _31.Store(312, asuint(_36.c[1].d[1][2])); + _31.Store(316, asuint(_36.c[1].d[1][3])); + _31.Store(320, asuint(_36.c[1].baz[0].c)); + _31.Store(324, asuint(_36.c[1].baz[1].c)); + _31.Store(328, asuint(_36.c[2].d[0][0])); + _31.Store(332, asuint(_36.c[2].d[0][1])); + _31.Store(336, asuint(_36.c[2].d[0][2])); + _31.Store(340, asuint(_36.c[2].d[0][3])); + _31.Store(344, asuint(_36.c[2].d[1][0])); + _31.Store(348, asuint(_36.c[2].d[1][1])); + _31.Store(352, asuint(_36.c[2].d[1][2])); + _31.Store(356, asuint(_36.c[2].d[1][3])); + _31.Store(360, asuint(_36.c[2].baz[0].c)); + _31.Store(364, asuint(_36.c[2].baz[1].c)); + _31.Store(368, asuint(_36.c[3].d[0][0])); + _31.Store(372, asuint(_36.c[3].d[0][1])); + _31.Store(376, asuint(_36.c[3].d[0][2])); + _31.Store(380, asuint(_36.c[3].d[0][3])); + _31.Store(384, asuint(_36.c[3].d[1][0])); + _31.Store(388, asuint(_36.c[3].d[1][1] + 5.0f)); + _31.Store(392, asuint(_36.c[3].d[1][2])); + _31.Store(396, asuint(_36.c[3].d[1][3])); + _31.Store(400, asuint(_36.c[3].baz[0].c)); + _31.Store(404, asuint(_36.c[3].baz[1].c)); + _31.Store(408, asuint(_36.c[4].d[0][0])); + _31.Store(412, asuint(_36.c[4].d[0][1])); + _31.Store(416, asuint(_36.c[4].d[0][2])); + _31.Store(420, asuint(_36.c[4].d[0][3])); + _31.Store(424, 
asuint(_36.c[4].d[1][0])); + _31.Store(428, asuint(_36.c[4].d[1][1])); + _31.Store(432, asuint(_36.c[4].d[1][2])); + _31.Store(436, asuint(_36.c[4].d[1][3])); + _31.Store(440, asuint(_36.c[4].baz[0].c)); + _31.Store(444, asuint(_36.c[4].baz[1].c)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/access-chains.comp b/reference/opt/shaders-hlsl/comp/access-chains.comp index 924e9191245..c748200b969 100644 --- a/reference/opt/shaders-hlsl/comp/access-chains.comp +++ b/reference/opt/shaders-hlsl/comp/access-chains.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer wo : register(u1); ByteAddressBuffer ro : register(t0); diff --git a/reference/opt/shaders-hlsl/comp/access-chains.force-uav.comp b/reference/opt/shaders-hlsl/comp/access-chains.force-uav.comp new file mode 100644 index 00000000000..97d046d89a3 --- /dev/null +++ b/reference/opt/shaders-hlsl/comp/access-chains.force-uav.comp @@ -0,0 +1,23 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer wo : register(u1); +RWByteAddressBuffer ro : register(u0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + wo.Store4(gl_GlobalInvocationID.x * 64 + 272, asuint(asfloat(ro.Load4(gl_GlobalInvocationID.x * 64 + 160)))); + wo.Store4(gl_GlobalInvocationID.x * 16 + 480, asuint(asfloat(ro.Load4(gl_GlobalInvocationID.x * 16 + 480)))); +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/address-buffers.comp b/reference/opt/shaders-hlsl/comp/address-buffers.comp index a252fc8ae36..7f1c7975bc6 100644 --- a/reference/opt/shaders-hlsl/comp/address-buffers.comp +++ b/reference/opt/shaders-hlsl/comp/address-buffers.comp @@ -1,3 +1,5 @@ +static const uint3 
gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer WriteOnly : register(u2); ByteAddressBuffer ReadOnly : register(t0); RWByteAddressBuffer ReadWrite : register(u1); diff --git a/reference/opt/shaders-hlsl/comp/atomic.comp b/reference/opt/shaders-hlsl/comp/atomic.comp index 72e15bf77dc..e6ff891e8c2 100644 --- a/reference/opt/shaders-hlsl/comp/atomic.comp +++ b/reference/opt/shaders-hlsl/comp/atomic.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer ssbo : register(u2); RWTexture2D uImage : register(u0); RWTexture2D iImage : register(u1); diff --git a/reference/opt/shaders-hlsl/comp/globallycoherent.comp b/reference/opt/shaders-hlsl/comp/globallycoherent.comp index 1637727deb2..b5f1e377ca4 100644 --- a/reference/opt/shaders-hlsl/comp/globallycoherent.comp +++ b/reference/opt/shaders-hlsl/comp/globallycoherent.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + globallycoherent RWByteAddressBuffer _29 : register(u3); ByteAddressBuffer _33 : register(t2); RWTexture2D uImageIn : register(u0); diff --git a/reference/opt/shaders-hlsl/comp/image.comp b/reference/opt/shaders-hlsl/comp/image.comp index 6c2b58cd29c..e2f6b0a340f 100644 --- a/reference/opt/shaders-hlsl/comp/image.comp +++ b/reference/opt/shaders-hlsl/comp/image.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWTexture2D uImageInF : register(u0); RWTexture2D uImageOutF : register(u1); RWTexture2D uImageInI : register(u2); diff --git a/reference/opt/shaders-hlsl/comp/image.nonwritable-uav-texture.comp b/reference/opt/shaders-hlsl/comp/image.nonwritable-uav-texture.comp new file mode 100644 index 00000000000..6c4a2139954 --- /dev/null +++ b/reference/opt/shaders-hlsl/comp/image.nonwritable-uav-texture.comp @@ -0,0 +1,66 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +Texture2D uImageInF : register(t0); +RWTexture2D uImageOutF : register(u1); +Texture2D uImageInI : register(t2); 
+RWTexture2D uImageOutI : register(u3); +Texture2D uImageInU : register(t4); +RWTexture2D uImageOutU : register(u5); +Buffer uImageInBuffer : register(t6); +RWBuffer uImageOutBuffer : register(u7); +Texture2D uImageInF2 : register(t8); +RWTexture2D uImageOutF2 : register(u9); +Texture2D uImageInI2 : register(t10); +RWTexture2D uImageOutI2 : register(u11); +Texture2D uImageInU2 : register(t12); +RWTexture2D uImageOutU2 : register(u13); +Buffer uImageInBuffer2 : register(t14); +RWBuffer uImageOutBuffer2 : register(u15); +Texture2D uImageInF4 : register(t16); +RWTexture2D uImageOutF4 : register(u17); +Texture2D uImageInI4 : register(t18); +RWTexture2D uImageOutI4 : register(u19); +Texture2D uImageInU4 : register(t20); +RWTexture2D uImageOutU4 : register(u21); +Buffer uImageInBuffer4 : register(t22); +RWBuffer uImageOutBuffer4 : register(u23); +RWTexture2D uImageNoFmtF : register(u24); +RWTexture2D uImageNoFmtU : register(u25); +RWTexture2D uImageNoFmtI : register(u26); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + int2 _23 = int2(gl_GlobalInvocationID.xy); + uImageOutF[_23] = uImageInF[_23].x; + uImageOutI[_23] = uImageInI[_23].x; + uImageOutU[_23] = uImageInU[_23].x; + int _74 = int(gl_GlobalInvocationID.x); + uImageOutBuffer[_74] = uImageInBuffer[_74].x; + uImageOutF2[_23] = uImageInF2[_23].xy; + uImageOutI2[_23] = uImageInI2[_23].xy; + uImageOutU2[_23] = uImageInU2[_23].xy; + float4 _135 = uImageInBuffer2[_74]; + uImageOutBuffer2[_74] = _135.xy; + uImageOutF4[_23] = uImageInF4[_23]; + int4 _165 = uImageInI4[_23]; + uImageOutI4[_23] = _165; + uint4 _180 = uImageInU4[_23]; + uImageOutU4[_23] = _180; + uImageOutBuffer4[_74] = uImageInBuffer4[_74]; + uImageNoFmtF[_23] = _135; + uImageNoFmtU[_23] = _180; + uImageNoFmtI[_23] = _165; +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + 
comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/inverse.comp b/reference/opt/shaders-hlsl/comp/inverse.comp index 3be954a6f61..698f647cecc 100644 --- a/reference/opt/shaders-hlsl/comp/inverse.comp +++ b/reference/opt/shaders-hlsl/comp/inverse.comp @@ -1,9 +1,11 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _15 : register(u0); ByteAddressBuffer _20 : register(t1); // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. -float2x2 SPIRV_Cross_Inverse(float2x2 m) +float2x2 spvInverse(float2x2 m) { float2x2 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -23,29 +25,29 @@ float2x2 SPIRV_Cross_Inverse(float2x2 m) } // Returns the determinant of a 2x2 matrix. -float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2) +float spvDet2x2(float a1, float a2, float b1, float b2) { return a1 * b2 - b1 * a2; } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. -float3x3 SPIRV_Cross_Inverse(float3x3 m) +float3x3 spvInverse(float3x3 m) { float3x3 adj; // The adjoint matrix (inverse after dividing by determinant) // Create the transpose of the cofactors, as the classical adjoint of the matrix. 
- adj[0][0] = SPIRV_Cross_Det2x2(m[1][1], m[1][2], m[2][1], m[2][2]); - adj[0][1] = -SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[2][1], m[2][2]); - adj[0][2] = SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[1][1], m[1][2]); + adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]); + adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]); + adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]); - adj[1][0] = -SPIRV_Cross_Det2x2(m[1][0], m[1][2], m[2][0], m[2][2]); - adj[1][1] = SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[2][0], m[2][2]); - adj[1][2] = -SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[1][0], m[1][2]); + adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]); + adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]); + adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]); - adj[2][0] = SPIRV_Cross_Det2x2(m[1][0], m[1][1], m[2][0], m[2][1]); - adj[2][1] = -SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[2][0], m[2][1]); - adj[2][2] = SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[1][0], m[1][1]); + adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]); + adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]); + adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]); // Calculate the determinant as a combination of the cofactors of the first row. float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]); @@ -56,37 +58,37 @@ float3x3 SPIRV_Cross_Inverse(float3x3 m) } // Returns the determinant of a 3x3 matrix. 
-float SPIRV_Cross_Det3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) { - return a1 * SPIRV_Cross_Det2x2(b2, b3, c2, c3) - b1 * SPIRV_Cross_Det2x2(a2, a3, c2, c3) + c1 * SPIRV_Cross_Det2x2(a2, a3, b2, b3); + return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. -float4x4 SPIRV_Cross_Inverse(float4x4 m) +float4x4 spvInverse(float4x4 m) { float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) // Create the transpose of the cofactors, as the classical adjoint of the matrix. - adj[0][0] = SPIRV_Cross_Det3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); - adj[0][1] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); - adj[0][2] = SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]); - adj[0][3] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]); - - adj[1][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); - adj[1][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); - adj[1][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]); - adj[1][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]); - - adj[2][0] = SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); - adj[2][1] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], 
m[2][3], m[3][0], m[3][1], m[3][3]); - adj[2][2] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]); - adj[2][3] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]); - - adj[3][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); - adj[3][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); - adj[3][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]); - adj[3][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); + adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]); + adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]); + + adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]); + adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]); + + adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]); + adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], 
m[2][1], m[2][3]); + + adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]); + adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); // Calculate the determinant as a combination of the cofactors of the first row. float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]); @@ -99,16 +101,16 @@ float4x4 SPIRV_Cross_Inverse(float4x4 m) void comp_main() { float2x2 _23 = asfloat(uint2x2(_20.Load2(0), _20.Load2(8))); - float2x2 _24 = SPIRV_Cross_Inverse(_23); + float2x2 _24 = spvInverse(_23); _15.Store2(0, asuint(_24[0])); _15.Store2(8, asuint(_24[1])); float3x3 _29 = asfloat(uint3x3(_20.Load3(16), _20.Load3(32), _20.Load3(48))); - float3x3 _30 = SPIRV_Cross_Inverse(_29); + float3x3 _30 = spvInverse(_29); _15.Store3(16, asuint(_30[0])); _15.Store3(32, asuint(_30[1])); _15.Store3(48, asuint(_30[2])); float4x4 _35 = asfloat(uint4x4(_20.Load4(64), _20.Load4(80), _20.Load4(96), _20.Load4(112))); - float4x4 _36 = SPIRV_Cross_Inverse(_35); + float4x4 _36 = spvInverse(_35); _15.Store4(64, asuint(_36[0])); _15.Store4(80, asuint(_36[1])); _15.Store4(96, asuint(_36[2])); diff --git a/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp b/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp index dee39e3d579..ff71a0e103c 100644 --- a/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp +++ b/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp @@ -1,5 +1,7 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _10 : register(u0); -cbuffer SPIRV_Cross_NumWorkgroups : register(b0) +cbuffer SPIRV_Cross_NumWorkgroups { uint3 SPIRV_Cross_NumWorkgroups_1_count : packoffset(c0); }; diff 
--git a/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp b/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp index 1c98e5e56d7..cc326db3329 100644 --- a/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp +++ b/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp @@ -1,5 +1,7 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _10 : register(u0); -cbuffer SPIRV_Cross_NumWorkgroups : register(b0) +cbuffer SPIRV_Cross_NumWorkgroups { uint3 SPIRV_Cross_NumWorkgroups_1_count : packoffset(c0); }; diff --git a/reference/opt/shaders-hlsl/comp/outer-product.comp b/reference/opt/shaders-hlsl/comp/outer-product.comp index 71613d4f156..e58c02fe0b8 100644 --- a/reference/opt/shaders-hlsl/comp/outer-product.comp +++ b/reference/opt/shaders-hlsl/comp/outer-product.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _21 : register(u0); ByteAddressBuffer _26 : register(t1); diff --git a/reference/opt/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp b/reference/opt/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp new file mode 100644 index 00000000000..80394bef7be --- /dev/null +++ b/reference/opt/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp @@ -0,0 +1,242 @@ +static float3x4 _362; +static float4x3 _364; + +RWByteAddressBuffer _17 : register(u0); +uniform RaytracingAccelerationStructure rtas : register(t1); + +static RayQuery rayQuery; + +void comp_main() +{ + RayDesc _1ident = {0.0f.xxx, 0.0f, float3(1.0f, 0.0f, 0.0f), 9999.0f}; + rayQuery.TraceRayInline(rtas, 0u, 255u, _1ident); + float3x4 _361; + float4x3 _363; + _363 = _364; + _361 = _362; + float3x4 _387; + float4x3 _398; + for (;;) + { + bool _67 = rayQuery.Proceed(); + if (_67) + { + uint _71 = rayQuery.CandidateType(); + switch (_71) + { + case 0u: + { + rayQuery.Abort(); + float4x3 _79 = rayQuery.CandidateObjectToWorld4x3(); + rayQuery.CommitNonOpaqueTriangleHit(); + bool _87 = 
rayQuery.CommittedTriangleFrontFace(); + if (_87) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float2 _92 = rayQuery.CommittedTriangleBarycentrics(); + if (_92.x == 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + int _98 = rayQuery.CommittedInstanceID(); + if (_98 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + int _103 = rayQuery.CommittedInstanceIndex(); + if (_103 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float3 _108 = rayQuery.CommittedObjectRayDirection(); + if (_108.x > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float3 _114 = rayQuery.CommittedObjectRayOrigin(); + if (_114.x > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + int _120 = rayQuery.CommittedPrimitiveIndex(); + if (_120 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float _125 = rayQuery.CommittedRayT(); + if (_125 > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + uint _130 = rayQuery.CommittedInstanceContributionToHitGroupIndex(); + if (_130 > 0u) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + _398 = _79; + _387 = transpose(_79); + break; + } + case 1u: + { + float4x3 _136 = rayQuery.CandidateObjectToWorld4x3(); + bool _139 = rayQuery.CandidateProceduralPrimitiveNonOpaque(); + if (_139) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + rayQuery.CommitProceduralPrimitiveHit(144); + rayQuery.Abort(); + _398 = _136; + _387 = transpose(_136); + break; + } + default: + { + _398 = _363; + _387 = _361; + break; + } + } + _363 = _398; + _361 = _387; + continue; + } + else + { + break; + } + } + if (_361[0].x == _363[0].x) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + uint _157 = rayQuery.CommittedStatus(); + float3x4 _365; + float4x3 _376; + switch (_157) + { + case 0u: + { + float4x3 _163 = rayQuery.CandidateWorldToObject4x3(); + _376 = _163; + _365 = transpose(_163); + break; + } + case 1u: + { + float4x3 _167 = rayQuery.CommittedWorldToObject4x3(); + bool _170 = rayQuery.CommittedTriangleFrontFace(); + if (_170) + { + 
_17.Store(0, 0u); + _17.Store(4, 0u); + } + float2 _174 = rayQuery.CommittedTriangleBarycentrics(); + if (_174.y == 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + _376 = _167; + _365 = transpose(_167); + break; + } + case 2u: + { + int _182 = rayQuery.CommittedGeometryIndex(); + if (_182 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + int _187 = rayQuery.CommittedInstanceIndex(); + if (_187 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + int _192 = rayQuery.CommittedInstanceID(); + if (_192 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float3 _197 = rayQuery.CommittedObjectRayDirection(); + if (_197.z > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float3 _204 = rayQuery.CommittedObjectRayOrigin(); + if (_204.x > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + int _210 = rayQuery.CommittedPrimitiveIndex(); + if (_210 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float _215 = rayQuery.CommittedRayT(); + if (_215 > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + _376 = _363; + _365 = _361; + break; + } + default: + { + _376 = _363; + _365 = _361; + break; + } + } + if (_365[0].x == _376[0].x) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + uint _230 = rayQuery.RayFlags(); + if (_230 > 256u) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float _236 = rayQuery.RayTMin(); + if (_236 > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float3 _242 = rayQuery.WorldRayOrigin(); + float3 _244 = rayQuery.WorldRayDirection(); + if (_242.x == _244.z) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/rmw-matrix.comp b/reference/opt/shaders-hlsl/comp/rmw-matrix.comp index ed666693588..30ac03f84f4 100644 --- a/reference/opt/shaders-hlsl/comp/rmw-matrix.comp +++ b/reference/opt/shaders-hlsl/comp/rmw-matrix.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + 
RWByteAddressBuffer _11 : register(u0); void comp_main() diff --git a/reference/opt/shaders-hlsl/comp/rwbuffer-matrix.comp b/reference/opt/shaders-hlsl/comp/rwbuffer-matrix.comp index 42103c2bd46..09cbd2f49b4 100644 --- a/reference/opt/shaders-hlsl/comp/rwbuffer-matrix.comp +++ b/reference/opt/shaders-hlsl/comp/rwbuffer-matrix.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _28 : register(u0); cbuffer UBO : register(b1) { @@ -8,57 +10,57 @@ cbuffer UBO : register(b1) void comp_main() { - float4x4 _253 = asfloat(uint4x4(_28.Load(64), _28.Load(80), _28.Load(96), _28.Load(112), _28.Load(68), _28.Load(84), _28.Load(100), _28.Load(116), _28.Load(72), _28.Load(88), _28.Load(104), _28.Load(120), _28.Load(76), _28.Load(92), _28.Load(108), _28.Load(124))); - _28.Store4(0, asuint(_253[0])); - _28.Store4(16, asuint(_253[1])); - _28.Store4(32, asuint(_253[2])); - _28.Store4(48, asuint(_253[3])); - float2x2 _256 = asfloat(uint2x2(_28.Load(144), _28.Load(152), _28.Load(148), _28.Load(156))); - _28.Store2(128, asuint(_256[0])); - _28.Store2(136, asuint(_256[1])); - float2x3 _259 = asfloat(uint2x3(_28.Load(192), _28.Load(200), _28.Load(208), _28.Load(196), _28.Load(204), _28.Load(212))); - _28.Store3(160, asuint(_259[0])); - _28.Store3(176, asuint(_259[1])); - float3x2 _262 = asfloat(uint3x2(_28.Load(240), _28.Load(256), _28.Load(244), _28.Load(260), _28.Load(248), _28.Load(264))); - _28.Store2(216, asuint(_262[0])); - _28.Store2(224, asuint(_262[1])); - _28.Store2(232, asuint(_262[2])); - float4x4 _265 = asfloat(uint4x4(_28.Load4(0), _28.Load4(16), _28.Load4(32), _28.Load4(48))); - _28.Store(64, asuint(_265[0].x)); - _28.Store(68, asuint(_265[1].x)); - _28.Store(72, asuint(_265[2].x)); - _28.Store(76, asuint(_265[3].x)); - _28.Store(80, asuint(_265[0].y)); - _28.Store(84, asuint(_265[1].y)); - _28.Store(88, asuint(_265[2].y)); - _28.Store(92, asuint(_265[3].y)); - _28.Store(96, asuint(_265[0].z)); - _28.Store(100, 
asuint(_265[1].z)); - _28.Store(104, asuint(_265[2].z)); - _28.Store(108, asuint(_265[3].z)); - _28.Store(112, asuint(_265[0].w)); - _28.Store(116, asuint(_265[1].w)); - _28.Store(120, asuint(_265[2].w)); - _28.Store(124, asuint(_265[3].w)); - float2x2 _268 = asfloat(uint2x2(_28.Load2(128), _28.Load2(136))); - _28.Store(144, asuint(_268[0].x)); - _28.Store(148, asuint(_268[1].x)); - _28.Store(152, asuint(_268[0].y)); - _28.Store(156, asuint(_268[1].y)); - float2x3 _271 = asfloat(uint2x3(_28.Load3(160), _28.Load3(176))); - _28.Store(192, asuint(_271[0].x)); - _28.Store(196, asuint(_271[1].x)); - _28.Store(200, asuint(_271[0].y)); - _28.Store(204, asuint(_271[1].y)); - _28.Store(208, asuint(_271[0].z)); - _28.Store(212, asuint(_271[1].z)); - float3x2 _274 = asfloat(uint3x2(_28.Load2(216), _28.Load2(224), _28.Load2(232))); - _28.Store(240, asuint(_274[0].x)); - _28.Store(244, asuint(_274[1].x)); - _28.Store(248, asuint(_274[2].x)); - _28.Store(256, asuint(_274[0].y)); - _28.Store(260, asuint(_274[1].y)); - _28.Store(264, asuint(_274[2].y)); + float4x4 _258 = asfloat(uint4x4(_28.Load(64), _28.Load(80), _28.Load(96), _28.Load(112), _28.Load(68), _28.Load(84), _28.Load(100), _28.Load(116), _28.Load(72), _28.Load(88), _28.Load(104), _28.Load(120), _28.Load(76), _28.Load(92), _28.Load(108), _28.Load(124))); + _28.Store4(0, asuint(_258[0])); + _28.Store4(16, asuint(_258[1])); + _28.Store4(32, asuint(_258[2])); + _28.Store4(48, asuint(_258[3])); + float2x2 _261 = asfloat(uint2x2(_28.Load(144), _28.Load(152), _28.Load(148), _28.Load(156))); + _28.Store2(128, asuint(_261[0])); + _28.Store2(136, asuint(_261[1])); + float2x3 _264 = asfloat(uint2x3(_28.Load(192), _28.Load(200), _28.Load(208), _28.Load(196), _28.Load(204), _28.Load(212))); + _28.Store3(160, asuint(_264[0])); + _28.Store3(176, asuint(_264[1])); + float3x2 _267 = asfloat(uint3x2(_28.Load(240), _28.Load(256), _28.Load(244), _28.Load(260), _28.Load(248), _28.Load(264))); + _28.Store2(216, asuint(_267[0])); + 
_28.Store2(224, asuint(_267[1])); + _28.Store2(232, asuint(_267[2])); + float4x4 _271 = asfloat(uint4x4(_28.Load4(0), _28.Load4(16), _28.Load4(32), _28.Load4(48))); + _28.Store(64, asuint(_271[0].x)); + _28.Store(68, asuint(_271[1].x)); + _28.Store(72, asuint(_271[2].x)); + _28.Store(76, asuint(_271[3].x)); + _28.Store(80, asuint(_271[0].y)); + _28.Store(84, asuint(_271[1].y)); + _28.Store(88, asuint(_271[2].y)); + _28.Store(92, asuint(_271[3].y)); + _28.Store(96, asuint(_271[0].z)); + _28.Store(100, asuint(_271[1].z)); + _28.Store(104, asuint(_271[2].z)); + _28.Store(108, asuint(_271[3].z)); + _28.Store(112, asuint(_271[0].w)); + _28.Store(116, asuint(_271[1].w)); + _28.Store(120, asuint(_271[2].w)); + _28.Store(124, asuint(_271[3].w)); + float2x2 _274 = asfloat(uint2x2(_28.Load2(128), _28.Load2(136))); + _28.Store(144, asuint(_274[0].x)); + _28.Store(148, asuint(_274[1].x)); + _28.Store(152, asuint(_274[0].y)); + _28.Store(156, asuint(_274[1].y)); + float2x3 _277 = asfloat(uint2x3(_28.Load3(160), _28.Load3(176))); + _28.Store(192, asuint(_277[0].x)); + _28.Store(196, asuint(_277[1].x)); + _28.Store(200, asuint(_277[0].y)); + _28.Store(204, asuint(_277[1].y)); + _28.Store(208, asuint(_277[0].z)); + _28.Store(212, asuint(_277[1].z)); + float3x2 _280 = asfloat(uint3x2(_28.Load2(216), _28.Load2(224), _28.Load2(232))); + _28.Store(240, asuint(_280[0].x)); + _28.Store(244, asuint(_280[1].x)); + _28.Store(248, asuint(_280[2].x)); + _28.Store(256, asuint(_280[0].y)); + _28.Store(260, asuint(_280[1].y)); + _28.Store(264, asuint(_280[2].y)); _28.Store(_68_index0 * 4 + _68_index1 * 16 + 64, asuint(1.0f)); _28.Store(_68_index0 * 4 + _68_index1 * 8 + 144, asuint(2.0f)); _28.Store(_68_index0 * 4 + _68_index1 * 8 + 192, asuint(3.0f)); diff --git a/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp index 47f2fe41076..db2bbe96989 100644 --- 
a/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _9 : register(u0); void comp_main() diff --git a/reference/opt/shaders-hlsl/comp/spec-constant-op-member-array.comp b/reference/opt/shaders-hlsl/comp/spec-constant-op-member-array.comp index c4537db0391..4e7c5e6167e 100644 --- a/reference/opt/shaders-hlsl/comp/spec-constant-op-member-array.comp +++ b/reference/opt/shaders-hlsl/comp/spec-constant-op-member-array.comp @@ -28,6 +28,7 @@ static const int d = (c + 50); #define SPIRV_CROSS_CONSTANT_ID_3 400 #endif static const int e = SPIRV_CROSS_CONSTANT_ID_3; +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); RWByteAddressBuffer _22 : register(u0); diff --git a/reference/opt/shaders-hlsl/comp/ssbo-array-length.comp b/reference/opt/shaders-hlsl/comp/ssbo-array-length.comp index 2e3df626ae7..82657cacfcb 100644 --- a/reference/opt/shaders-hlsl/comp/ssbo-array-length.comp +++ b/reference/opt/shaders-hlsl/comp/ssbo-array-length.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _11 : register(u1); void comp_main() diff --git a/reference/opt/shaders-hlsl/comp/ssbo-array.comp b/reference/opt/shaders-hlsl/comp/ssbo-array.comp index d8bce8d54b7..ee202a22257 100644 --- a/reference/opt/shaders-hlsl/comp/ssbo-array.comp +++ b/reference/opt/shaders-hlsl/comp/ssbo-array.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + void comp_main() { } diff --git a/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp b/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp deleted file mode 100644 index dabc7df9e2d..00000000000 --- a/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp +++ /dev/null @@ -1,67 +0,0 @@ -RWByteAddressBuffer _9 : register(u0, space0); - 
-static uint4 gl_SubgroupEqMask; -static uint4 gl_SubgroupGeMask; -static uint4 gl_SubgroupGtMask; -static uint4 gl_SubgroupLeMask; -static uint4 gl_SubgroupLtMask; -void comp_main() -{ - _9.Store(0, asuint(float(WaveGetLaneCount()))); - _9.Store(0, asuint(float(WaveGetLaneIndex()))); - _9.Store(0, asuint(float4(gl_SubgroupEqMask).x)); - _9.Store(0, asuint(float4(gl_SubgroupGeMask).x)); - _9.Store(0, asuint(float4(gl_SubgroupGtMask).x)); - _9.Store(0, asuint(float4(gl_SubgroupLeMask).x)); - _9.Store(0, asuint(float4(gl_SubgroupLtMask).x)); - uint4 _75 = WaveActiveBallot(true); - float4 _88 = WaveActiveSum(20.0f.xxxx); - int4 _94 = WaveActiveSum(int4(20, 20, 20, 20)); - float4 _96 = WaveActiveProduct(20.0f.xxxx); - int4 _98 = WaveActiveProduct(int4(20, 20, 20, 20)); - float4 _127 = WavePrefixProduct(_96) * _96; - int4 _129 = WavePrefixProduct(_98) * _98; -} - -[numthreads(1, 1, 1)] -void main() -{ - gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96)); - if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0; - if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0; - if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0; - if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0; - gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u); - if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u; - if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u; - if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u; - if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u; - if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u; - if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u; - uint gt_lane_index = WaveGetLaneIndex() + 1; - gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u); - if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u; - if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u; - if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u; - if (gt_lane_index 
>= 128) gl_SubgroupGtMask.w = 0u; - if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u; - if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u; - if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u; - uint le_lane_index = WaveGetLaneIndex() + 1; - gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u; - if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u; - if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u; - if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u; - if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u; - if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u; - if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u; - if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u; - gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u; - if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u; - if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u; - if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u; - if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u; - if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u; - if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u; - comp_main(); -} diff --git a/reference/opt/shaders-hlsl/flatten/array.flatten.vert b/reference/opt/shaders-hlsl/flatten/array.flatten.vert new file mode 100644 index 00000000000..c709893c1e9 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/array.flatten.vert @@ -0,0 +1,28 @@ +uniform float4 UBO[56]; + +static float4 gl_Position; +static float4 aVertex; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = (mul(aVertex, float4x4(UBO[40], UBO[41], UBO[42], UBO[43])) + UBO[55]) + ((UBO[50] + UBO[45]) + UBO[54].x.xxxx); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git 
a/reference/opt/shaders-hlsl/flatten/basic.flatten.vert b/reference/opt/shaders-hlsl/flatten/basic.flatten.vert new file mode 100644 index 00000000000..778acd48037 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/basic.flatten.vert @@ -0,0 +1,35 @@ +uniform float4 UBO[4]; + +static float4 gl_Position; +static float4 aVertex; +static float3 vNormal; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float3 vNormal : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vNormal = aNormal; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vNormal = vNormal; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/copy.flatten.vert b/reference/opt/shaders-hlsl/flatten/copy.flatten.vert new file mode 100644 index 00000000000..5d857ad674d --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/copy.flatten.vert @@ -0,0 +1,50 @@ +struct Light +{ + float3 Position; + float Radius; + float4 Color; +}; + +uniform float4 UBO[12]; + +static float4 gl_Position; +static float4 aVertex; +static float4 vColor; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 vColor : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vColor = 0.0f.xxxx; + for (int _96 = 0; _96 < 4; ) + { + Light _51 = {UBO[_96 * 2 + 4].xyz, UBO[_96 * 2 + 4].w, UBO[_96 * 2 + 5]}; + float3 _68 = aVertex.xyz - _51.Position; + vColor += ((UBO[_96 * 2 + 5] * clamp(1.0f - (length(_68) / _51.Radius), 0.0f, 1.0f)) * 
dot(aNormal, normalize(_68))); + _96++; + continue; + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/dynamic.flatten.vert b/reference/opt/shaders-hlsl/flatten/dynamic.flatten.vert new file mode 100644 index 00000000000..98d5e1b3039 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/dynamic.flatten.vert @@ -0,0 +1,49 @@ +struct Light +{ + float3 Position; + float Radius; + float4 Color; +}; + +uniform float4 UBO[12]; + +static float4 gl_Position; +static float4 aVertex; +static float4 vColor; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 vColor : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vColor = 0.0f.xxxx; + for (int _82 = 0; _82 < 4; ) + { + float3 _54 = aVertex.xyz - UBO[_82 * 2 + 4].xyz; + vColor += ((UBO[_82 * 2 + 5] * clamp(1.0f - (length(_54) / UBO[_82 * 2 + 4].w), 0.0f, 1.0f)) * dot(aNormal, normalize(_54))); + _82++; + continue; + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/matrix-conversion.flatten.frag b/reference/opt/shaders-hlsl/flatten/matrix-conversion.flatten.frag new file mode 100644 index 00000000000..59ec525f41a --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/matrix-conversion.flatten.frag @@ -0,0 +1,29 @@ +uniform float4 UBO[4]; + +static float3 FragColor; 
+static float3 vNormal; + +struct SPIRV_Cross_Input +{ + nointerpolation float3 vNormal : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float3 FragColor : SV_Target0; +}; + +void frag_main() +{ + float4x4 _19 = float4x4(UBO[0], UBO[1], UBO[2], UBO[3]); + FragColor = mul(vNormal, float3x3(_19[0].xyz, _19[1].xyz, _19[2].xyz)); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vNormal = stage_input.vNormal; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/matrixindex.flatten.vert b/reference/opt/shaders-hlsl/flatten/matrixindex.flatten.vert new file mode 100644 index 00000000000..b69a72dc11a --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/matrixindex.flatten.vert @@ -0,0 +1,41 @@ +uniform float4 UBO[14]; + +static float4 gl_Position; +static float4 oA; +static float4 oB; +static float4 oC; +static float4 oD; +static float4 oE; + +struct SPIRV_Cross_Output +{ + float4 oA : TEXCOORD0; + float4 oB : TEXCOORD1; + float4 oC : TEXCOORD2; + float4 oD : TEXCOORD3; + float4 oE : TEXCOORD4; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = 0.0f.xxxx; + oA = UBO[1]; + oB = float4(UBO[4].y, UBO[5].y, UBO[6].y, UBO[7].y); + oC = UBO[9]; + oD = float4(UBO[10].x, UBO[11].x, UBO[12].x, UBO[13].x); + oE = float4(UBO[1].z, UBO[6].y, UBO[9].z, UBO[12].y); +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.oA = oA; + stage_output.oB = oB; + stage_output.oC = oC; + stage_output.oD = oD; + stage_output.oE = oE; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/multiindex.flatten.vert b/reference/opt/shaders-hlsl/flatten/multiindex.flatten.vert new file mode 100644 index 00000000000..f21f05ec446 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/multiindex.flatten.vert @@ -0,0 +1,28 @@ +uniform float4 
UBO[15]; + +static float4 gl_Position; +static int2 aIndex; + +struct SPIRV_Cross_Input +{ + int2 aIndex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = UBO[aIndex.x * 5 + aIndex.y * 1 + 0]; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aIndex = stage_input.aIndex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/push-constant.flatten.vert b/reference/opt/shaders-hlsl/flatten/push-constant.flatten.vert new file mode 100644 index 00000000000..5bfb4dc0651 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/push-constant.flatten.vert @@ -0,0 +1,35 @@ +uniform float4 PushMe[6]; + +static float4 gl_Position; +static float4 Pos; +static float2 vRot; +static float2 Rot; + +struct SPIRV_Cross_Input +{ + float2 Rot : TEXCOORD0; + float4 Pos : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float2 vRot : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(Pos, float4x4(PushMe[0], PushMe[1], PushMe[2], PushMe[3])); + vRot = mul(Rot, float2x2(PushMe[4].xy, PushMe[4].zw)) + PushMe[5].z.xx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + Pos = stage_input.Pos; + Rot = stage_input.Rot; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vRot = vRot; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/rowmajor.flatten.vert b/reference/opt/shaders-hlsl/flatten/rowmajor.flatten.vert new file mode 100644 index 00000000000..2560484efb5 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/rowmajor.flatten.vert @@ -0,0 +1,28 @@ +uniform float4 UBO[12]; + +static float4 gl_Position; +static float4 aVertex; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : 
SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])) + mul(aVertex, transpose(float4x4(UBO[4], UBO[5], UBO[6], UBO[7]))); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/struct.flatten.vert b/reference/opt/shaders-hlsl/flatten/struct.flatten.vert new file mode 100644 index 00000000000..41ad8ce9654 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/struct.flatten.vert @@ -0,0 +1,44 @@ +struct Light +{ + float3 Position; + float Radius; + float4 Color; +}; + +uniform float4 UBO[6]; + +static float4 gl_Position; +static float4 aVertex; +static float4 vColor; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 vColor : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vColor = 0.0f.xxxx; + float3 _39 = aVertex.xyz - UBO[4].xyz; + vColor += ((UBO[5] * clamp(1.0f - (length(_39) / UBO[4].w), 0.0f, 1.0f)) * dot(aNormal, normalize(_39))); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/struct.rowmajor.flatten.vert b/reference/opt/shaders-hlsl/flatten/struct.rowmajor.flatten.vert new file mode 100644 index 00000000000..bb702907a72 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/struct.rowmajor.flatten.vert @@ -0,0 +1,43 @@ +struct Foo +{ + column_major float3x4 MVP0; + column_major float3x4 MVP1; +}; + +uniform float4 
UBO[8]; + +static float4 v0; +static float4 v1; +static float3 V0; +static float3 V1; + +struct SPIRV_Cross_Input +{ + float4 v0 : TEXCOORD0; + float4 v1 : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float3 V0 : TEXCOORD0; + float3 V1 : TEXCOORD1; +}; + +void vert_main() +{ + Foo _19 = {transpose(float4x3(UBO[0].xyz, UBO[1].xyz, UBO[2].xyz, UBO[3].xyz)), transpose(float4x3(UBO[4].xyz, UBO[5].xyz, UBO[6].xyz, UBO[7].xyz))}; + Foo _20 = _19; + V0 = mul(_20.MVP0, v0); + V1 = mul(_20.MVP1, v1); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + v0 = stage_input.v0; + v1 = stage_input.v1; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.V0 = V0; + stage_output.V1 = V1; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/swizzle.flatten.vert b/reference/opt/shaders-hlsl/flatten/swizzle.flatten.vert new file mode 100644 index 00000000000..1091a17e995 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/swizzle.flatten.vert @@ -0,0 +1,45 @@ +uniform float4 UBO[8]; + +static float4 gl_Position; +static float4 oA; +static float4 oB; +static float4 oC; +static float4 oD; +static float4 oE; +static float4 oF; + +struct SPIRV_Cross_Output +{ + float4 oA : TEXCOORD0; + float4 oB : TEXCOORD1; + float4 oC : TEXCOORD2; + float4 oD : TEXCOORD3; + float4 oE : TEXCOORD4; + float4 oF : TEXCOORD5; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = 0.0f.xxxx; + oA = UBO[0]; + oB = float4(UBO[1].xy, UBO[1].zw); + oC = float4(UBO[2].x, UBO[3].xyz); + oD = float4(UBO[4].xyz, UBO[4].w); + oE = float4(UBO[5].x, UBO[5].y, UBO[5].z, UBO[5].w); + oF = float4(UBO[6].x, UBO[6].zw, UBO[7].x); +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.oA = oA; + stage_output.oB = oB; + stage_output.oC = oC; + stage_output.oD = oD; + stage_output.oE = oE; + stage_output.oF = oF; + return stage_output; +} diff --git 
a/reference/opt/shaders-hlsl/flatten/types.flatten.frag b/reference/opt/shaders-hlsl/flatten/types.flatten.frag new file mode 100644 index 00000000000..feb0b36096a --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/types.flatten.frag @@ -0,0 +1,23 @@ +uniform int4 UBO1[2]; +uniform uint4 UBO2[2]; +uniform float4 UBO0[2]; + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = ((((float4(UBO1[0]) + float4(UBO1[1])) + float4(UBO2[0])) + float4(UBO2[1])) + UBO0[0]) + UBO0[1]; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag index 3adf7d9852e..38f416fbfad 100644 --- a/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag +++ b/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag @@ -1,12 +1,6 @@ static const float _17[5] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; static float4 FragColor; -static float4 v0; - -struct SPIRV_Cross_Input -{ - float4 v0 : TEXCOORD0; -}; struct SPIRV_Cross_Output { @@ -24,9 +18,8 @@ void frag_main() } } -SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +SPIRV_Cross_Output main() { - v0 = stage_input.v0; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/frag/builtins.frag b/reference/opt/shaders-hlsl/frag/builtins.frag index 922eca7c2d2..8432c42f80d 100644 --- a/reference/opt/shaders-hlsl/frag/builtins.frag +++ b/reference/opt/shaders-hlsl/frag/builtins.frag @@ -24,6 +24,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; vColor = stage_input.vColor; frag_main(); SPIRV_Cross_Output stage_output; diff --git 
a/reference/opt/shaders-hlsl/frag/bvec-operations.frag b/reference/opt/shaders-hlsl/frag/bvec-operations.frag index 6a22df1ed7f..4813cc55a26 100644 --- a/reference/opt/shaders-hlsl/frag/bvec-operations.frag +++ b/reference/opt/shaders-hlsl/frag/bvec-operations.frag @@ -1,3 +1,5 @@ +static bool _47; + static float2 value; static float4 FragColor; @@ -11,8 +13,6 @@ struct SPIRV_Cross_Output float4 FragColor : SV_Target0; }; -bool _47; - void frag_main() { bool2 _25 = bool2(value.x == 0.0f, _47); diff --git a/reference/opt/shaders-hlsl/frag/complex-expression-in-access-chain.frag b/reference/opt/shaders-hlsl/frag/complex-expression-in-access-chain.frag index d9336c09fce..1de882445b4 100644 --- a/reference/opt/shaders-hlsl/frag/complex-expression-in-access-chain.frag +++ b/reference/opt/shaders-hlsl/frag/complex-expression-in-access-chain.frag @@ -28,6 +28,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; vIn = stage_input.vIn; vIn2 = stage_input.vIn2; frag_main(); diff --git a/reference/opt/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag b/reference/opt/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag index b2899ea02ca..1b314e13b98 100644 --- a/reference/opt/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag +++ b/reference/opt/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag @@ -28,6 +28,7 @@ void frag_main() float4 _47 = ddy_fine(vInput); float4 _50 = fwidth(vInput); float _56_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vInput.zw); + float2 _56 = _56_tmp.xx; if (vInput.y > 10.0f) { FragColor += _23; @@ -40,7 +41,7 @@ void frag_main() FragColor += _44; FragColor += _47; FragColor += _50; - FragColor += float2(_56_tmp, _56_tmp).xyxy; + FragColor += _56.xyxy; } } diff --git a/reference/opt/shaders-hlsl/frag/demote-to-helper.frag b/reference/opt/shaders-hlsl/frag/demote-to-helper.frag new file mode 100644 
index 00000000000..743a4228baf --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/demote-to-helper.frag @@ -0,0 +1,9 @@ +void frag_main() +{ + discard; +} + +void main() +{ + frag_main(); +} diff --git a/reference/opt/shaders-hlsl/frag/fp16-packing.frag b/reference/opt/shaders-hlsl/frag/fp16-packing.frag index d87828225fd..54b91e2aa51 100644 --- a/reference/opt/shaders-hlsl/frag/fp16-packing.frag +++ b/reference/opt/shaders-hlsl/frag/fp16-packing.frag @@ -15,21 +15,21 @@ struct SPIRV_Cross_Output uint FP16Out : SV_Target1; }; -uint SPIRV_Cross_packHalf2x16(float2 value) +uint spvPackHalf2x16(float2 value) { uint2 Packed = f32tof16(value); return Packed.x | (Packed.y << 16); } -float2 SPIRV_Cross_unpackHalf2x16(uint value) +float2 spvUnpackHalf2x16(uint value) { return f16tof32(uint2(value & 0xffff, value >> 16)); } void frag_main() { - FP32Out = SPIRV_Cross_unpackHalf2x16(FP16); - FP16Out = SPIRV_Cross_packHalf2x16(FP32); + FP32Out = spvUnpackHalf2x16(FP16); + FP16Out = spvPackHalf2x16(FP32); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/fp16.invalid.desktop.frag b/reference/opt/shaders-hlsl/frag/fp16.invalid.desktop.frag deleted file mode 100644 index 8ec30af16fc..00000000000 --- a/reference/opt/shaders-hlsl/frag/fp16.invalid.desktop.frag +++ /dev/null @@ -1,45 +0,0 @@ -static min16float4 v4; -static min16float3 v3; -static min16float v1; -static min16float2 v2; -static float o1; -static float2 o2; -static float3 o3; -static float4 o4; - -struct SPIRV_Cross_Input -{ - min16float v1 : TEXCOORD0; - min16float2 v2 : TEXCOORD1; - min16float3 v3 : TEXCOORD2; - min16float4 v4 : TEXCOORD3; -}; - -struct SPIRV_Cross_Output -{ - float o1 : SV_Target0; - float2 o2 : SV_Target1; - float3 o3 : SV_Target2; - float4 o4 : SV_Target3; -}; - -void frag_main() -{ - min16float4 _324; - min16float4 _387 = modf(v4, _324); -} - -SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) -{ - v4 = stage_input.v4; - v3 = 
stage_input.v3; - v1 = stage_input.v1; - v2 = stage_input.v2; - frag_main(); - SPIRV_Cross_Output stage_output; - stage_output.o1 = o1; - stage_output.o2 = o2; - stage_output.o3 = o3; - stage_output.o4 = o4; - return stage_output; -} diff --git a/reference/opt/shaders-hlsl/frag/image-query-uav.frag b/reference/opt/shaders-hlsl/frag/image-query-uav.frag new file mode 100644 index 00000000000..3b50282fe07 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/image-query-uav.frag @@ -0,0 +1,8 @@ +void frag_main() +{ +} + +void main() +{ + frag_main(); +} diff --git a/reference/opt/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag b/reference/opt/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag new file mode 100644 index 00000000000..3b50282fe07 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag @@ -0,0 +1,8 @@ +void frag_main() +{ +} + +void main() +{ + frag_main(); +} diff --git a/reference/opt/shaders-hlsl/frag/input-attachment-ms.frag b/reference/opt/shaders-hlsl/frag/input-attachment-ms.frag index e206b83798a..54cb1dd944c 100644 --- a/reference/opt/shaders-hlsl/frag/input-attachment-ms.frag +++ b/reference/opt/shaders-hlsl/frag/input-attachment-ms.frag @@ -24,6 +24,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; gl_SampleID = stage_input.gl_SampleID; frag_main(); SPIRV_Cross_Output stage_output; diff --git a/reference/opt/shaders-hlsl/frag/input-attachment.frag b/reference/opt/shaders-hlsl/frag/input-attachment.frag index d87661e5f93..34aaafcf3d2 100644 --- a/reference/opt/shaders-hlsl/frag/input-attachment.frag +++ b/reference/opt/shaders-hlsl/frag/input-attachment.frag @@ -22,6 +22,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output 
stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/frag/io-block.frag b/reference/opt/shaders-hlsl/frag/io-block.frag index 52c1f518bf2..812a44d8138 100644 --- a/reference/opt/shaders-hlsl/frag/io-block.frag +++ b/reference/opt/shaders-hlsl/frag/io-block.frag @@ -1,13 +1,18 @@ -static float4 FragColor; - struct VertexOut { - float4 a : TEXCOORD1; - float4 b : TEXCOORD2; + float4 a; + float4 b; }; +static float4 FragColor; static VertexOut _12; +struct SPIRV_Cross_Input +{ + float4 VertexOut_a : TEXCOORD1; + float4 VertexOut_b : TEXCOORD2; +}; + struct SPIRV_Cross_Output { float4 FragColor : SV_Target0; @@ -18,9 +23,10 @@ void frag_main() FragColor = _12.a + _12.b; } -SPIRV_Cross_Output main(in VertexOut stage_input_12) +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { - _12 = stage_input_12; + _12.a = stage_input.VertexOut_a; + _12.b = stage_input.VertexOut_b; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag b/reference/opt/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag new file mode 100644 index 00000000000..2af0e513b44 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag @@ -0,0 +1,32 @@ +uniform sampler2D uSampler; + +static float4 FragColor; +static float2 vUV; + +struct SPIRV_Cross_Input +{ + float2 vUV : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : COLOR0; +}; + +void frag_main() +{ + float3 _23 = float3(vUV, 5.0f); + FragColor = tex2Dproj(uSampler, float4(_23.xy, 0.0, _23.z)); + FragColor += tex2Dbias(uSampler, float4(vUV, 0.0, 3.0f)); + FragColor += tex2Dlod(uSampler, float4(vUV, 0.0, 2.0f)); + FragColor += tex2Dgrad(uSampler, vUV, 4.0f.xx, 5.0f.xx); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vUV = stage_input.vUV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = float4(FragColor); + return 
stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/no-return2.frag b/reference/opt/shaders-hlsl/frag/no-return2.frag index e9d7bbc8f97..3b50282fe07 100644 --- a/reference/opt/shaders-hlsl/frag/no-return2.frag +++ b/reference/opt/shaders-hlsl/frag/no-return2.frag @@ -1,16 +1,8 @@ -static float4 vColor; - -struct SPIRV_Cross_Input -{ - float4 vColor : TEXCOORD0; -}; - void frag_main() { } -void main(SPIRV_Cross_Input stage_input) +void main() { - vColor = stage_input.vColor; frag_main(); } diff --git a/reference/opt/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag b/reference/opt/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag index 544c5705389..6685ef9c429 100644 --- a/reference/opt/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag +++ b/reference/opt/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag @@ -3,12 +3,15 @@ struct UBO_1_1 float4 v[64]; }; -ConstantBuffer ubos[] : register(b0, space3); -ByteAddressBuffer ssbos[] : register(t0, space4); +ConstantBuffer ubos[] : register(b2, space9); +RWByteAddressBuffer ssbos[] : register(u3, space10); Texture2D uSamplers[] : register(t0, space0); -SamplerState uSamps[] : register(s0, space2); -Texture2D uCombinedSamplers[] : register(t0, space1); -SamplerState _uCombinedSamplers_sampler[] : register(s0, space1); +SamplerState uSamps[] : register(s1, space3); +Texture2D uCombinedSamplers[] : register(t4, space2); +SamplerState _uCombinedSamplers_sampler[] : register(s4, space2); +Texture2DMS uSamplersMS[] : register(t0, space1); +RWTexture2D uImages[] : register(u5, space7); +RWTexture2D uImagesU32[] : register(u5, space8); static int vIndex; static float4 FragColor; @@ -25,14 +28,76 @@ struct SPIRV_Cross_Output float4 FragColor : SV_Target0; }; +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(Level, ret.x, ret.y, Param); + return ret; +} + +uint2 spvTextureSize(Texture2DMS Tex, uint 
Level, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(ret.x, ret.y, Param); + return ret; +} + +uint2 spvImageSize(RWTexture2D Tex, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(ret.x, ret.y); + Param = 0u; + return ret; +} + void frag_main() { int _22 = vIndex + 10; int _32 = vIndex + 40; FragColor = uSamplers[NonUniformResourceIndex(_22)].Sample(uSamps[NonUniformResourceIndex(_32)], vUV); - FragColor = uCombinedSamplers[NonUniformResourceIndex(_22)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_22)], vUV); - FragColor += ubos[NonUniformResourceIndex(vIndex + 20)].v[_32]; - FragColor += asfloat(ssbos[NonUniformResourceIndex(vIndex + 50)].Load4((vIndex + 60) * 16 + 0)); + int _49 = _22; + FragColor = uCombinedSamplers[NonUniformResourceIndex(_49)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_49)], vUV); + int _65 = vIndex + 20; + int _69 = _32; + FragColor += ubos[NonUniformResourceIndex(_65)].v[_69]; + int _83 = vIndex + 50; + int _88 = vIndex + 60; + FragColor += asfloat(ssbos[NonUniformResourceIndex(_83)].Load4(_88 * 16 + 16)); + int _100 = vIndex + 70; + ssbos[NonUniformResourceIndex(_88)].Store4(_100 * 16 + 16, asuint(20.0f.xxxx)); + int2 _111 = int2(vUV); + FragColor = uSamplers[NonUniformResourceIndex(_49)].Load(int3(_111, 0)); + int _116 = vIndex + 100; + uint _122; + ssbos[_116].InterlockedAdd(0, 100u, _122); + float _136_tmp = uSamplers[NonUniformResourceIndex(_22)].CalculateLevelOfDetail(uSamps[NonUniformResourceIndex(_32)], vUV); + float2 _136 = _136_tmp.xx; + float _143_tmp = uCombinedSamplers[NonUniformResourceIndex(_49)].CalculateLevelOfDetail(_uCombinedSamplers_sampler[NonUniformResourceIndex(_49)], vUV); + float2 _143 = _143_tmp.xx; + float4 _147 = FragColor; + float2 _149 = _147.xy + (_136 + _143); + FragColor.x = _149.x; + FragColor.y = _149.y; + int _160; + spvTextureSize(uSamplers[NonUniformResourceIndex(_65)], 0u, _160); + FragColor.x += float(int(_160)); + int _176; + 
spvTextureSize(uSamplersMS[NonUniformResourceIndex(_65)], 0u, _176); + FragColor.y += float(int(_176)); + uint _187_dummy_parameter; + float4 _189 = FragColor; + float2 _191 = _189.xy + float2(int2(spvTextureSize(uSamplers[NonUniformResourceIndex(_65)], uint(0), _187_dummy_parameter))); + FragColor.x = _191.x; + FragColor.y = _191.y; + FragColor += uImages[NonUniformResourceIndex(_83)][_111].xxxx; + uint _216_dummy_parameter; + float4 _218 = FragColor; + float2 _220 = _218.xy + float2(int2(spvImageSize(uImages[NonUniformResourceIndex(_65)], _216_dummy_parameter))); + FragColor.x = _220.x; + FragColor.y = _220.y; + uImages[NonUniformResourceIndex(_88)][_111] = 50.0f.x; + uint _248; + InterlockedAdd(uImagesU32[NonUniformResourceIndex(_100)][_111], 40u, _248); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag new file mode 100644 index 00000000000..8923f96a75e --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag @@ -0,0 +1,24 @@ +RWByteAddressBuffer _9 : register(u6, space0); +globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0); +RasterizerOrderedByteAddressBuffer _52 : register(u4, space0); +RWTexture2D img4 : register(u5, space0); +RasterizerOrderedTexture2D img : register(u0, space0); +RasterizerOrderedTexture2D img3 : register(u2, space0); +RasterizerOrderedTexture2D img2 : register(u1, space0); + +void frag_main() +{ + _9.Store(0, uint(0)); + img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f); + img[int2(0, 0)] = img3[int2(0, 0)]; + uint _39; + InterlockedAdd(img2[int2(0, 0)], 1u, _39); + _42.Store(0, uint(int(_42.Load(0)) + 42)); + uint _55; + _42.InterlockedAnd(4, _52.Load(0), _55); +} + +void main() +{ + frag_main(); +} diff --git a/reference/opt/shaders-hlsl/frag/query-lod.desktop.frag 
b/reference/opt/shaders-hlsl/frag/query-lod.desktop.frag index fd95798bf42..a9d4bd83d9d 100644 --- a/reference/opt/shaders-hlsl/frag/query-lod.desktop.frag +++ b/reference/opt/shaders-hlsl/frag/query-lod.desktop.frag @@ -17,7 +17,8 @@ struct SPIRV_Cross_Output void frag_main() { float _19_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vTexCoord); - FragColor = float2(_19_tmp, _19_tmp).xyxy; + float2 _19 = _19_tmp.xx; + FragColor = _19.xyxy; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag b/reference/opt/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag new file mode 100644 index 00000000000..bbe3e4a7d32 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag @@ -0,0 +1,21 @@ +globallycoherent RWByteAddressBuffer _12 : register(u0); + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = asfloat(_12.Load4(0)); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/readonly-coherent-ssbo.frag b/reference/opt/shaders-hlsl/frag/readonly-coherent-ssbo.frag new file mode 100644 index 00000000000..02252f9cbc5 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/readonly-coherent-ssbo.frag @@ -0,0 +1,21 @@ +ByteAddressBuffer _12 : register(t0); + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = asfloat(_12.Load4(0)); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag b/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag index c6539b18342..82688ac5a4f 100644 --- 
a/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag +++ b/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag @@ -24,9 +24,10 @@ struct SPIRV_Cross_Output void frag_main() { - float4 _80 = vDirRef; - _80.z = vDirRef.w; - FragColor = (((((((uSampler2D.SampleCmp(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)) + uSampler2DArray.SampleCmp(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmp(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w)) + uSamplerCubeArray.SampleCmp(_uSamplerCubeArray_sampler, vDirRef, 0.5f)) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1))) + uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w)) + uSampler2D.SampleCmp(_uSampler2D_sampler, _80.xy / _80.z, vDirRef.z / _80.z, int2(1, 1))) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, _80.xy / _80.z, vDirRef.z / _80.z, int2(1, 1)); + float4 _33 = vDirRef; + float4 _80 = _33; + _80.z = _33.w; + FragColor = (((((((uSampler2D.SampleCmp(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)) + uSampler2DArray.SampleCmp(_uSampler2DArray_sampler, _33.xyz, _33.w, int2(-1, -1))) + uSamplerCube.SampleCmp(_uSamplerCube_sampler, _33.xyz, _33.w)) + uSamplerCubeArray.SampleCmp(_uSamplerCubeArray_sampler, _33, 0.5f)) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1))) + uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, _33.xyz, _33.w, int2(-1, -1))) + uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, _33.xyz, _33.w)) + uSampler2D.SampleCmp(_uSampler2D_sampler, _80.xy / _80.z, _33.z / _80.z, int2(1, 1))) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, _80.xy / _80.z, _33.z / _80.z, int2(1, 1)); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/sample-mask-in-and-out.frag 
b/reference/opt/shaders-hlsl/frag/sample-mask-in-and-out.frag new file mode 100644 index 00000000000..185a09821ea --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/sample-mask-in-and-out.frag @@ -0,0 +1,30 @@ +static int gl_SampleMaskIn; +static int gl_SampleMask; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + uint gl_SampleMaskIn : SV_Coverage; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; + uint gl_SampleMask : SV_Coverage; +}; + +void frag_main() +{ + FragColor = 1.0f.xxxx; + gl_SampleMask = gl_SampleMaskIn; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_SampleMaskIn = stage_input.gl_SampleMaskIn; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_SampleMask = gl_SampleMask; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/sample-mask-in.frag b/reference/opt/shaders-hlsl/frag/sample-mask-in.frag new file mode 100644 index 00000000000..8f6cfaf9e53 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/sample-mask-in.frag @@ -0,0 +1,32 @@ +static int gl_SampleID; +static int gl_SampleMaskIn; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + uint gl_SampleID : SV_SampleIndex; + uint gl_SampleMaskIn : SV_Coverage; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + if ((gl_SampleMaskIn & (1 << gl_SampleID)) != 0) + { + FragColor = 1.0f.xxxx; + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_SampleID = stage_input.gl_SampleID; + gl_SampleMaskIn = stage_input.gl_SampleMaskIn; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/sample-mask-out.frag b/reference/opt/shaders-hlsl/frag/sample-mask-out.frag new file mode 100644 index 00000000000..a966c032183 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/sample-mask-out.frag @@ -0,0 +1,23 @@ +static int 
gl_SampleMask; +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; + uint gl_SampleMask : SV_Coverage; +}; + +void frag_main() +{ + FragColor = 1.0f.xxxx; + gl_SampleMask = 0; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_SampleMask = gl_SampleMask; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/sampler-array.frag b/reference/opt/shaders-hlsl/frag/sampler-array.frag index 1eced29be0d..8ecdc6c3a8e 100644 --- a/reference/opt/shaders-hlsl/frag/sampler-array.frag +++ b/reference/opt/shaders-hlsl/frag/sampler-array.frag @@ -24,6 +24,7 @@ void frag_main() void main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; vTex = stage_input.vTex; vIndex = stage_input.vIndex; frag_main(); diff --git a/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag b/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag index 0fb694c543f..6c2d0be4f71 100644 --- a/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag +++ b/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag @@ -11,12 +11,12 @@ struct SPIRV_Cross_Output float FragColor : SV_Target0; }; -float SPIRV_Cross_Reflect(float i, float n) +float spvReflect(float i, float n) { return i - 2.0 * dot(n, i) * n; } -float SPIRV_Cross_Refract(float i, float n, float eta) +float spvRefract(float i, float n, float eta) { float NoI = n * i; float NoI2 = NoI * NoI; @@ -33,8 +33,8 @@ float SPIRV_Cross_Refract(float i, float n, float eta) void frag_main() { - FragColor = SPIRV_Cross_Refract(vRefract.x, vRefract.y, vRefract.z); - FragColor += SPIRV_Cross_Reflect(vRefract.x, vRefract.y); + FragColor = spvRefract(vRefract.x, vRefract.y, vRefract.z); + FragColor += spvReflect(vRefract.x, vRefract.y); FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y; FragColor += reflect(vRefract.xy, 
vRefract.zy).y; } diff --git a/reference/opt/shaders-hlsl/frag/switch-unreachable-break.frag b/reference/opt/shaders-hlsl/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..f25b768b9e5 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/switch-unreachable-break.frag @@ -0,0 +1,49 @@ +cbuffer UBO : register(b0) +{ + int _13_cond : packoffset(c0); + int _13_cond2 : packoffset(c0.y); +}; + + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + bool _49; + switch (_13_cond) + { + case 1: + { + if (_13_cond2 < 50) + { + _49 = false; + break; + } + else + { + discard; + } + break; // unreachable workaround + } + default: + { + _49 = true; + break; + } + } + bool4 _45 = _49.xxxx; + FragColor = float4(_45.x ? 10.0f.xxxx.x : 20.0f.xxxx.x, _45.y ? 10.0f.xxxx.y : 20.0f.xxxx.y, _45.z ? 10.0f.xxxx.z : 20.0f.xxxx.z, _45.w ? 10.0f.xxxx.w : 20.0f.xxxx.w); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/tex-sampling-ms.frag b/reference/opt/shaders-hlsl/frag/tex-sampling-ms.frag index ca88cfaeb3a..d4dd78d8901 100644 --- a/reference/opt/shaders-hlsl/frag/tex-sampling-ms.frag +++ b/reference/opt/shaders-hlsl/frag/tex-sampling-ms.frag @@ -26,6 +26,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/frag/tex-sampling.frag b/reference/opt/shaders-hlsl/frag/tex-sampling.frag index 4f8e8f091ad..caedd37e448 100644 --- a/reference/opt/shaders-hlsl/frag/tex-sampling.frag +++ b/reference/opt/shaders-hlsl/frag/tex-sampling.frag @@ -48,12 +48,12 @@ void frag_main() float3 _88 = float3(texCoord2d, 2.0f); float4 _135 = float4(texCoord3d, 
2.0f); float4 _162 = (((((((((((((((((((tex1d.Sample(_tex1d_sampler, texCoord1d) + tex1d.Sample(_tex1d_sampler, texCoord1d, 1)) + tex1d.SampleLevel(_tex1d_sampler, texCoord1d, 2.0f)) + tex1d.SampleGrad(_tex1d_sampler, texCoord1d, 1.0f, 2.0f)) + tex1d.Sample(_tex1d_sampler, _41.x / _41.y)) + tex1d.SampleBias(_tex1d_sampler, texCoord1d, 1.0f)) + tex2d.Sample(_tex2d_sampler, texCoord2d)) + tex2d.Sample(_tex2d_sampler, texCoord2d, int2(1, 2))) + tex2d.SampleLevel(_tex2d_sampler, texCoord2d, 2.0f)) + tex2d.SampleGrad(_tex2d_sampler, texCoord2d, float2(1.0f, 2.0f), float2(3.0f, 4.0f))) + tex2d.Sample(_tex2d_sampler, _88.xy / _88.z)) + tex2d.SampleBias(_tex2d_sampler, texCoord2d, 1.0f)) + tex3d.Sample(_tex3d_sampler, texCoord3d)) + tex3d.Sample(_tex3d_sampler, texCoord3d, int3(1, 2, 3))) + tex3d.SampleLevel(_tex3d_sampler, texCoord3d, 2.0f)) + tex3d.SampleGrad(_tex3d_sampler, texCoord3d, float3(1.0f, 2.0f, 3.0f), float3(4.0f, 5.0f, 6.0f))) + tex3d.Sample(_tex3d_sampler, _135.xyz / _135.w)) + tex3d.SampleBias(_tex3d_sampler, texCoord3d, 1.0f)) + texCube.Sample(_texCube_sampler, texCoord3d)) + texCube.SampleLevel(_texCube_sampler, texCoord3d, 2.0f)) + texCube.SampleBias(_texCube_sampler, texCoord3d, 1.0f); - float4 _333 = _162; - _333.w = ((_162.w + tex1dShadow.SampleCmp(_tex1dShadow_sampler, float3(texCoord1d, 0.0f, 0.0f).x, 0.0f)) + tex2dShadow.SampleCmp(_tex2dShadow_sampler, float3(texCoord2d, 0.0f).xy, 0.0f)) + texCubeShadow.SampleCmp(_texCubeShadow_sampler, float4(texCoord3d, 0.0f).xyz, 0.0f); - float4 _308 = ((((((((((((((_333 + tex1dArray.Sample(_tex1dArray_sampler, texCoord2d)) + tex2dArray.Sample(_tex2dArray_sampler, texCoord3d)) + texCubeArray.Sample(_texCubeArray_sampler, texCoord4d)) + tex2d.GatherRed(_tex2d_sampler, texCoord2d)) + tex2d.GatherRed(_tex2d_sampler, texCoord2d)) + tex2d.GatherGreen(_tex2d_sampler, texCoord2d)) + tex2d.GatherBlue(_tex2d_sampler, texCoord2d)) + tex2d.GatherAlpha(_tex2d_sampler, texCoord2d)) + tex2d.GatherRed(_tex2d_sampler, 
texCoord2d, int2(1, 1))) + tex2d.GatherRed(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherGreen(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherBlue(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherAlpha(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.Load(int3(int2(1, 2), 0))) + separateTex2d.Sample(samplerNonDepth, texCoord2d); - float4 _336 = _308; - _336.w = _308.w + separateTex2dDepth.SampleCmp(samplerDepth, texCoord3d.xy, texCoord3d.z); - FragColor = _336; + _162.w = ((_162.w + tex1dShadow.SampleCmp(_tex1dShadow_sampler, float3(texCoord1d, 0.0f, 0.0f).x, 0.0f)) + tex2dShadow.SampleCmp(_tex2dShadow_sampler, float3(texCoord2d, 0.0f).xy, 0.0f)) + texCubeShadow.SampleCmp(_texCubeShadow_sampler, float4(texCoord3d, 0.0f).xyz, 0.0f); + float4 _243 = tex2d.GatherRed(_tex2d_sampler, texCoord2d); + float4 _269 = tex2d.GatherRed(_tex2d_sampler, texCoord2d, int2(1, 1)); + float4 _308 = ((((((((((((((_162 + tex1dArray.Sample(_tex1dArray_sampler, texCoord2d)) + tex2dArray.Sample(_tex2dArray_sampler, texCoord3d)) + texCubeArray.Sample(_texCubeArray_sampler, texCoord4d)) + _243) + _243) + tex2d.GatherGreen(_tex2d_sampler, texCoord2d)) + tex2d.GatherBlue(_tex2d_sampler, texCoord2d)) + tex2d.GatherAlpha(_tex2d_sampler, texCoord2d)) + _269) + _269) + tex2d.GatherGreen(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherBlue(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherAlpha(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.Load(int3(int2(1, 2), 0))) + separateTex2d.Sample(samplerNonDepth, texCoord2d); + _308.w = _308.w + separateTex2dDepth.SampleCmp(samplerDepth, texCoord3d.xy, texCoord3d.z); + FragColor = _308; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/tex-sampling.sm30.frag b/reference/opt/shaders-hlsl/frag/tex-sampling.sm30.frag new file mode 100644 index 00000000000..4a2d9b68f61 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/tex-sampling.sm30.frag @@ -0,0 +1,54 @@ +uniform 
sampler1D tex1d; +uniform sampler2D tex2d; +uniform sampler3D tex3d; +uniform samplerCUBE texCube; +uniform sampler1D tex1dShadow; +uniform sampler2D tex2dShadow; + +static float texCoord1d; +static float2 texCoord2d; +static float3 texCoord3d; +static float4 FragColor; +static float4 texCoord4d; + +struct SPIRV_Cross_Input +{ + float texCoord1d : TEXCOORD0; + float2 texCoord2d : TEXCOORD1; + float3 texCoord3d : TEXCOORD2; + float4 texCoord4d : TEXCOORD3; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : COLOR0; +}; + +void frag_main() +{ + float2 _34 = float2(texCoord1d, 2.0f); + float3 _73 = float3(texCoord2d, 2.0f); + float4 _112 = float4(texCoord3d, 2.0f); + float4 _139 = ((((((((((((((((tex1D(tex1d, texCoord1d) + tex1Dlod(tex1d, float4(texCoord1d, 0.0, 0.0, 2.0f))) + tex1Dgrad(tex1d, texCoord1d, 1.0f, 2.0f)) + tex1Dproj(tex1d, float4(_34.x, 0.0, 0.0, _34.y))) + tex1Dbias(tex1d, float4(texCoord1d, 0.0, 0.0, 1.0f))) + tex2D(tex2d, texCoord2d)) + tex2Dlod(tex2d, float4(texCoord2d, 0.0, 2.0f))) + tex2Dgrad(tex2d, texCoord2d, float2(1.0f, 2.0f), float2(3.0f, 4.0f))) + tex2Dproj(tex2d, float4(_73.xy, 0.0, _73.z))) + tex2Dbias(tex2d, float4(texCoord2d, 0.0, 1.0f))) + tex3D(tex3d, texCoord3d)) + tex3Dlod(tex3d, float4(texCoord3d, 2.0f))) + tex3Dgrad(tex3d, texCoord3d, float3(1.0f, 2.0f, 3.0f), float3(4.0f, 5.0f, 6.0f))) + tex3Dproj(tex3d, float4(_112.xyz, _112.w))) + tex3Dbias(tex3d, float4(texCoord3d, 1.0f))) + texCUBE(texCube, texCoord3d)) + texCUBElod(texCube, float4(texCoord3d, 2.0f))) + texCUBEbias(texCube, float4(texCoord3d, 1.0f)); + float3 _147 = float3(texCoord1d, 0.0f, 0.0f); + float4 _171 = float4(texCoord1d, 0.0f, 0.0f, 2.0f); + _171.y = 2.0f; + float3 _194 = float3(texCoord2d, 0.0f); + float4 _219 = float4(texCoord2d, 0.0f, 2.0f); + _219.z = 2.0f; + float4 _264 = _139; + _264.w = (((((((_139.w + tex1Dproj(tex1dShadow, float4(_147.x, 0.0, 0.0f, 1.0)).x) + tex1Dlod(tex1dShadow, float4(_147.x, 0.0, 0.0f, 2.0f)).x) + tex1Dproj(tex1dShadow, 
float4(_171.x, 0.0, 0.0f, _171.y)).x) + tex1Dbias(tex1dShadow, float4(_147.x, 0.0, 0.0f, 1.0f)).x) + tex2Dproj(tex2dShadow, float4(_194.xy, 0.0f, 1.0)).x) + tex2Dlod(tex2dShadow, float4(_194.xy, 0.0f, 2.0f)).x) + tex2Dproj(tex2dShadow, float4(_219.xy, 0.0f, _219.z)).x) + tex2Dbias(tex2dShadow, float4(_194.xy, 0.0f, 1.0f)).x; + FragColor = _264; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + texCoord1d = stage_input.texCoord1d; + texCoord2d = stage_input.texCoord2d; + texCoord3d = stage_input.texCoord3d; + texCoord4d = stage_input.texCoord4d; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = float4(FragColor); + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/texel-fetch-offset.frag b/reference/opt/shaders-hlsl/frag/texel-fetch-offset.frag index d7aa73d5264..9bd27697c5b 100644 --- a/reference/opt/shaders-hlsl/frag/texel-fetch-offset.frag +++ b/reference/opt/shaders-hlsl/frag/texel-fetch-offset.frag @@ -24,6 +24,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/frag/texture-proj-shadow.frag b/reference/opt/shaders-hlsl/frag/texture-proj-shadow.frag index 07e06008a0b..bc710227ec8 100644 --- a/reference/opt/shaders-hlsl/frag/texture-proj-shadow.frag +++ b/reference/opt/shaders-hlsl/frag/texture-proj-shadow.frag @@ -28,15 +28,16 @@ struct SPIRV_Cross_Output void frag_main() { - float4 _20 = vClip4; - _20.y = vClip4.w; - FragColor = uShadow1D.SampleCmp(_uShadow1D_sampler, _20.x / _20.y, vClip4.z / _20.y); - float4 _30 = vClip4; - _30.z = vClip4.w; - FragColor = uShadow2D.SampleCmp(_uShadow2D_sampler, _30.xy / _30.z, vClip4.z / _30.z); + float4 _17 = vClip4; + float4 _20 = _17; + _20.y = _17.w; + FragColor = uShadow1D.SampleCmp(_uShadow1D_sampler, _20.x / _20.y, _17.z / _20.y); 
+ float4 _30 = _17; + _30.z = _17.w; + FragColor = uShadow2D.SampleCmp(_uShadow2D_sampler, _30.xy / _30.z, _17.z / _30.z); FragColor = uSampler1D.Sample(_uSampler1D_sampler, vClip2.x / vClip2.y).x; FragColor = uSampler2D.Sample(_uSampler2D_sampler, vClip3.xy / vClip3.z).x; - FragColor = uSampler3D.Sample(_uSampler3D_sampler, vClip4.xyz / vClip4.w).x; + FragColor = uSampler3D.Sample(_uSampler3D_sampler, _17.xyz / _17.w).x; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/texture-size-combined-image-sampler.frag b/reference/opt/shaders-hlsl/frag/texture-size-combined-image-sampler.frag index d5c373746d8..dd2eb251fc2 100644 --- a/reference/opt/shaders-hlsl/frag/texture-size-combined-image-sampler.frag +++ b/reference/opt/shaders-hlsl/frag/texture-size-combined-image-sampler.frag @@ -8,7 +8,7 @@ struct SPIRV_Cross_Output int2 FooOut : SV_Target0; }; -uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); @@ -18,7 +18,7 @@ uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) void frag_main() { uint _23_dummy_parameter; - FooOut = int2(SPIRV_Cross_textureSize(uTex, uint(0), _23_dummy_parameter)); + FooOut = int2(spvTextureSize(uTex, uint(0), _23_dummy_parameter)); } SPIRV_Cross_Output main() diff --git a/reference/opt/shaders-hlsl/frag/unary-enclose.frag b/reference/opt/shaders-hlsl/frag/unary-enclose.frag index 348b91c1727..85419ef14ad 100644 --- a/reference/opt/shaders-hlsl/frag/unary-enclose.frag +++ b/reference/opt/shaders-hlsl/frag/unary-enclose.frag @@ -1,11 +1,9 @@ static float4 FragColor; static float4 vIn; -static int4 vIn1; struct SPIRV_Cross_Input { float4 vIn : TEXCOORD0; - nointerpolation int4 vIn1 : TEXCOORD1; }; struct SPIRV_Cross_Output @@ -21,7 +19,6 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { vIn = 
stage_input.vIn; - vIn1 = stage_input.vIn1; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/frag/unorm-snorm-packing.frag b/reference/opt/shaders-hlsl/frag/unorm-snorm-packing.frag index 57b5950636e..95786b93b68 100644 --- a/reference/opt/shaders-hlsl/frag/unorm-snorm-packing.frag +++ b/reference/opt/shaders-hlsl/frag/unorm-snorm-packing.frag @@ -27,50 +27,50 @@ struct SPIRV_Cross_Output uint SNORM16Out : SV_Target4; }; -uint SPIRV_Cross_packUnorm4x8(float4 value) +uint spvPackUnorm4x8(float4 value) { uint4 Packed = uint4(round(saturate(value) * 255.0)); return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24); } -float4 SPIRV_Cross_unpackUnorm4x8(uint value) +float4 spvUnpackUnorm4x8(uint value) { uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); return float4(Packed) / 255.0; } -uint SPIRV_Cross_packSnorm4x8(float4 value) +uint spvPackSnorm4x8(float4 value) { int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff; return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24)); } -float4 SPIRV_Cross_unpackSnorm4x8(uint value) +float4 spvUnpackSnorm4x8(uint value) { int SignedValue = int(value); int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24; return clamp(float4(Packed) / 127.0, -1.0, 1.0); } -uint SPIRV_Cross_packUnorm2x16(float2 value) +uint spvPackUnorm2x16(float2 value) { uint2 Packed = uint2(round(saturate(value) * 65535.0)); return Packed.x | (Packed.y << 16); } -float2 SPIRV_Cross_unpackUnorm2x16(uint value) +float2 spvUnpackUnorm2x16(uint value) { uint2 Packed = uint2(value & 0xffff, value >> 16); return float2(Packed) / 65535.0; } -uint SPIRV_Cross_packSnorm2x16(float2 value) +uint spvPackSnorm2x16(float2 value) { int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff; return uint(Packed.x | (Packed.y << 16)); } -float2 
SPIRV_Cross_unpackSnorm2x16(uint value) +float2 spvUnpackSnorm2x16(uint value) { int SignedValue = int(value); int2 Packed = int2(SignedValue << 16, SignedValue) >> 16; @@ -79,16 +79,18 @@ float2 SPIRV_Cross_unpackSnorm2x16(uint value) void frag_main() { - FP32Out = SPIRV_Cross_unpackUnorm4x8(UNORM8); - FP32Out = SPIRV_Cross_unpackSnorm4x8(SNORM8); - float2 _21 = SPIRV_Cross_unpackUnorm2x16(UNORM16); - FP32Out = float4(_21.x, _21.y, FP32Out.z, FP32Out.w); - float2 _26 = SPIRV_Cross_unpackSnorm2x16(SNORM16); - FP32Out = float4(_26.x, _26.y, FP32Out.z, FP32Out.w); - UNORM8Out = SPIRV_Cross_packUnorm4x8(FP32); - SNORM8Out = SPIRV_Cross_packSnorm4x8(FP32); - UNORM16Out = SPIRV_Cross_packUnorm2x16(FP32.xy); - SNORM16Out = SPIRV_Cross_packSnorm2x16(FP32.zw); + FP32Out = spvUnpackUnorm4x8(UNORM8); + FP32Out = spvUnpackSnorm4x8(SNORM8); + float2 _21 = spvUnpackUnorm2x16(UNORM16); + FP32Out.x = _21.x; + FP32Out.y = _21.y; + float2 _31 = spvUnpackSnorm2x16(SNORM16); + FP32Out.x = _31.x; + FP32Out.y = _31.y; + UNORM8Out = spvPackUnorm4x8(FP32); + SNORM8Out = spvPackSnorm4x8(FP32); + UNORM16Out = spvPackUnorm2x16(FP32.xy); + SNORM16Out = spvPackSnorm2x16(FP32.zw); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..4819b14f68e --- /dev/null +++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -0,0 +1,90 @@ +struct BlockOut +{ + float4 a; + float4 b; +}; + +struct BlockOutPrim +{ + float4 a; + float4 b; +}; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u); + +static uint3 gl_WorkGroupID; +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_GlobalInvocationID : 
SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 vOut : TEXCOORD0; + BlockOut outputs : TEXCOORD2; + float4 gl_Position : SV_Position; + float gl_ClipDistance[1] : SV_ClipDistance; + float2 gl_CullDistance : SV_CullDistance; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 vPrim : TEXCOORD1; + BlockOutPrim prim_outputs : TEXCOORD4; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + uint gl_ViewportIndex : SV_ViewportArrayIndex; + uint gl_PrimitiveShadingRateEXT : SV_ShadingRate; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float shared_float[16]; + +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + SetMeshOutputCounts(24u, 22u); + float3 _173 = float3(gl_GlobalInvocationID); + float _174 = _173.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_174, _173.yz, 1.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 6.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_174, _173.yz, 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationIndex < 22u) + { + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uint2(0u, 1u) + gl_LocalInvocationIndex.xx; + int _229 = int(gl_GlobalInvocationID.x); + 
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _229; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _229 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _229 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _229 + 3; + } +} + +[outputtopology("line")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveLineIndicesEXT); +} diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..7436c463ed3 --- /dev/null +++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -0,0 +1,90 @@ +struct BlockOut +{ + float4 a; + float4 b; +}; + +struct BlockOutPrim +{ + float4 a; + float4 b; +}; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u); + +static uint3 gl_WorkGroupID; +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 vOut : TEXCOORD0; + BlockOut outputs : TEXCOORD2; + float4 gl_Position : SV_Position; + float gl_ClipDistance[1] : 
SV_ClipDistance; + float2 gl_CullDistance : SV_CullDistance; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 vPrim : TEXCOORD1; + BlockOutPrim prim_outputs : TEXCOORD4; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + uint gl_ViewportIndex : SV_ViewportArrayIndex; + uint gl_PrimitiveShadingRateEXT : SV_ShadingRate; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float shared_float[16]; + +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint3 gl_PrimitiveTriangleIndicesEXT[22]) +{ + SetMeshOutputCounts(24u, 22u); + float3 _29 = float3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_31, _29.yz, 1.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 3.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_31, _29.yz, 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationIndex < 22u) + { + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(0u, 1u, 2u) + gl_LocalInvocationIndex.xxx; + int _127 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _127; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _127 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _127 + 2; + 
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _127 + 3; + } +} + +[outputtopology("triangle")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint3 gl_PrimitiveTriangleIndicesEXT[22]) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveTriangleIndicesEXT); +} diff --git a/reference/opt/shaders-hlsl/vert/invariant.vert b/reference/opt/shaders-hlsl/vert/invariant.vert new file mode 100644 index 00000000000..ae1ae4b7e6f --- /dev/null +++ b/reference/opt/shaders-hlsl/vert/invariant.vert @@ -0,0 +1,39 @@ +static float4 gl_Position; +static float4 vInput0; +static float4 vInput1; +static float4 vInput2; +static float4 vColor; + +struct SPIRV_Cross_Input +{ + float4 vInput0 : TEXCOORD0; + float4 vInput1 : TEXCOORD1; + float4 vInput2 : TEXCOORD2; +}; + +struct SPIRV_Cross_Output +{ + precise float4 vColor : TEXCOORD0; + precise float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + float4 _21 = mad(vInput1, vInput2, vInput0); + gl_Position = _21; + float4 _27 = vInput0 - vInput1; + float4 _29 = _27 * vInput2; + vColor = _29; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vInput0 = stage_input.vInput0; + vInput1 = stage_input.vInput1; + vInput2 = stage_input.vInput2; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/vert/locations.vert b/reference/opt/shaders-hlsl/vert/locations.vert index 
b007582c2ae..0d14def3428 100644 --- a/reference/opt/shaders-hlsl/vert/locations.vert +++ b/reference/opt/shaders-hlsl/vert/locations.vert @@ -5,6 +5,12 @@ struct Foo float3 c; }; +struct VertexOut +{ + float3 color; + float3 foo; +}; + static const Foo _71 = { 1.0f.xxx, 1.0f.xxx, 1.0f.xxx }; static float4 gl_Position; @@ -16,13 +22,6 @@ static float vLocation1; static float vLocation2[2]; static Foo vLocation4; static float vLocation9; - -struct VertexOut -{ - float3 color : TEXCOORD7; - float3 foo : TEXCOORD8; -}; - static VertexOut vout; struct SPIRV_Cross_Input @@ -38,6 +37,8 @@ struct SPIRV_Cross_Output float vLocation1 : TEXCOORD1; float vLocation2[2] : TEXCOORD2; Foo vLocation4 : TEXCOORD4; + float3 VertexOut_color : TEXCOORD7; + float3 VertexOut_foo : TEXCOORD8; float vLocation9 : TEXCOORD9; float4 gl_Position : SV_Position; }; @@ -55,13 +56,12 @@ void vert_main() vout.foo = 4.0f.xxx; } -SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input, out VertexOut stage_outputvout) +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { Input2 = stage_input.Input2; Input4 = stage_input.Input4; Input0 = stage_input.Input0; vert_main(); - stage_outputvout = vout; SPIRV_Cross_Output stage_output; stage_output.gl_Position = gl_Position; stage_output.vLocation0 = vLocation0; @@ -69,5 +69,7 @@ SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input, out VertexOut stage_outpu stage_output.vLocation2 = vLocation2; stage_output.vLocation4 = vLocation4; stage_output.vLocation9 = vLocation9; + stage_output.VertexOut_color = vout.color; + stage_output.VertexOut_foo = vout.foo; return stage_output; } diff --git a/reference/opt/shaders-hlsl/vert/no-contraction.vert b/reference/opt/shaders-hlsl/vert/no-contraction.vert new file mode 100644 index 00000000000..10763fbee5a --- /dev/null +++ b/reference/opt/shaders-hlsl/vert/no-contraction.vert @@ -0,0 +1,39 @@ +static float4 gl_Position; +static float4 vA; +static float4 vB; +static float4 vC; + +struct SPIRV_Cross_Input +{ + float4 
vA : TEXCOORD0; + float4 vB : TEXCOORD1; + float4 vC : TEXCOORD2; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + precise float4 _15 = vA * vB; + precise float4 _19 = vA + vB; + precise float4 _23 = vA - vB; + precise float4 _30 = _15 + vC; + precise float4 _34 = _15 + _19; + precise float4 _36 = _34 + _23; + precise float4 _38 = _36 + _30; + gl_Position = _38; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vA = stage_input.vA; + vB = stage_input.vB; + vC = stage_input.vC; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/vert/qualifiers.vert b/reference/opt/shaders-hlsl/vert/qualifiers.vert index 13ee2a8c1c0..bbf7dc61e45 100644 --- a/reference/opt/shaders-hlsl/vert/qualifiers.vert +++ b/reference/opt/shaders-hlsl/vert/qualifiers.vert @@ -1,17 +1,16 @@ +struct Block +{ + float vFlat; + float vCentroid; + float vSample; + float vNoperspective; +}; + static float4 gl_Position; static float vFlat; static float vCentroid; static float vSample; static float vNoperspective; - -struct Block -{ - nointerpolation float vFlat : TEXCOORD4; - centroid float vCentroid : TEXCOORD5; - sample float vSample : TEXCOORD6; - noperspective float vNoperspective : TEXCOORD7; -}; - static Block vout; struct SPIRV_Cross_Output @@ -20,6 +19,10 @@ struct SPIRV_Cross_Output centroid float vCentroid : TEXCOORD1; sample float vSample : TEXCOORD2; noperspective float vNoperspective : TEXCOORD3; + nointerpolation float Block_vFlat : TEXCOORD4; + centroid float Block_vCentroid : TEXCOORD5; + sample float Block_vSample : TEXCOORD6; + noperspective float Block_vNoperspective : TEXCOORD7; float4 gl_Position : SV_Position; }; @@ -36,15 +39,18 @@ void vert_main() vout.vNoperspective = 3.0f; } -SPIRV_Cross_Output main(out Block stage_outputvout) +SPIRV_Cross_Output main() { vert_main(); - stage_outputvout = vout; 
SPIRV_Cross_Output stage_output; stage_output.gl_Position = gl_Position; stage_output.vFlat = vFlat; stage_output.vCentroid = vCentroid; stage_output.vSample = vSample; stage_output.vNoperspective = vNoperspective; + stage_output.Block_vFlat = vout.vFlat; + stage_output.Block_vCentroid = vout.vCentroid; + stage_output.Block_vSample = vout.vSample; + stage_output.Block_vNoperspective = vout.vNoperspective; return stage_output; } diff --git a/reference/opt/shaders-hlsl/vert/return-array.vert b/reference/opt/shaders-hlsl/vert/return-array.vert index bd157556338..be11c3f1a55 100644 --- a/reference/opt/shaders-hlsl/vert/return-array.vert +++ b/reference/opt/shaders-hlsl/vert/return-array.vert @@ -1,10 +1,8 @@ static float4 gl_Position; -static float4 vInput0; static float4 vInput1; struct SPIRV_Cross_Input { - float4 vInput0 : TEXCOORD0; float4 vInput1 : TEXCOORD1; }; @@ -20,7 +18,6 @@ void vert_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { - vInput0 = stage_input.vInput0; vInput1 = stage_input.vInput1; vert_main(); SPIRV_Cross_Output stage_output; diff --git a/reference/opt/shaders-msl/amd/shader_trinary_minmax.msl21.comp b/reference/opt/shaders-msl/amd/shader_trinary_minmax.msl21.comp new file mode 100644 index 00000000000..9c33c22ca86 --- /dev/null +++ b/reference/opt/shaders-msl/amd/shader_trinary_minmax.msl21.comp @@ -0,0 +1,11 @@ +#include +#include + +using namespace metal; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +kernel void main0() +{ +} + diff --git a/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp b/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp index feb7dbbe524..513f8763a32 100644 --- a/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp @@ -7,20 +7,21 @@ using namespace metal; -struct u0_counters -{ - uint c; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline 
__attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct u0_counters +{ + uint c; +}; + kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); + uint _29 = atomic_fetch_sub_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(as_type(as_type(_29)))); } diff --git a/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp b/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp index 22409301c9c..55c41374c3b 100644 --- a/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp @@ -7,20 +7,21 @@ using namespace metal; -struct u0_counters -{ - uint c; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct u0_counters +{ + uint c; +}; + kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); + uint _29 = atomic_fetch_add_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(as_type(as_type(_29)))); } diff --git a/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp b/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp index 47ce85f8fc3..cbbf27d65da 100644 --- a/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp @@ -15,7 +15,7 @@ 
struct _4 int4 _m1; }; -kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]]) +kernel void main0(device _3& __restrict _5 [[buffer(0)]], device _4& __restrict _6 [[buffer(1)]]) { _6._m0 = _5._m1 + uint4(_5._m0); _6._m0 = uint4(_5._m0) + _5._m1; diff --git a/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..a55d8916dfa --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +kernel void main0(device _3& __restrict _5 [[buffer(0)]], device _4& __restrict _6 [[buffer(1)]]) +{ + _6._m0 = uint4(int4(_5._m1) < _5._m0); + _6._m0 = uint4(int4(_5._m1) <= _5._m0); + _6._m0 = uint4(_5._m1 < uint4(_5._m0)); + _6._m0 = uint4(_5._m1 <= uint4(_5._m0)); + _6._m0 = uint4(int4(_5._m1) > _5._m0); + _6._m0 = uint4(int4(_5._m1) >= _5._m0); + _6._m0 = uint4(_5._m1 > uint4(_5._m0)); + _6._m0 = uint4(_5._m1 >= uint4(_5._m0)); +} + diff --git a/reference/opt/shaders-msl/asm/comp/block-name-alias-global.asm.comp b/reference/opt/shaders-msl/asm/comp/block-name-alias-global.asm.comp index 2928efda2c4..6dcc14ea8d5 100644 --- a/reference/opt/shaders-msl/asm/comp/block-name-alias-global.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/block-name-alias-global.asm.comp @@ -18,11 +18,12 @@ struct A_2 { int a; int b; + char _m0_final_padding[8]; }; struct A_3 { - /* FIXME: A padded struct is needed here. If you see this message, file a bug! */ A_2 Data[1024]; + A_2 Data[1024]; }; struct B @@ -32,7 +33,7 @@ struct B struct B_1 { - /* FIXME: A padded struct is needed here. If you see this message, file a bug! 
*/ A_2 Data[1024]; + A_2 Data[1024]; }; kernel void main0(device A_1& C1 [[buffer(0)]], constant A_3& C2 [[buffer(1)]], device B& C3 [[buffer(2)]], constant B_1& C4 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) diff --git a/reference/opt/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp b/reference/opt/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp index fa2b5fe53a7..db0ade34b4b 100644 --- a/reference/opt/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp @@ -5,17 +5,18 @@ using namespace metal; -struct cb5_struct -{ - float4 _m0[5]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct cb5_struct +{ + float4 _m0[5]; +}; + kernel void main0(constant cb5_struct& cb0_5 [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { uint _44 = as_type(as_type(int(gl_LocalInvocationID.x) << 4)) >> 2u; diff --git a/reference/opt/shaders-msl/asm/comp/buffer-write.asm.comp b/reference/opt/shaders-msl/asm/comp/buffer-write.asm.comp index 159d09b38c8..89e8d83ea71 100644 --- a/reference/opt/shaders-msl/asm/comp/buffer-write.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/buffer-write.asm.comp @@ -5,17 +5,18 @@ using namespace metal; -struct cb -{ - float value; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct cb +{ + float value; +}; + kernel void main0(constant cb& _6 [[buffer(0)]], texture2d _buffer [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { _buffer.write(float4(_6.value), spvTexelBufferCoord(((32u * 
gl_WorkGroupID.x) + gl_LocalInvocationIndex))); diff --git a/reference/opt/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp b/reference/opt/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp new file mode 100644 index 00000000000..986e9096633 --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct _19 +{ +}; +struct _5 +{ + int _m0; + _19 _m1; + char _m2_pad[4]; + _19 _m2; + char _m3_pad[4]; + int _m3; +}; + +kernel void main0(device _5& _3 [[buffer(0)]], device _5& _4 [[buffer(1)]]) +{ + _4 = _3; +} + diff --git a/reference/opt/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp b/reference/opt/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp new file mode 100644 index 00000000000..4bcfeb21ab5 --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct _19 +{ +}; +struct _5 +{ + int _m0; + char _m1_pad[12]; + _19 _m1; + char _m2_pad[16]; + _19 _m2; + char _m3_pad[16]; + int _m3; +}; + +kernel void main0(constant _5& _3 [[buffer(0)]], device _5& _4 [[buffer(1)]]) +{ + _4 = _3; +} + diff --git a/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp b/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp deleted file mode 100644 index fb97d0da9bd..00000000000 --- a/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp +++ /dev/null @@ -1,10 +0,0 @@ -#include -#include - -using namespace metal; - -kernel void main0(texture2d TargetTexture [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - TargetTexture.write((TargetTexture.read(uint2(gl_WorkGroupID.xy)).xy + float2(1.0)).xyyy, uint2((gl_WorkGroupID.xy + uint2(1u)))); -} - diff --git a/reference/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp 
b/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp similarity index 80% rename from reference/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp rename to reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp index c90faf9ef26..536556391ec 100644 --- a/reference/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -5,7 +5,8 @@ using namespace metal; -void _main(thread const uint3& id, thread texture2d TargetTexture) +static inline __attribute__((always_inline)) +void _main(thread const uint3& id, texture2d TargetTexture) { float2 loaded = TargetTexture.read(uint2(id.xy)).xy; float2 storeTemp = loaded + float2(1.0); diff --git a/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp b/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp index 7652733268f..35843733790 100644 --- a/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp @@ -15,7 +15,7 @@ struct _7 int4 _m1; }; -kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]]) +kernel void main0(device _6& __restrict _8 [[buffer(0)]], device _7& __restrict _9 [[buffer(1)]]) { _9._m0 = _8._m1 + uint4(_8._m0); _9._m0 = uint4(_8._m0) + _8._m1; diff --git a/reference/opt/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp b/reference/opt/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp new file mode 100644 index 00000000000..d643379aaf6 --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp @@ -0,0 +1,110 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _29 +{ + spvUnsafeArray, 3> _m0; +}; + +struct _7 +{ + int _m0[1]; +}; + +constant int3 _32 = {}; +constant int _3_tmp [[function_constant(0)]]; +constant int _3 = is_function_constant_defined(_3_tmp) ? _3_tmp : 0; +constant int _4_tmp [[function_constant(1)]]; +constant int _4 = is_function_constant_defined(_4_tmp) ? _4_tmp : 0; +constant int _5_tmp [[function_constant(2)]]; +constant int _5 = is_function_constant_defined(_5_tmp) ? 
_5_tmp : 0; +constant spvUnsafeArray _36 = spvUnsafeArray({ _3, 0, 0 }); +constant spvUnsafeArray _37 = spvUnsafeArray({ _3, _4, 0 }); +constant spvUnsafeArray _38 = spvUnsafeArray({ _3, _4, _5 }); +constant spvUnsafeArray _39 = spvUnsafeArray({ _4, 0, 0 }); +constant spvUnsafeArray _40 = spvUnsafeArray({ _4, _5, 0 }); +constant spvUnsafeArray _41 = spvUnsafeArray({ _4, _5, _3 }); +constant spvUnsafeArray _42 = spvUnsafeArray({ _5, 0, 0 }); +constant spvUnsafeArray _43 = spvUnsafeArray({ _5, _3, 0 }); +constant spvUnsafeArray _44 = spvUnsafeArray({ _5, _3, _4 }); +constant spvUnsafeArray, 3> _45 = spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ 0, 0, 0 }), spvUnsafeArray({ 0, 0, 0 }) }); +constant spvUnsafeArray, 3> _46 = spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _3 }), spvUnsafeArray({ 0, 0, 0 }) }); +constant spvUnsafeArray, 3> _47 = spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _3 }), spvUnsafeArray({ _5, _3, _4 }) }); +constant _29 _48 = _29{ spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _3 }), spvUnsafeArray({ _5, _3, _4 }) }) }; +constant int _50 = _48._m0[0][0]; +constant int _51 = _48._m0[1][0]; +constant int _52 = _48._m0[0][1]; +constant int _53 = _48._m0[2][2]; +constant int _54 = _48._m0[2][0]; +constant int _55 = _48._m0[1][1]; +constant bool _56 = (_50 == _51); +constant bool _57 = (_52 == _53); +constant bool _58 = (_54 == _55); +constant int _59 = int(_56); +constant int _60 = int(_57); +constant int _61 = _58 ? 
2 : 1; +constant int3 _62 = int3(_3, 0, 0); +constant int3 _63 = int3(0, _4, 0); +constant int3 _64 = int3(0, 0, _5); +constant int3 _65 = int3(_62.x, 0, _62.z); +constant int3 _66 = int3(0, _63.y, _63.x); +constant int3 _67 = int3(_64.z, 0, _64.z); +constant int3 _68 = int3(_65.y, _65.x, _66.y); +constant int3 _69 = int3(_67.z, _68.y, _68.z); +constant int _70 = _69.x; +constant int _71 = _69.y; +constant int _72 = _69.z; +constant int _73 = (_70 - _71); +constant int _74 = (_73 * _72); + +constant spvUnsafeArray _33 = spvUnsafeArray({ 0, 0, 0 }); +constant spvUnsafeArray, 3> _34 = spvUnsafeArray, 3>({ spvUnsafeArray({ 0, 0, 0 }), spvUnsafeArray({ 0, 0, 0 }), spvUnsafeArray({ 0, 0, 0 }) }); + +kernel void main0(device _7& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _9._m0[gl_GlobalInvocationID.x] = _8._m0[gl_GlobalInvocationID.x] + ((((1 - _59) * _60) * (_61 - 1)) * _74); +} + diff --git a/reference/opt/shaders-msl/asm/comp/quantize.asm.comp b/reference/opt/shaders-msl/asm/comp/quantize.asm.comp index 1839ec7a3b8..672c2b20883 100644 --- a/reference/opt/shaders-msl/asm/comp/quantize.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/quantize.asm.comp @@ -1,8 +1,21 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + #include #include using namespace metal; +template struct SpvHalfTypeSelector; +template <> struct SpvHalfTypeSelector { public: using H = half; }; +template struct SpvHalfTypeSelector> { using H = vec; }; +template::H> +[[clang::optnone]] F spvQuantizeToF16(F fval) +{ + H hval = H(fval); + hval = select(copysign(H(0), hval), hval, isnormal(hval) || isinf(hval) || isnan(hval)); + return F(hval); +} + struct SSBO0 { float scalar; @@ -13,9 +26,9 @@ struct SSBO0 kernel void main0(device SSBO0& _4 [[buffer(0)]]) { - _4.scalar = float(half(_4.scalar)); - _4.vec2_val = float2(half2(_4.vec2_val)); - _4.vec3_val = float3(half3(_4.vec3_val)); - _4.vec4_val = float4(half4(_4.vec4_val)); + 
_4.scalar = spvQuantizeToF16(_4.scalar); + _4.vec2_val = spvQuantizeToF16(_4.vec2_val); + _4.vec3_val = spvQuantizeToF16(_4.vec3_val); + _4.vec4_val = spvQuantizeToF16(_4.vec4_val); } diff --git a/reference/opt/shaders-msl/asm/comp/uint_smulextended.asm.comp b/reference/opt/shaders-msl/asm/comp/uint_smulextended.asm.comp new file mode 100644 index 00000000000..6996f7fd26a --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/uint_smulextended.asm.comp @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct _4 +{ + uint _m0[1]; +}; + +struct _20 +{ + uint _m0; + uint _m1; +}; + +kernel void main0(device _4& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]], device _4& _7 [[buffer(2)]], device _4& _8 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _20 _28; + _28._m0 = uint(int(_5._m0[gl_GlobalInvocationID.x]) * int(_6._m0[gl_GlobalInvocationID.x])); + _28._m1 = uint(mulhi(int(_5._m0[gl_GlobalInvocationID.x]), int(_6._m0[gl_GlobalInvocationID.x]))); + _7._m0[gl_GlobalInvocationID.x] = _28._m0; + _8._m0[gl_GlobalInvocationID.x] = _28._m1; +} + diff --git a/reference/opt/shaders-msl/asm/comp/undefined-constant-composite.asm.comp b/reference/opt/shaders-msl/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 00000000000..359e8913fc6 --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct _20 +{ + int _m0; + int _m1; +}; + +struct _5 +{ + int _m0[10]; +}; + +struct _7 +{ + int _m0[10]; +}; + +constant int _28 = {}; + +kernel void main0(device _5& _6 [[buffer(0)]], device _7& _8 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _6._m0[gl_GlobalInvocationID.x] = _8._m0[gl_GlobalInvocationID.x] + (_20{ _28, 200 })._m1; +} + diff --git a/reference/opt/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp 
b/reference/opt/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp new file mode 100644 index 00000000000..0cb22e1761d --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct _21 +{ + int _m0; + int _m1; +}; + +struct _5 +{ + int _m0[10]; +}; + +struct _7 +{ + int _m0[10]; +}; + +constant int _29 = {}; +constant int _9_tmp [[function_constant(0)]]; +constant int _9 = is_function_constant_defined(_9_tmp) ? _9_tmp : 0; +constant _21 _30 = _21{ _9, _29 }; + +kernel void main0(device _5& _6 [[buffer(0)]], device _7& _8 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _6._m0[gl_GlobalInvocationID.x] = (_8._m0[gl_GlobalInvocationID.x] + _30._m0) + (_21{ _29, 200 })._m1; +} + diff --git a/reference/opt/shaders-msl/asm/comp/variable-pointers-2.asm.comp b/reference/opt/shaders-msl/asm/comp/variable-pointers-2.asm.comp index b2dfc01b196..a276b400c00 100644 --- a/reference/opt/shaders-msl/asm/comp/variable-pointers-2.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/variable-pointers-2.asm.comp @@ -17,20 +17,35 @@ struct bar kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - bool _70 = cb.d != 0; - device foo* _71 = _70 ? &buf : nullptr; - device foo* _67 = _71; - device foo* _45 = _71; - thread uint3* _77 = _70 ? &gl_GlobalInvocationID : &gl_LocalInvocationID; - thread uint3* _73 = _77; - for (device int* _52 = &_71->a[0u], * _55 = &buf.a[0u]; (*_52) != (*_55); ) + bool _71 = cb.d != 0; + device foo* _72 = _71 ? &buf : nullptr; + device foo* _67 = _72; + device foo* _45 = _72; + thread uint3* _79 = _71 ? 
&gl_GlobalInvocationID : &gl_LocalInvocationID; + thread uint3* _74 = _79; + device int* _52; + device int* _55; + _52 = &_72->a[0u]; + _55 = &buf.a[0u]; + int _57; + int _58; + for (;;) { - int _66 = ((*_52) + (*_55)) + int((*_77).x); - *_52 = _66; - *_55 = _66; - _52 = &_52[1u]; - _55 = &_55[1u]; - continue; + _57 = *_52; + _58 = *_55; + if (_57 != _58) + { + int _66 = (_57 + _58) + int((*_79).x); + *_52 = _66; + *_55 = _66; + _52 = &_52[1u]; + _55 = &_55[1u]; + continue; + } + else + { + break; + } } } diff --git a/reference/opt/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp b/reference/opt/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp index b4e03a2924f..e1861730f5c 100644 --- a/reference/opt/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp @@ -15,11 +15,11 @@ struct bar kernel void main0(device foo& x [[buffer(0)]], device bar& y [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - device int* _46 = (gl_GlobalInvocationID.x != 0u) ? &x.a : &y.b; - device int* _40 = _46; - device int* _33 = _46; + device int* _47 = (gl_GlobalInvocationID.x != 0u) ? 
&x.a : &y.b; + device int* _40 = _47; + device int* _33 = _47; int _37 = x.a; - *_46 = 0; + *_47 = 0; y.b = _37 + _37; } diff --git a/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp b/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp index 641f108e8a1..afbcadd0b95 100644 --- a/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp @@ -5,7 +5,7 @@ using namespace metal; struct cb1_struct { - float4 _m0[1]; + float4 _RESERVED_IDENTIFIER_FIXUP_m0[1]; }; constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 16u, 1u); @@ -13,14 +13,14 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 16u, 1u); kernel void main0(constant cb1_struct& cb0_1 [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { int2 _46 = int2(u0.get_width(), u0.get_height()) >> int2(uint2(4u)); - int _97; - _97 = 0; - for (; _97 < _46.y; _97++) + int _98; + _98 = 0; + for (; _98 < _46.y; _98++) { - for (int _98 = 0; _98 < _46.x; ) + for (int _99 = 0; _99 < _46.x; ) { - u0.write(cb0_1._m0[0].xxxx, uint2(((_46 * int3(gl_LocalInvocationID).xy) + int2(_97, _98)))); - _98++; + u0.write(cb0_1._RESERVED_IDENTIFIER_FIXUP_m0[0].xxxx, uint2(((_46 * int3(gl_LocalInvocationID).xy) + int2(_98, _99)))); + _99++; continue; } } diff --git a/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast.asm.comp b/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast.asm.comp index 7f6d4bd900e..e572525ebd7 100644 --- a/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast.asm.comp @@ -5,7 +5,7 @@ using namespace metal; struct cb1_struct { - float4 _m0[1]; + float4 _RESERVED_IDENTIFIER_FIXUP_m0[1]; }; constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 16u, 1u); @@ -19,7 +19,7 @@ kernel void main0(constant 
cb1_struct& cb0_1 [[buffer(0)]], texture2d +#include + +using namespace metal; + +struct _7 +{ + float4 _m0[64]; +}; + +struct main0_out +{ + float4 m_3 [[color(0)]]; +}; + +struct main0_in +{ + float4 m_2 [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], device _7& _10 [[buffer(0)]], texture2d _8 [[texture(0)]]) +{ + main0_out out = {}; + for (int _154 = 0; _154 < 64; ) + { + _10._m0[_154] = _8.read(uint2(int2(_154 - 8 * (_154 / 8), _154 / 8)), 0); + _154++; + continue; + } + out.m_3 = in.m_2; + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag b/reference/opt/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag new file mode 100644 index 00000000000..bbe0acd75b8 --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct _7 +{ + float4 _m0[64]; +}; + +struct main0_out +{ + float4 m_3 [[color(0)]]; +}; + +struct main0_in +{ + float4 m_2 [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], device _7& _10 [[buffer(0)]], texture2d _8 [[texture(0)]], sampler _9 [[sampler(0)]]) +{ + main0_out out = {}; + for (int _158 = 0; _158 < 64; ) + { + _10._m0[_158] = _8.sample(_9, (float2(int2(_158 - 8 * (_158 / 8), _158 / 8)) * float2(0.125)), level(0.0)); + _158++; + continue; + } + out.m_3 = in.m_2; + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/descriptor-array-unnamed.asm.frag b/reference/opt/shaders-msl/asm/frag/descriptor-array-unnamed.asm.frag index 1870f67194e..58f02ad0726 100644 --- a/reference/opt/shaders-msl/asm/frag/descriptor-array-unnamed.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/descriptor-array-unnamed.asm.frag @@ -42,7 +42,7 @@ fragment main0_out main0(const device _4* _5_0 [[buffer(0)]], const device _4* _ }; main0_out out = {}; - out.m_3 = _5[_20._m0]->_m0 + (_8[_20._m0]->_m0 * 
float4(0.20000000298023223876953125)); + out.m_3 = fma(_8[_20._m0]->_m0, float4(0.20000000298023223876953125), _5[_20._m0]->_m0); return out; } diff --git a/reference/opt/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag b/reference/opt/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag new file mode 100644 index 00000000000..b64ccabe6bc --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 o1 [[color(1)]]; + float4 o3 [[color(3)]]; + float4 o6 [[color(6)]]; + float4 o7 [[color(7)]]; +}; + +fragment main0_out main0() +{ + float4 o0; + float4 o2; + float4 o4; + float4 o5; + float gl_FragDepth; + int gl_FragStencilRefARB; + main0_out out = {}; + o0 = float4(0.0, 0.0, 0.0, 1.0); + out.o1 = float4(1.0, 0.0, 0.0, 1.0); + o2 = float4(0.0, 1.0, 0.0, 1.0); + out.o3 = float4(0.0, 0.0, 1.0, 1.0); + o4 = float4(1.0, 0.0, 1.0, 0.5); + o5 = float4(0.25); + out.o6 = float4(0.75); + out.o7 = float4(1.0); + gl_FragDepth = 0.89999997615814208984375; + gl_FragStencilRefARB = uint(127); + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag b/reference/opt/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag index 649f9f6f187..09f8ed8c0d6 100644 --- a/reference/opt/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag @@ -23,8 +23,8 @@ struct main0_out fragment main0_out main0(constant buf& _11 [[buffer(0)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; - int _67 = int(gl_FragCoord.x) % 16; - out._entryPointOutput = float4(dot(float3(_11.results[_67].a), _11.bar.xyz), _11.results[_67].b, 0.0, 0.0); + int _68 = int(gl_FragCoord.x) % 16; + out._entryPointOutput = float4(dot(float3(_11.results[_68].a), _11.bar.xyz), _11.results[_68].b, 0.0, 0.0); return out; } diff 
--git a/reference/opt/shaders-msl/asm/frag/inf-nan-constant.asm.frag b/reference/opt/shaders-msl/asm/frag/inf-nan-constant.asm.frag index 8537dac19a1..067719896b8 100644 --- a/reference/opt/shaders-msl/asm/frag/inf-nan-constant.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/inf-nan-constant.asm.frag @@ -11,7 +11,7 @@ struct main0_out fragment main0_out main0() { main0_out out = {}; - out.FragColor = float3(as_type(0x7f800000u), as_type(0xff800000u), as_type(0x7fc00000u)); + out.FragColor = float3(as_type(0x7f800000u /* inf */), as_type(0xff800000u /* -inf */), as_type(0x7fc00000u /* nan */)); return out; } diff --git a/reference/opt/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag b/reference/opt/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag index 41472adac94..fe49e09aa4e 100644 --- a/reference/opt/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag @@ -21,27 +21,27 @@ struct main0_out struct main0_in { - float2 Input_v0 [[user(locn0)]]; - float2 Input_v1 [[user(locn1), center_no_perspective]]; - float3 Input_v2 [[user(locn2), centroid_perspective]]; - float4 Input_v3 [[user(locn3), centroid_no_perspective]]; - float Input_v4 [[user(locn4), sample_perspective]]; - float Input_v5 [[user(locn5), sample_no_perspective]]; - float Input_v6 [[user(locn6), flat]]; + float2 inp_v0 [[user(locn0)]]; + float2 inp_v1 [[user(locn1), center_no_perspective]]; + float3 inp_v2 [[user(locn2), centroid_perspective]]; + float4 inp_v3 [[user(locn3), centroid_no_perspective]]; + float inp_v4 [[user(locn4), sample_perspective]]; + float inp_v5 [[user(locn5), sample_no_perspective]]; + float inp_v6 [[user(locn6), flat]]; }; fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; Input inp = {}; - inp.v0 = in.Input_v0; - inp.v1 = in.Input_v1; - inp.v2 = in.Input_v2; - inp.v3 = in.Input_v3; - inp.v4 = in.Input_v4; - inp.v5 = in.Input_v5; - inp.v6 = 
in.Input_v6; - out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, ((inp.v3.w * inp.v4) + inp.v5) - inp.v6); + inp.v0 = in.inp_v0; + inp.v1 = in.inp_v1; + inp.v2 = in.inp_v2; + inp.v3 = in.inp_v3; + inp.v4 = in.inp_v4; + inp.v5 = in.inp_v5; + inp.v6 = in.inp_v6; + out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, fma(inp.v3.w, inp.v4, inp.v5) - inp.v6); return out; } diff --git a/reference/opt/shaders-msl/asm/frag/line-directive.line.asm.frag b/reference/opt/shaders-msl/asm/frag/line-directive.line.asm.frag index 30018aad4c9..27b7d4771f7 100644 --- a/reference/opt/shaders-msl/asm/frag/line-directive.line.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/line-directive.line.asm.frag @@ -17,14 +17,12 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float _80; #line 8 "test.frag" out.FragColor = 1.0; #line 9 "test.frag" out.FragColor = 2.0; #line 10 "test.frag" - _80 = in.vColor; - if (_80 < 0.0) + if (in.vColor < 0.0) { #line 12 "test.frag" out.FragColor = 3.0; @@ -34,16 +32,19 @@ fragment main0_out main0(main0_in in [[stage_in]]) #line 16 "test.frag" out.FragColor = 4.0; } - for (int _126 = 0; float(_126) < (40.0 + _80); ) +#line 19 "test.frag" + for (int _127 = 0; float(_127) < (40.0 + in.vColor); ) { #line 21 "test.frag" out.FragColor += 0.20000000298023223876953125; #line 22 "test.frag" out.FragColor += 0.300000011920928955078125; - _126 += (int(_80) + 5); +#line 19 "test.frag" + _127 += (int(in.vColor) + 5); continue; } - switch (int(_80)) +#line 25 "test.frag" + switch (int(in.vColor)) { case 0: { @@ -69,7 +70,8 @@ fragment main0_out main0(main0_in in [[stage_in]]) } for (;;) { - out.FragColor += (10.0 + _80); +#line 42 "test.frag" + out.FragColor += (10.0 + in.vColor); #line 43 "test.frag" if (out.FragColor < 100.0) { @@ -79,6 +81,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) break; } } +#line 48 "test.frag" return out; } diff --git 
a/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag b/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag index 610d447a8d7..726976631ac 100644 --- a/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag @@ -1,13 +1,52 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -constant float _46[16] = { 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }; -constant float4 _76[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; -constant float4 _90[4] = { float4(20.0), float4(30.0), float4(50.0), float4(60.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _46 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }); +constant spvUnsafeArray _76 = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); +constant spvUnsafeArray _90 = spvUnsafeArray({ float4(20.0), float4(30.0), float4(50.0), float4(60.0) }); struct main0_out { @@ -19,23 +58,10 @@ struct main0_in int index [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability 
to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0(main0_in in [[stage_in]]) { - float4 foobar[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; - float4 baz[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; + spvUnsafeArray foobar = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); + spvUnsafeArray baz = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); main0_out out = {}; out.FragColor = _46[in.index]; if (in.index < 10) @@ -61,7 +87,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) } int _37 = in.index & 3; out.FragColor += foobar[_37].z; - spvArrayCopyFromConstant1(baz, _90); + baz = _90; out.FragColor += baz[_37].z; return out; } diff --git a/reference/opt/shaders-msl/asm/frag/op-image-sampled-image.asm.frag b/reference/opt/shaders-msl/asm/frag/op-image-sampled-image.asm.frag index 45f0ca52f4a..807fde3f49c 100644 --- a/reference/opt/shaders-msl/asm/frag/op-image-sampled-image.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/op-image-sampled-image.asm.frag @@ -16,7 +16,7 @@ struct main0_out fragment main0_out main0(constant push_cb& _19 [[buffer(0)]], texture2d t0 [[texture(0)]], sampler dummy_sampler [[sampler(0)]]) { main0_out out = {}; - out.o0 = t0.read(uint2(as_type(_19.cb0[0u].zw)) + uint2(int2(-1, -2)), as_type(0.0)); + out.o0 = t0.read(uint2(as_type(_19.cb0[0u].zw)) + uint2(int2(-1, -2)), 0); return out; } diff --git a/reference/opt/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag b/reference/opt/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag new file mode 100644 index 00000000000..6a6b1622d20 --- /dev/null +++ 
b/reference/opt/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag @@ -0,0 +1,181 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _13 +{ + float4 x; + float4 y; + float4 z; + spvUnsafeArray u; + spvUnsafeArray v; + spvUnsafeArray w; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + interpolant foo [[user(locn0)]]; + interpolant bar [[user(locn1)]]; + interpolant baz [[user(locn2)]]; + int sid [[user(locn3)]]; + interpolant a_0 [[user(locn4)]]; + interpolant a_1 [[user(locn5)]]; + interpolant b_0 [[user(locn6)]]; + interpolant b_1 [[user(locn7)]]; + interpolant c_0 [[user(locn8)]]; + interpolant c_1 [[user(locn9)]]; + interpolant s_x [[user(locn10)]]; + interpolant s_y [[user(locn11)]]; + interpolant s_z [[user(locn12)]]; + interpolant s_u_0 [[user(locn13)]]; + interpolant s_u_1 [[user(locn14)]]; + interpolant s_v_0 [[user(locn15)]]; + interpolant s_v_1 [[user(locn16)]]; + interpolant s_w_0 [[user(locn17)]]; + interpolant s_w_1 [[user(locn18)]]; + interpolant s_w_2 [[user(locn19)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], uint gl_SampleID [[sample_id]]) +{ + main0_out out 
= {}; + spvUnsafeArray a = {}; + _13 s = {}; + spvUnsafeArray b = {}; + spvUnsafeArray c = {}; + a[0] = in.a_0.interpolate_at_center(); + a[1] = in.a_1.interpolate_at_center(); + s.x = in.s_x.interpolate_at_center(); + s.y = in.s_y.interpolate_at_centroid(); + s.z = in.s_z.interpolate_at_sample(gl_SampleID); + s.u[0] = in.s_u_0.interpolate_at_centroid(); + s.u[1] = in.s_u_1.interpolate_at_centroid(); + s.v[0] = in.s_v_0.interpolate_at_sample(gl_SampleID); + s.v[1] = in.s_v_1.interpolate_at_sample(gl_SampleID); + s.w[0] = in.s_w_0.interpolate_at_center(); + s.w[1] = in.s_w_1.interpolate_at_center(); + s.w[2] = in.s_w_2.interpolate_at_center(); + b[0] = in.b_0.interpolate_at_centroid(); + b[1] = in.b_1.interpolate_at_centroid(); + c[0] = in.c_0.interpolate_at_sample(gl_SampleID); + c[1] = in.c_1.interpolate_at_sample(gl_SampleID); + out.FragColor = in.foo.interpolate_at_center(); + out.FragColor += in.foo.interpolate_at_centroid(); + out.FragColor += in.foo.interpolate_at_sample(in.sid); + out.FragColor += in.foo.interpolate_at_offset(float2(0.100000001490116119384765625) + 0.4375); + float3 _65 = out.FragColor.xyz + in.bar.interpolate_at_centroid(); + out.FragColor = float4(_65.x, _65.y, _65.z, out.FragColor.w); + float3 _71 = out.FragColor.xyz + in.bar.interpolate_at_centroid(); + out.FragColor = float4(_71.x, _71.y, _71.z, out.FragColor.w); + float3 _78 = out.FragColor.xyz + in.bar.interpolate_at_sample(in.sid); + out.FragColor = float4(_78.x, _78.y, _78.z, out.FragColor.w); + float3 _84 = out.FragColor.xyz + in.bar.interpolate_at_offset(float2(-0.100000001490116119384765625) + 0.4375); + out.FragColor = float4(_84.x, _84.y, _84.z, out.FragColor.w); + float2 _91 = out.FragColor.xy + b[0]; + out.FragColor = float4(_91.x, _91.y, out.FragColor.z, out.FragColor.w); + float2 _98 = out.FragColor.xy + in.b_1.interpolate_at_centroid(); + out.FragColor = float4(_98.x, _98.y, out.FragColor.z, out.FragColor.w); + float2 _105 = out.FragColor.xy + 
in.b_0.interpolate_at_sample(2); + out.FragColor = float4(_105.x, _105.y, out.FragColor.z, out.FragColor.w); + float2 _112 = out.FragColor.xy + in.b_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + out.FragColor = float4(_112.x, _112.y, out.FragColor.z, out.FragColor.w); + float2 _119 = out.FragColor.xy + c[0]; + out.FragColor = float4(_119.x, _119.y, out.FragColor.z, out.FragColor.w); + float2 _127 = out.FragColor.xy + in.c_1.interpolate_at_centroid().xy; + out.FragColor = float4(_127.x, _127.y, out.FragColor.z, out.FragColor.w); + float2 _135 = out.FragColor.xy + in.c_0.interpolate_at_sample(2).yx; + out.FragColor = float4(_135.x, _135.y, out.FragColor.z, out.FragColor.w); + float2 _143 = out.FragColor.xy + in.c_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375).xx; + out.FragColor = float4(_143.x, _143.y, out.FragColor.z, out.FragColor.w); + out.FragColor += s.x; + out.FragColor += in.s_x.interpolate_at_centroid(); + out.FragColor += in.s_x.interpolate_at_sample(in.sid); + out.FragColor += in.s_x.interpolate_at_offset(float2(0.100000001490116119384765625) + 0.4375); + out.FragColor += s.y; + out.FragColor += in.s_y.interpolate_at_centroid(); + out.FragColor += in.s_y.interpolate_at_sample(in.sid); + out.FragColor += in.s_y.interpolate_at_offset(float2(-0.100000001490116119384765625) + 0.4375); + float2 _184 = out.FragColor.xy + s.v[0]; + out.FragColor = float4(_184.x, _184.y, out.FragColor.z, out.FragColor.w); + float2 _191 = out.FragColor.xy + in.s_v_1.interpolate_at_centroid(); + out.FragColor = float4(_191.x, _191.y, out.FragColor.z, out.FragColor.w); + float2 _198 = out.FragColor.xy + in.s_v_0.interpolate_at_sample(2); + out.FragColor = float4(_198.x, _198.y, out.FragColor.z, out.FragColor.w); + float2 _205 = out.FragColor.xy + in.s_v_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + 
out.FragColor = float4(_205.x, _205.y, out.FragColor.z, out.FragColor.w); + out.FragColor.x += s.w[0]; + out.FragColor.x += in.s_w_1.interpolate_at_centroid(); + out.FragColor.x += in.s_w_0.interpolate_at_sample(2); + out.FragColor.x += in.s_w_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + float2 _328 = out.FragColor.xy + in.baz.interpolate_at_sample(gl_SampleID); + out.FragColor = float4(_328.x, _328.y, out.FragColor.z, out.FragColor.w); + out.FragColor.x += in.baz.interpolate_at_centroid().x; + out.FragColor.y += in.baz.interpolate_at_sample(3).y; + out.FragColor.z += in.baz.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375).y; + float2 _353 = out.FragColor.xy + in.a_1.interpolate_at_centroid(); + out.FragColor = float4(_353.x, _353.y, out.FragColor.z, out.FragColor.w); + float2 _360 = out.FragColor.xy + in.a_0.interpolate_at_sample(2); + out.FragColor = float4(_360.x, _360.y, out.FragColor.z, out.FragColor.w); + float2 _367 = out.FragColor.xy + in.a_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + out.FragColor = float4(_367.x, _367.y, out.FragColor.z, out.FragColor.w); + out.FragColor += s.z; + float2 _379 = out.FragColor.xy + in.s_z.interpolate_at_centroid().yy; + out.FragColor = float4(_379.x, _379.y, out.FragColor.z, out.FragColor.w); + float2 _387 = out.FragColor.yz + in.s_z.interpolate_at_sample(3).xy; + out.FragColor = float4(out.FragColor.x, _387.x, _387.y, out.FragColor.w); + float2 _395 = out.FragColor.zw + in.s_z.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375).wx; + out.FragColor = float4(out.FragColor.x, out.FragColor.y, _395.x, _395.y); + out.FragColor += s.u[0]; + out.FragColor += in.s_u_1.interpolate_at_centroid(); + out.FragColor += in.s_u_0.interpolate_at_sample(2); + out.FragColor += 
in.s_u_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/single-function-private-lut.asm.frag b/reference/opt/shaders-msl/asm/frag/single-function-private-lut.asm.frag index 4081c3d89ab..6ae5ec7844f 100644 --- a/reference/opt/shaders-msl/asm/frag/single-function-private-lut.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/single-function-private-lut.asm.frag @@ -1,31 +1,70 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() +template +inline Tx mod(Tx x, Ty y) +{ + return x - y * floor(x / y); +} + struct myType { float data; }; -constant myType _21[5] = { myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 } }; - struct main0_out { float4 o_color [[color(0)]]; }; -// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() -template -Tx mod(Tx x, Ty y) -{ - return x - y * floor(x / y); -} - fragment main0_out main0(float4 gl_FragCoord [[position]]) { + spvUnsafeArray _21 = spvUnsafeArray({ 
myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 } }); + main0_out out = {}; if (_21[int(mod(gl_FragCoord.x, 4.0))].data > 0.0) { diff --git a/reference/opt/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag b/reference/opt/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag index 1bafc6953ba..d59013daaf8 100644 --- a/reference/opt/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag @@ -1,9 +1,50 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; -constant float4 _20[2] = { float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _20 = spvUnsafeArray({ float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }); struct main0_out { @@ -15,7 +56,7 @@ struct main0_out fragment main0_out main0() { main0_out out = {}; - float4 FragColors[2] = { float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }; + spvUnsafeArray FragColors = spvUnsafeArray({ float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }); out.FragColor = float4(5.0); out.FragColors_0 = FragColors[0]; out.FragColors_1 = FragColors[1]; diff --git 
a/reference/opt/shaders-msl/asm/frag/min-max-clamp.invalid.asm.frag b/reference/opt/shaders-msl/asm/frag/switch-different-sizes.asm.frag similarity index 100% rename from reference/opt/shaders-msl/asm/frag/min-max-clamp.invalid.asm.frag rename to reference/opt/shaders-msl/asm/frag/switch-different-sizes.asm.frag diff --git a/reference/opt/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag b/reference/opt/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag new file mode 100644 index 00000000000..92ac1d9f832 --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag @@ -0,0 +1,9 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ +} + diff --git a/reference/opt/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag b/reference/opt/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag new file mode 100644 index 00000000000..92ac1d9f832 --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag @@ -0,0 +1,9 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ +} + diff --git a/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.frag b/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.frag new file mode 100644 index 00000000000..ab5be649849 --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.frag @@ -0,0 +1,121 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + spvUnsafeArray _m0; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _102 = float3(_100.x, _100.y, _70.z); + _102.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102)) + { + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + 
_73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz)); + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.graphics-robust-access.frag b/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.graphics-robust-access.frag new file mode 100644 index 00000000000..ca5e3eadb70 --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.graphics-robust-access.frag @@ -0,0 +1,122 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + spvUnsafeArray _m0; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvBufferSizeConstants [[buffer(25)]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + constant uint& CulledObjectBoxBoundsBufferSize = spvBufferSizeConstants[0]; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _102 = float3(_100.x, _100.y, _70.z); + _102.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _186 = clamp(_103 + 1u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 
16) - 1u); + if (all(CulledObjectBoxBounds._m0[_186].xy > _96.xy) && all(CulledObjectBoxBounds._m0[clamp(_103, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz < _102)) + { + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + _73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[int(clamp(uint(_160), uint(0), uint(7)))] - (float3(0.5) * (CulledObjectBoxBounds._m0[clamp(_103, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz + CulledObjectBoxBounds._m0[_186].xyz)); + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[clamp(_103 + 2u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz), dot(_166, CulledObjectBoxBounds._m0[clamp(_103 + 3u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz), dot(_166, CulledObjectBoxBounds._m0[clamp(_103 + 4u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag new file mode 100644 index 00000000000..aee290f5a2f --- /dev/null +++ 
b/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +constant float a_tmp [[function_constant(1)]]; +constant float a = is_function_constant_defined(a_tmp) ? a_tmp : 1.0; +constant float b_tmp [[function_constant(2)]]; +constant float b = is_function_constant_defined(b_tmp) ? b_tmp : 2.0; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = float4(a + b); + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag b/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag index a1a73ced2bd..2031b335d48 100644 --- a/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag @@ -88,8 +88,6 @@ struct _18 float4 _m38[2]; }; -constant _28 _74 = {}; - struct main0_out { float4 m_5 [[color(0)]]; @@ -98,11 +96,10 @@ struct main0_out fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buffer(1)]], constant _18& _19 [[buffer(2)]], texture2d _8 [[texture(0)]], texture2d _12 [[texture(1)]], texture2d _14 [[texture(2)]], sampler _9 [[sampler(0)]], sampler _13 [[sampler(1)]], sampler _15 [[sampler(2)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; - float2 _82 = gl_FragCoord.xy * _19._m23.xy; float4 _88 = _7._m2 * _7._m0.xyxy; float2 _95 = _88.xy; float2 _96 = _88.zw; - float2 _97 = fast::clamp(_82 + (float2(0.0, -2.0) * _7._m0.xy), _95, _96); + float2 _97 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(0.0, -2.0) * _7._m0.xy), _95, _96); float3 _109 = float3(_11._m5) * fast::clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _113 = _12.sample(_13, _97, level(0.0)); float _114 = _113.y; @@ -115,8 +112,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _129 = _109; } - float3 _130 = _129 * 0.5; - float2 _144 
= fast::clamp(_82 + (float2(-1.0) * _7._m0.xy), _95, _96); + float2 _144 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(-1.0) * _7._m0.xy), _95, _96); float3 _156 = float3(_11._m5) * fast::clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _160 = _12.sample(_13, _144, level(0.0)); float _161 = _160.y; @@ -129,8 +125,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _176 = _156; } - float3 _177 = _176 * 0.5; - float2 _191 = fast::clamp(_82 + (float2(0.0, -1.0) * _7._m0.xy), _95, _96); + float2 _191 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(0.0, -1.0) * _7._m0.xy), _95, _96); float3 _203 = float3(_11._m5) * fast::clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _207 = _12.sample(_13, _191, level(0.0)); float _208 = _207.y; @@ -143,8 +138,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _223 = _203; } - float3 _224 = _223 * 0.75; - float2 _238 = fast::clamp(_82 + (float2(1.0, -1.0) * _7._m0.xy), _95, _96); + float2 _238 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(1.0, -1.0) * _7._m0.xy), _95, _96); float3 _250 = float3(_11._m5) * fast::clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _254 = _12.sample(_13, _238, level(0.0)); float _255 = _254.y; @@ -157,8 +151,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _270 = _250; } - float3 _271 = _270 * 0.5; - float2 _285 = fast::clamp(_82 + (float2(-2.0, 0.0) * _7._m0.xy), _95, _96); + float2 _285 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(-2.0, 0.0) * _7._m0.xy), _95, _96); float3 _297 = float3(_11._m5) * fast::clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _301 = _12.sample(_13, _285, level(0.0)); float _302 = _301.y; @@ -171,8 +164,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _317 = _297; } - float3 _318 = _317 * 0.5; - float2 _332 = 
fast::clamp(_82 + (float2(-1.0, 0.0) * _7._m0.xy), _95, _96); + float2 _332 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(-1.0, 0.0) * _7._m0.xy), _95, _96); float3 _344 = float3(_11._m5) * fast::clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _348 = _12.sample(_13, _332, level(0.0)); float _349 = _348.y; @@ -185,8 +177,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _364 = _344; } - float3 _365 = _364 * 0.75; - float2 _379 = fast::clamp(_82, _95, _96); + float2 _379 = fast::clamp(gl_FragCoord.xy * _19._m23.xy, _95, _96); float3 _391 = float3(_11._m5) * fast::clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _395 = _12.sample(_13, _379, level(0.0)); float _396 = _395.y; @@ -199,8 +190,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _411 = _391; } - float3 _412 = _411 * 1.0; - float2 _426 = fast::clamp(_82 + (float2(1.0, 0.0) * _7._m0.xy), _95, _96); + float2 _426 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(1.0, 0.0) * _7._m0.xy), _95, _96); float3 _438 = float3(_11._m5) * fast::clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _442 = _12.sample(_13, _426, level(0.0)); float _443 = _442.y; @@ -213,8 +203,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _458 = _438; } - float3 _459 = _458 * 0.75; - float2 _473 = fast::clamp(_82 + (float2(2.0, 0.0) * _7._m0.xy), _95, _96); + float2 _473 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(2.0, 0.0) * _7._m0.xy), _95, _96); float3 _485 = float3(_11._m5) * fast::clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _489 = _12.sample(_13, _473, level(0.0)); float _490 = _489.y; @@ -227,8 +216,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _505 = _485; } - float3 _506 = _505 * 0.5; - float2 _520 = fast::clamp(_82 + (float2(-1.0, 1.0) * _7._m0.xy), _95, _96); + float2 _520 = 
fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(-1.0, 1.0) * _7._m0.xy), _95, _96); float3 _532 = float3(_11._m5) * fast::clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _536 = _12.sample(_13, _520, level(0.0)); float _537 = _536.y; @@ -241,8 +229,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _552 = _532; } - float3 _553 = _552 * 0.5; - float2 _567 = fast::clamp(_82 + (float2(0.0, 1.0) * _7._m0.xy), _95, _96); + float2 _567 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(0.0, 1.0) * _7._m0.xy), _95, _96); float3 _579 = float3(_11._m5) * fast::clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _583 = _12.sample(_13, _567, level(0.0)); float _584 = _583.y; @@ -255,8 +242,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _599 = _579; } - float3 _600 = _599 * 0.75; - float2 _614 = fast::clamp(_82 + _7._m0.xy, _95, _96); + float2 _614 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, _7._m0.xy), _95, _96); float3 _626 = float3(_11._m5) * fast::clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _630 = _12.sample(_13, _614, level(0.0)); float _631 = _630.y; @@ -269,8 +255,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _646 = _626; } - float3 _647 = _646 * 0.5; - float2 _661 = fast::clamp(_82 + (float2(0.0, 2.0) * _7._m0.xy), _95, _96); + float2 _661 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(0.0, 2.0) * _7._m0.xy), _95, _96); float3 _673 = float3(_11._m5) * fast::clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _677 = _12.sample(_13, _661, level(0.0)); float _678 = _677.y; @@ -283,12 +268,10 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _693 = _673; } - float3 _702 = ((((((((((((_130.xyz + _177).xyz + _224).xyz + _271).xyz + _318).xyz + _365).xyz + _412).xyz + _459).xyz + _506).xyz + _553).xyz + _600).xyz + _647).xyz + 
(_693 * 0.5)).xyz * float3(0.125); - _28 _704 = _74; - _704._m0 = float4(_702.x, _702.y, _702.z, float4(0.0).w); - _28 _705 = _704; - _705._m0.w = 1.0; - out.m_5 = _705._m0; + float3 _702 = (((((((((((((_129 * 0.5).xyz + (_176 * 0.5)).xyz + (_223 * 0.75)).xyz + (_270 * 0.5)).xyz + (_317 * 0.5)).xyz + (_364 * 0.75)).xyz + (_411 * 1.0)).xyz + (_458 * 0.75)).xyz + (_505 * 0.5)).xyz + (_552 * 0.5)).xyz + (_599 * 0.75)).xyz + (_646 * 0.5)).xyz + (_693 * 0.5)).xyz * float3(0.125); + _28 _750 = _28{ float4(_702.x, _702.y, _702.z, float4(0.0).w) }; + _750._m0.w = 1.0; + out.m_5 = _750._m0; return out; } diff --git a/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc deleted file mode 100644 index bbda7be5bd1..00000000000 --- a/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc +++ /dev/null @@ -1,73 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct VertexOutput -{ - float4 pos; - float2 uv; -}; - -struct VertexOutput_1 -{ - float2 uv; -}; - -struct HSOut -{ - float2 uv; -}; - -struct main0_out -{ - HSOut _entryPointOutput; - float4 gl_Position; -}; - -struct main0_in -{ - float2 VertexOutput_uv [[attribute(0)]]; - float4 gl_Position [[attribute(1)]]; -}; - -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) -{ - device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; - if (gl_InvocationID < spvIndirectParams[0]) - gl_in[gl_InvocationID] = in; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_InvocationID >= 3) - return; - VertexOutput _223[3] = { VertexOutput{ gl_in[0].gl_Position, gl_in[0].VertexOutput_uv }, VertexOutput{ gl_in[1].gl_Position, gl_in[1].VertexOutput_uv }, VertexOutput{ gl_in[2].gl_Position, gl_in[2].VertexOutput_uv } }; - VertexOutput param[3]; - spvArrayCopyFromStack1(param, _223); - gl_out[gl_InvocationID].gl_Position = param[gl_InvocationID].pos; - gl_out[gl_InvocationID]._entryPointOutput.uv = param[gl_InvocationID].uv; - threadgroup_barrier(mem_flags::mem_device); - if (int(gl_InvocationID) == 0) - { - float2 _174 = float2(1.0) + gl_in[0].VertexOutput_uv; - float _175 = _174.x; - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_175); - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_175); - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_175); - spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_175); - } -} - diff --git a/reference/opt/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc b/reference/opt/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc new file mode 100644 index 00000000000..79395a4bbb2 --- /dev/null +++ 
b/reference/opt/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct TessLevels +{ + float inner0; + float inner1; + float outer0; + float outer1; + float outer2; + float outer3; +}; + +kernel void main0(const device TessLevels& sb_levels [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(sb_levels.inner0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(sb_levels.outer0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(sb_levels.outer1); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(sb_levels.outer2); +} + diff --git a/reference/opt/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese b/reference/opt/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese index 83ef729321e..bfa96f9cfbd 100644 --- a/reference/opt/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese +++ b/reference/opt/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -10,14 +51,23 @@ struct main0_out struct main0_patchIn { - float2 gl_TessLevelInner [[attribute(0)]]; - float4 gl_TessLevelOuter [[attribute(1)]]; + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; - out.gl_Position = float4(((gl_TessCoord.x * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.x) + (((1.0 - gl_TessCoord.x) * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.z), ((gl_TessCoord.y * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.y) + (((1.0 - gl_TessCoord.y) * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.w), 0.0, 1.0); + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = 
patchIn.gl_TessLevelOuter[3]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + out.gl_Position = float4(fma(gl_TessCoord.x * gl_TessLevelInner[0], gl_TessLevelOuter[0], ((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), fma(gl_TessCoord.y * gl_TessLevelInner[1], gl_TessLevelOuter[1], ((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]), 0.0, 1.0); return out; } diff --git a/reference/opt/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert b/reference/opt/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert new file mode 100644 index 00000000000..1528c8350de --- /dev/null +++ b/reference/opt/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +struct main0_in +{ + float4 pos [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = in.pos; + out.gl_ClipDistance[0] = in.pos.x; + out.gl_ClipDistance[1] = in.pos.y; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/opt/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert b/reference/opt/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert new file mode 100644 index 00000000000..1d6885958c3 --- /dev/null +++ b/reference/opt/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; +}; + +struct main0_in +{ + float4 pos [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out 
out = {}; + out.gl_Position = in.pos; + out.gl_ClipDistance[0] = in.pos.x; + out.gl_ClipDistance[1] = in.pos.y; + return out; +} + diff --git a/reference/opt/shaders-msl/asm/vert/fake-builtin-input.asm.vert b/reference/opt/shaders-msl/asm/vert/fake-builtin-input.asm.vert index f9fcbc85c30..3079ae9bcbb 100644 --- a/reference/opt/shaders-msl/asm/vert/fake-builtin-input.asm.vert +++ b/reference/opt/shaders-msl/asm/vert/fake-builtin-input.asm.vert @@ -5,6 +5,7 @@ using namespace metal; struct main0_out { + half4 out_var_SV_Target [[user(locn0)]]; float4 gl_Position [[position]]; }; diff --git a/reference/opt/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert b/reference/opt/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert new file mode 100644 index 00000000000..1926ff9e14e --- /dev/null +++ b/reference/opt/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _9 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _9.umatrix * float4(_9.uquad[int(gl_VertexIndex)].x, _9.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_9.flags.flags[0] != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/opt/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert b/reference/opt/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert new file mode 100644 index 00000000000..ee206385746 --- /dev/null +++ b/reference/opt/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint2 flags[1]; +}; + +struct 
defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _9 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _9.umatrix * float4(_9.uquad[int(gl_VertexIndex)].x, _9.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_9.flags.flags[0].x != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/opt/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert b/reference/opt/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert index ed5c5f9ad6e..196057a79d7 100644 --- a/reference/opt/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert +++ b/reference/opt/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert @@ -9,9 +9,10 @@ constant int _20 = (_7 + 2); constant uint _8_tmp [[function_constant(202)]]; constant uint _8 = is_function_constant_defined(_8_tmp) ? 
_8_tmp : 100u; constant uint _25 = (_8 % 5u); -constant int4 _30 = int4(20, 30, _20, _20); -constant int2 _32 = int2(_30.y, _30.x); -constant int _33 = _30.y; +constant int _30 = _7 - (-3) * (_7 / (-3)); +constant int4 _32 = int4(20, 30, _20, _30); +constant int2 _34 = int2(_32.y, _32.x); +constant int _35 = _32.y; struct main0_out { @@ -22,14 +23,13 @@ struct main0_out vertex main0_out main0() { main0_out out = {}; - float4 _63 = float4(0.0); - _63.y = float(_20); - float4 _66 = _63; + float4 _66 = float4(0.0); + _66.y = float(_20); _66.z = float(_25); - float4 _52 = _66 + float4(_30); - float2 _56 = _52.xy + float2(_32); - out.gl_Position = float4(_56.x, _56.y, _52.z, _52.w); - out.m_4 = _33; + float4 _55 = _66 + float4(_32); + float2 _59 = _55.xy + float2(_34); + out.gl_Position = float4(_59.x, _59.y, _55.z, _55.w); + out.m_4 = _35; return out; } diff --git a/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp b/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp index 59fc03a7520..e57b2ea171e 100644 --- a/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp +++ b/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0() { } diff --git a/reference/opt/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp b/reference/opt/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp index f7757cd19f8..18cfd68c199 100644 --- a/reference/opt/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp +++ b/reference/opt/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp @@ -23,6 +23,8 @@ struct SSBO2 float4 v; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + struct spvDescriptorSetBuffer0 { const device SSBO0* ssbo0 [[id(0)]]; diff --git 
a/reference/opt/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp b/reference/opt/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp new file mode 100644 index 00000000000..25a0233aec7 --- /dev/null +++ b/reference/opt/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp @@ -0,0 +1,11 @@ +#include +#include + +using namespace metal; + +kernel void main0(texture2d uImage [[texture(0)]], texture2d uImageRead [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int2 _17 = int2(gl_GlobalInvocationID.xy); + uImage.write(uImageRead.read(uint2(_17)), uint2(_17)); +} + diff --git a/reference/opt/shaders-msl/comp/array-length.comp b/reference/opt/shaders-msl/comp/array-length.comp index 79358eb90e2..5a284b96669 100644 --- a/reference/opt/shaders-msl/comp/array-length.comp +++ b/reference/opt/shaders-msl/comp/array-length.comp @@ -14,6 +14,8 @@ struct SSBO1 float bz[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device SSBO& _14 [[buffer(0)]], device SSBO1* ssbos_0 [[buffer(1)]], device SSBO1* ssbos_1 [[buffer(2)]]) { device SSBO1* ssbos[] = diff --git a/reference/opt/shaders-msl/comp/array-length.msl2.argument.discrete.comp b/reference/opt/shaders-msl/comp/array-length.msl2.argument.discrete.comp index 6ec9b11bbe7..d804e187679 100644 --- a/reference/opt/shaders-msl/comp/array-length.msl2.argument.discrete.comp +++ b/reference/opt/shaders-msl/comp/array-length.msl2.argument.discrete.comp @@ -25,6 +25,8 @@ struct SSBO3 float bz[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + struct spvDescriptorSetBuffer0 { device SSBO* m_16 [[id(0)]]; diff --git a/reference/opt/shaders-msl/comp/atomic.comp b/reference/opt/shaders-msl/comp/atomic.comp index 43e6a8f0380..fca72bfcfe9 100644 --- a/reference/opt/shaders-msl/comp/atomic.comp +++ 
b/reference/opt/shaders-msl/comp/atomic.comp @@ -12,59 +12,61 @@ struct SSBO int i32; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& ssbo [[buffer(0)]]) { threadgroup uint shared_u32; threadgroup int shared_i32; - uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _16 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _18 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _20 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _22 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _24 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _28 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); uint _32; do { _32 = 10u; - } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u); - int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _38 = 
atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u); + int _36 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _38 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _40 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _42 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _44 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _46 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _48 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); int _52; do { _52 = 10; - } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10); + } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10); shared_u32 = 10u; shared_i32 = 10; - uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, 
memory_order_relaxed); - uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _57 = atomic_fetch_add_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _58 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _59 = atomic_fetch_xor_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _60 = atomic_fetch_and_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _61 = atomic_fetch_min_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _62 = atomic_fetch_max_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _63 = atomic_exchange_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); uint _64; do { _64 = 10u; - } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u); - int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _69 = 
atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u); + int _65 = atomic_fetch_add_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _66 = atomic_fetch_or_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _67 = atomic_fetch_xor_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _68 = atomic_fetch_and_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _69 = atomic_fetch_min_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _70 = atomic_fetch_max_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _71 = atomic_exchange_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); int _72; do { _72 = 10; - } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10); + } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10); } diff --git a/reference/opt/shaders-msl/comp/basic.comp b/reference/opt/shaders-msl/comp/basic.comp index 22ec741965d..dbb839f5817 100644 --- a/reference/opt/shaders-msl/comp/basic.comp +++ b/reference/opt/shaders-msl/comp/basic.comp @@ -21,12 +21,14 @@ struct SSBO3 uint counter; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]], device SSBO3& _48 
[[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { float4 _29 = _23.in_data[gl_GlobalInvocationID.x]; if (dot(_29, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) { - uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed); + uint _52 = atomic_fetch_add_explicit((device atomic_uint*)&_48.counter, 1u, memory_order_relaxed); _45.out_data[_52] = _29; } } diff --git a/reference/opt/shaders-msl/comp/basic.dispatchbase.comp b/reference/opt/shaders-msl/comp/basic.dispatchbase.comp new file mode 100644 index 00000000000..ebbc144c7b1 --- /dev/null +++ b/reference/opt/shaders-msl/comp/basic.dispatchbase.comp @@ -0,0 +1,38 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 in_data[1]; +}; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +struct SSBO3 +{ + uint counter; +}; + +constant uint _59_tmp [[function_constant(10)]]; +constant uint _59 = is_function_constant_defined(_59_tmp) ? 
_59_tmp : 1u; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(_59, 1u, 1u); + +kernel void main0(const device SSBO& _27 [[buffer(0)]], device SSBO2& _49 [[buffer(1)]], device SSBO3& _52 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvDispatchBase [[grid_origin]]) +{ + gl_GlobalInvocationID += spvDispatchBase * gl_WorkGroupSize; + float4 _33 = _27.in_data[gl_GlobalInvocationID.x]; + if (dot(_33, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) + { + uint _56 = atomic_fetch_add_explicit((device atomic_uint*)&_52.counter, 1u, memory_order_relaxed); + _49.out_data[_56] = _33; + } +} + diff --git a/reference/opt/shaders-msl/comp/basic.dispatchbase.msl11.comp b/reference/opt/shaders-msl/comp/basic.dispatchbase.msl11.comp new file mode 100644 index 00000000000..2d991f5db54 --- /dev/null +++ b/reference/opt/shaders-msl/comp/basic.dispatchbase.msl11.comp @@ -0,0 +1,36 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 in_data[1]; +}; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +struct SSBO3 +{ + uint counter; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant uint3& spvDispatchBase [[buffer(29)]], const device SSBO& _27 [[buffer(0)]], device SSBO2& _49 [[buffer(1)]], device SSBO3& _52 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + gl_GlobalInvocationID += spvDispatchBase * gl_WorkGroupSize; + float4 _33 = _27.in_data[gl_GlobalInvocationID.x]; + if (dot(_33, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) + { + uint _56 = atomic_fetch_add_explicit((device atomic_uint*)&_52.counter, 1u, memory_order_relaxed); + _49.out_data[_56] = _33; + } +} + diff --git a/reference/opt/shaders-msl/comp/basic.inline-block.msl2.comp b/reference/opt/shaders-msl/comp/basic.inline-block.msl2.comp new file mode 100644 index 00000000000..902dfc92d93 --- /dev/null +++ 
b/reference/opt/shaders-msl/comp/basic.inline-block.msl2.comp @@ -0,0 +1,53 @@ +#include +#include + +using namespace metal; + +typedef packed_float4 packed_float4x4[4]; + +struct Baz +{ + int f; + int g; +}; + +struct X +{ + int x; + int y; + float z; +}; + +struct Foo +{ + int a; + int b; + packed_float4x4 c; + X x[2]; +}; + +struct Bar +{ + int d; + int e; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u); + +struct spvDescriptorSetBuffer0 +{ + constant Bar* m_38 [[id(0)]]; + Foo m_32 [[id(1)]]; +}; + +struct spvDescriptorSetBuffer1 +{ + device Baz* baz [[id(0)]][3]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + spvDescriptorSet1.baz[gl_GlobalInvocationID.x]->f = spvDescriptorSet0.m_32.a + (*spvDescriptorSet0.m_38).d; + spvDescriptorSet1.baz[gl_GlobalInvocationID.x]->g = spvDescriptorSet0.m_32.b * (*spvDescriptorSet0.m_38).e; +} + diff --git a/reference/opt/shaders-msl/comp/bitcast-16bit-1.invalid.comp b/reference/opt/shaders-msl/comp/bitcast-16bit-1.invalid.comp deleted file mode 100644 index ad9733a8b58..00000000000 --- a/reference/opt/shaders-msl/comp/bitcast-16bit-1.invalid.comp +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -using namespace metal; - -struct SSBO0 -{ - short4 inputs[1]; -}; - -struct SSBO1 -{ - int4 outputs[1]; -}; - -kernel void main0(device SSBO0& _25 [[buffer(0)]], device SSBO1& _39 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - _39.outputs[gl_GlobalInvocationID.x].x = int(as_type(as_type(_25.inputs[gl_GlobalInvocationID.x].xy) + half2(half(1.0)))); - _39.outputs[gl_GlobalInvocationID.x].y = as_type(_25.inputs[gl_GlobalInvocationID.x].zw); - _39.outputs[gl_GlobalInvocationID.x].z = int(as_type(ushort2(_25.inputs[gl_GlobalInvocationID.x].xy))); -} - diff --git 
a/reference/opt/shaders-msl/comp/bitcast-16bit-2.invalid.comp b/reference/opt/shaders-msl/comp/bitcast-16bit-2.invalid.comp deleted file mode 100644 index a4230b1eb6a..00000000000 --- a/reference/opt/shaders-msl/comp/bitcast-16bit-2.invalid.comp +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include - -using namespace metal; - -struct SSBO1 -{ - short4 outputs[1]; -}; - -struct SSBO0 -{ - int4 inputs[1]; -}; - -struct UBO -{ - half4 const0; -}; - -kernel void main0(device SSBO1& _21 [[buffer(0)]], device SSBO0& _29 [[buffer(1)]], constant UBO& _40 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - short2 _47 = as_type(_29.inputs[gl_GlobalInvocationID.x].x) + as_type(_40.const0.xy); - _21.outputs[gl_GlobalInvocationID.x] = short4(_47.x, _47.y, _21.outputs[gl_GlobalInvocationID.x].z, _21.outputs[gl_GlobalInvocationID.x].w); - short2 _66 = short2(as_type(uint(_29.inputs[gl_GlobalInvocationID.x].y)) - as_type(_40.const0.zw)); - _21.outputs[gl_GlobalInvocationID.x] = short4(_21.outputs[gl_GlobalInvocationID.x].x, _21.outputs[gl_GlobalInvocationID.x].y, _66.x, _66.y); -} - diff --git a/reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp b/reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp new file mode 100644 index 00000000000..fb561482abd --- /dev/null +++ b/reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct SSBO; + +struct UBO +{ + uint2 b; +}; + +struct SSBO +{ + packed_float3 a1; + float a2; +}; + +kernel void main0(constant UBO& _10 [[buffer(0)]]) +{ + (reinterpret_cast(as_type(_10.b)))->a1 = float3(1.0, 2.0, 3.0); + uint2 _35 = as_type(reinterpret_cast(reinterpret_cast(as_type(_10.b + uint2(32u))))); + uint2 v2 = _35; + device SSBO* _39 = reinterpret_cast(as_type(_35)); + float3 v3 = float3(_39->a1); + _39->a1 = 
float3(_39->a1) + float3(1.0); +} + diff --git a/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp new file mode 100644 index 00000000000..d66154b5494 --- /dev/null +++ b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp @@ -0,0 +1,96 @@ +#include +#include + +using namespace metal; + +struct t21; + +struct t24 +{ + int4 m0[2]; + int m1; + ulong2 m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t21 +{ + int4 m0[2]; + int m1; + ulong2 m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t35 +{ + int m0[32]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant t24& u24 [[buffer(0)]], constant t35& u35 [[buffer(1)]], texture2d v295 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int v8 = 0; + int _30 = 0 | (u24.m0[0].x - 0); + v8 = _30; + int _44 = _30 | (u24.m0[u35.m0[1]].x - 1); + v8 = _44; + int _50 = _44 | (u24.m1 - 2); + v8 = _50; + int _60 = _50 | int(u24.m4[0u][0] - 3.0); + v8 = _60; + int _68 = _60 | int(u24.m4[1u][0] - 5.0); + v8 = _68; + int _75 = _68 | int(u24.m4[0u][1] - 4.0); + v8 = _75; + int _82 = _75 | int(u24.m4[1u][1] - 6.0); + v8 = _82; + int _92 = _82 | (((device t21*)u24.m2[0].x)->m0[0].x - 3); + v8 = _92; + int _101 = _92 | (((device t21*)u24.m2[0].x)->m0[u35.m0[1]].x - 4); + v8 = _101; + int _109 = _101 | (((device t21*)u24.m2[0].x)->m1 - 5); + v8 = _109; + int _118 = _109 | int(((device t21*)u24.m2[0].x)->m4[0u][0] - 6.0); + v8 = _118; + int _127 = _118 | int(((device t21*)u24.m2[0].x)->m4[1u][0] - 8.0); + v8 = _127; + int _136 = _127 | int(((device t21*)u24.m2[0].x)->m4[0u][1] - 7.0); + v8 = _136; + int _145 = _136 | int(((device t21*)u24.m2[0].x)->m4[1u][1] - 9.0); + v8 = _145; + int _155 = _145 | (((device t21*)u24.m2[u35.m0[1]].x)->m0[0].x - 6); + v8 = _155; + int _167 = _155 | (((device 
t21*)u24.m2[u35.m0[1]].x)->m0[u35.m0[1]].x - 7); + v8 = _167; + int _177 = _167 | (((device t21*)u24.m2[u35.m0[1]].x)->m1 - 8); + v8 = _177; + int _187 = _177 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][0] - 9.0); + v8 = _187; + int _198 = _187 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][0] - 11.0); + v8 = _198; + int _209 = _198 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][1] - 10.0); + v8 = _209; + int _220 = _209 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][1] - 12.0); + v8 = _220; + int _228 = _220 | (u24.m3->m0[0].x - 9); + v8 = _228; + int _238 = _228 | (u24.m3->m0[u35.m0[1]].x - 10); + v8 = _238; + int _246 = _238 | (u24.m3->m1 - 11); + v8 = _246; + int _254 = _246 | int(u24.m3->m4[0u][0] - 12.0); + v8 = _254; + int _263 = _254 | int(u24.m3->m4[1u][0] - 14.0); + v8 = _263; + int _272 = _263 | int(u24.m3->m4[0u][1] - 13.0); + v8 = _272; + int _281 = _272 | int(u24.m3->m4[1u][1] - 15.0); + v8 = _281; + uint4 _292 = select(uint4(1u, 0u, 0u, 1u), uint4(0u), bool4(_281 != 0)); + uint4 v284 = _292; + v295.write(_292, uint2(int2(gl_GlobalInvocationID.xy))); +} + diff --git a/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp b/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp new file mode 100644 index 00000000000..d85fa356b80 --- /dev/null +++ b/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp @@ -0,0 +1,56 @@ +#include +#include + +using namespace metal; + +struct Position; +struct PositionReferences; + +struct Position +{ + float2 positions[1]; +}; + +struct Registers +{ + device PositionReferences* references; + float fract_time; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); + +kernel void main0(constant Registers& registers [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + uint2 
local_offset = gl_GlobalInvocationID.xy; + uint _19 = local_offset.y; + uint _29 = local_offset.x; + uint _30 = ((_19 * 8u) * gl_NumWorkGroups.x) + _29; + uint local_index = _30; + uint slice = gl_WorkGroupID.z; + device Position* __restrict positions = registers.references->buffers[gl_WorkGroupID.z]; + float _66 = float(gl_WorkGroupID.z); + float _70 = fract(fma(_66, 0.100000001490116119384765625, registers.fract_time)); + float _71 = 6.283125400543212890625 * _70; + float offset = _71; + float2 pos = float2(local_offset); + float _83 = sin(fma(2.2000000476837158203125, pos.x, _71)); + pos.x = fma(0.20000000298023223876953125, _83, pos.x); + float _97 = sin(fma(2.25, pos.y, _70 * 12.56625080108642578125)); + pos.y = fma(0.20000000298023223876953125, _97, pos.y); + float _111 = cos(fma(1.7999999523162841796875, pos.y, _70 * 18.849376678466796875)); + pos.x = fma(0.20000000298023223876953125, _111, pos.x); + float _125 = cos(fma(2.849999904632568359375, pos.x, _70 * 25.1325016021728515625)); + pos.y = fma(0.20000000298023223876953125, _125, pos.y); + float _133 = sin(_71); + pos.x = fma(0.5, _133, pos.x); + float _142 = sin(fma(6.283125400543212890625, _70, 0.300000011920928955078125)); + pos.y = fma(0.5, _142, pos.y); + float2 _155 = float2(gl_NumWorkGroups.xy); + registers.references->buffers[gl_WorkGroupID.z]->positions[_30] = (pos / fma(float2(8.0), _155, float2(-1.0))) - float2(0.5); +} + diff --git a/reference/opt/shaders-msl/comp/coherent-block.comp b/reference/opt/shaders-msl/comp/coherent-block.comp index bec9b218c7b..58bbacb7f0c 100644 --- a/reference/opt/shaders-msl/comp/coherent-block.comp +++ b/reference/opt/shaders-msl/comp/coherent-block.comp @@ -8,7 +8,9 @@ struct SSBO float4 value; }; -kernel void main0(device SSBO& _10 [[buffer(0)]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(volatile device SSBO& _10 [[buffer(0)]]) { _10.value = float4(20.0); } diff --git a/reference/opt/shaders-msl/comp/coherent-image.comp 
b/reference/opt/shaders-msl/comp/coherent-image.comp index 0fe044fb9ae..5090484464d 100644 --- a/reference/opt/shaders-msl/comp/coherent-image.comp +++ b/reference/opt/shaders-msl/comp/coherent-image.comp @@ -8,7 +8,9 @@ struct SSBO int4 value; }; -kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d uImage [[texture(0)]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d uImage [[texture(0)]]) { _10.value = uImage.read(uint2(int2(10))); } diff --git a/reference/opt/shaders-msl/comp/complex-composite-constant-array.comp b/reference/opt/shaders-msl/comp/complex-composite-constant-array.comp new file mode 100644 index 00000000000..bd58c95a006 --- /dev/null +++ b/reference/opt/shaders-msl/comp/complex-composite-constant-array.comp @@ -0,0 +1,59 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct SSBO +{ + float4x4 a; + uint index; +}; + +kernel void main0(device SSBO& _14 [[buffer(0)]]) +{ + spvUnsafeArray _32 = spvUnsafeArray({ float4x4(float4(1.0, 0.0, 0.0, 0.0), float4(0.0, 1.0, 0.0, 0.0), float4(0.0, 0.0, 1.0, 0.0), float4(0.0, 0.0, 0.0, 1.0)), float4x4(float4(2.0, 0.0, 0.0, 0.0), float4(0.0, 2.0, 0.0, 0.0), float4(0.0, 0.0, 2.0, 0.0), float4(0.0, 0.0, 0.0, 2.0)) }); + + _14.a = _32[_14.index]; +} + diff --git a/reference/opt/shaders-msl/comp/composite-array-initialization.comp b/reference/opt/shaders-msl/comp/composite-array-initialization.comp index 8dec8bddb31..6181ae69b11 100644 --- a/reference/opt/shaders-msl/comp/composite-array-initialization.comp +++ b/reference/opt/shaders-msl/comp/composite-array-initialization.comp @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Data { float a; @@ -27,26 +66,13 @@ struct SSBO constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u); -constant Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; - -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } }; - Data data2[2]; - spvArrayCopyFromStack1(data2, _31); + spvUnsafeArray _25 = spvUnsafeArray({ Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }); + + spvUnsafeArray _31 = spvUnsafeArray({ Data{ X, 2.0 }, Data{ 3.0, 5.0 } }); + spvUnsafeArray data2; + data2 = _31; _53.outdata[gl_WorkGroupID.x].a = _25[gl_LocalInvocationID.x].a + data2[gl_LocalInvocationID.x].a; _53.outdata[gl_WorkGroupID.x].b = _25[gl_LocalInvocationID.x].b + data2[gl_LocalInvocationID.x].b; } diff --git 
a/reference/opt/shaders-msl/comp/composite-array-initialization.force-native-array.comp b/reference/opt/shaders-msl/comp/composite-array-initialization.force-native-array.comp new file mode 100644 index 00000000000..536a6e30747 --- /dev/null +++ b/reference/opt/shaders-msl/comp/composite-array-initialization.force-native-array.comp @@ -0,0 +1,148 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + 
+template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct Data +{ + float a; + float b; +}; + +constant float X_tmp [[function_constant(0)]]; +constant float X = is_function_constant_defined(X_tmp) ? X_tmp : 4.0; + +struct Data_1 +{ + float a; + float b; +}; + +struct SSBO +{ + Data_1 outdata[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u); + +kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +{ + Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; + + Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } }; + Data data2[2]; + spvArrayCopyFromStackToStack1(data2, _31); + _53.outdata[gl_WorkGroupID.x].a = _25[gl_LocalInvocationID.x].a + data2[gl_LocalInvocationID.x].a; + _53.outdata[gl_WorkGroupID.x].b = _25[gl_LocalInvocationID.x].b + data2[gl_LocalInvocationID.x].b; +} + diff --git a/reference/opt/shaders-msl/comp/composite-construct.comp b/reference/opt/shaders-msl/comp/composite-construct.comp index 6d44fc57b23..09e6fc7d959 100644 --- a/reference/opt/shaders-msl/comp/composite-construct.comp +++ b/reference/opt/shaders-msl/comp/composite-construct.comp @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct SSBO0 { float4 as[1]; @@ -15,24 +54,13 @@ struct SSBO1 float4 bs[1]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(device SSBO0& _16 [[buffer(0)]], device SSBO1& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { - float4 _37[2] = { _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }; - float4 values[2]; - spvArrayCopyFromStack1(values, _37); + spvUnsafeArray _37 = spvUnsafeArray({ _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }); + spvUnsafeArray values; + values = _37; _16.as[0] = values[gl_LocalInvocationIndex]; _32.bs[1] = float4(40.0); } diff --git a/reference/opt/shaders-msl/comp/copy-array-of-arrays.comp b/reference/opt/shaders-msl/comp/copy-array-of-arrays.comp index ea9693ce474..cb396cff20e 100644 --- a/reference/opt/shaders-msl/comp/copy-array-of-arrays.comp 
+++ b/reference/opt/shaders-msl/comp/copy-array-of-arrays.comp @@ -10,6 +10,8 @@ struct BUF float c; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device BUF& o [[buffer(0)]]) { o.a = 4; diff --git a/reference/opt/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp b/reference/opt/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp new file mode 100644 index 00000000000..cb396cff20e --- /dev/null +++ b/reference/opt/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct BUF +{ + int a; + float b; + float c; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device BUF& o [[buffer(0)]]) +{ + o.a = 4; + o.b = o.c; +} + diff --git a/reference/opt/shaders-msl/comp/culling.comp b/reference/opt/shaders-msl/comp/culling.comp index 95ffff8393b..55735475a7c 100644 --- a/reference/opt/shaders-msl/comp/culling.comp +++ b/reference/opt/shaders-msl/comp/culling.comp @@ -28,7 +28,7 @@ kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buff float _28 = _22.in_data[gl_GlobalInvocationID.x]; if (_28 > 12.0) { - uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed); + uint _45 = atomic_fetch_add_explicit((device atomic_uint*)&_41.count, 1u, memory_order_relaxed); _38.out_data[_45] = _28; } } diff --git a/reference/opt/shaders-msl/comp/defer-parens.comp b/reference/opt/shaders-msl/comp/defer-parens.comp index 69a8aab92dd..8c130e3a0c0 100644 --- a/reference/opt/shaders-msl/comp/defer-parens.comp +++ b/reference/opt/shaders-msl/comp/defer-parens.comp @@ -9,6 +9,8 @@ struct SSBO int index; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _13 [[buffer(0)]]) { float4 _17 = _13.data; diff --git a/reference/opt/shaders-msl/comp/dowhile.comp b/reference/opt/shaders-msl/comp/dowhile.comp index 
3ebafe0fdeb..b503c948ad8 100644 --- a/reference/opt/shaders-msl/comp/dowhile.comp +++ b/reference/opt/shaders-msl/comp/dowhile.comp @@ -14,21 +14,23 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - float4 _57; - int _58; - _58 = 0; - _57 = _28.in_data[gl_GlobalInvocationID.x]; + float4 _59; + int _60; + _60 = 0; + _59 = _28.in_data[gl_GlobalInvocationID.x]; float4 _42; for (;;) { - _42 = _28.mvp * _57; - int _44 = _58 + 1; + _42 = _28.mvp * _59; + int _44 = _60 + 1; if (_44 < 16) { - _58 = _44; - _57 = _42; + _60 = _44; + _59 = _42; } else { diff --git a/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp b/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp index 7cb8913dabf..fbf4c4f7fc4 100644 --- a/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp +++ b/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp @@ -5,17 +5,6 @@ using namespace metal; -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -28,6 +17,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -65,66 +65,6 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) -{ - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} - -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - kernel void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture2d foo [[texture(0)]], texture2d bar [[texture(1)]], sampler fooSmplr [[sampler(0)]]) { constant uint& fooSwzl = spvSwizzleConstants[0]; diff --git a/reference/opt/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp b/reference/opt/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp index fe0212ec3ff..333485a256a 100644 --- a/reference/opt/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp +++ b/reference/opt/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/opt/shaders-msl/comp/global-invocation-id.comp b/reference/opt/shaders-msl/comp/global-invocation-id.comp index fe0212ec3ff..333485a256a 100644 --- a/reference/opt/shaders-msl/comp/global-invocation-id.comp +++ b/reference/opt/shaders-msl/comp/global-invocation-id.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x 
- y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/opt/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp b/reference/opt/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp new file mode 100644 index 00000000000..05dc38746a8 --- /dev/null +++ b/reference/opt/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp @@ -0,0 +1,37 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct SSBO +{ + float4 outdata; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +struct spvDescriptorSetBuffer0 +{ + texture2d uImage [[id(0)]]; + device atomic_uint* uImage_atomic [[id(1)]]; + device SSBO* m_31 [[id(2)]]; + texture2d uTexture [[id(3)]]; + sampler uTextureSmplr [[id(4)]]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint _26 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.uImage_atomic[spvImage2DAtomicCoord(int2(gl_GlobalInvocationID.xy), spvDescriptorSet0.uImage)], 10u, memory_order_relaxed); + (*spvDescriptorSet0.m_31).outdata = spvDescriptorSet0.uTexture.sample(spvDescriptorSet0.uTextureSmplr, float2(gl_GlobalInvocationID.xy), level(0.0)) + float4(float(_26)); +} + diff --git a/reference/opt/shaders-msl/comp/image-atomic-automatic-bindings.comp b/reference/opt/shaders-msl/comp/image-atomic-automatic-bindings.comp new file mode 100644 index 00000000000..7b0a129a488 --- /dev/null +++ b/reference/opt/shaders-msl/comp/image-atomic-automatic-bindings.comp @@ -0,0 +1,28 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct SSBO +{ + float4 outdata; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _31 [[buffer(1)]], texture2d uImage [[texture(0)]], device atomic_uint* uImage_atomic [[buffer(0)]], texture2d uTexture [[texture(1)]], sampler uTextureSmplr [[sampler(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint _26 = atomic_fetch_add_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(gl_GlobalInvocationID.xy), uImage)], 10u, memory_order_relaxed); + _31.outdata = uTexture.sample(uTextureSmplr, float2(gl_GlobalInvocationID.xy), level(0.0)) + float4(float(_26)); +} + diff --git a/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp b/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp index 1eeaf87cf44..41c4dfc1802 100644 --- a/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp +++ b/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(texturecube_array uImageIn [[texture(0)]], texturecube_array uImageOut [[texture(1)]]) { uImageOut.write(uImageIn.read(uint2(int3(9, 7, 11).xy), uint(int3(9, 7, 11).z) % 6u, uint(int3(9, 7, 11).z) / 6u), uint2(int3(9, 7, 11).xy), uint(int3(9, 7, 11).z) % 6u, uint(int3(9, 7, 11).z) / 6u); diff --git a/reference/opt/shaders-msl/comp/image.comp b/reference/opt/shaders-msl/comp/image.comp index 447732dd235..c875e78de02 100644 --- a/reference/opt/shaders-msl/comp/image.comp +++ b/reference/opt/shaders-msl/comp/image.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize 
[[maybe_unused]] = uint3(1u); + kernel void main0(texture2d uImageIn [[texture(0)]], texture2d uImageOut [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { int2 _23 = int2(gl_GlobalInvocationID.xy); diff --git a/reference/opt/shaders-msl/comp/insert.comp b/reference/opt/shaders-msl/comp/insert.comp index 1418ce35b5c..437b7f32898 100644 --- a/reference/opt/shaders-msl/comp/insert.comp +++ b/reference/opt/shaders-msl/comp/insert.comp @@ -8,19 +8,11 @@ struct SSBO float4 out_data[1]; }; -constant float4 _52 = {}; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - float4 _45 = _52; - _45.x = 10.0; - float4 _47 = _45; - _47.y = 30.0; - float4 _49 = _47; - _49.z = 70.0; - float4 _51 = _49; - _51.w = 90.0; - _27.out_data[gl_GlobalInvocationID.x] = _51; - _27.out_data[gl_GlobalInvocationID.x].y = 20.0; + _27.out_data[gl_GlobalInvocationID.x] = float4(10.0, 30.0, 70.0, 90.0); + ((device float*)&_27.out_data[gl_GlobalInvocationID.x])[1u] = 20.0; } diff --git a/reference/opt/shaders-msl/comp/int64.invalid.msl22.comp b/reference/opt/shaders-msl/comp/int64.invalid.msl22.comp deleted file mode 100644 index 13304bd0e81..00000000000 --- a/reference/opt/shaders-msl/comp/int64.invalid.msl22.comp +++ /dev/null @@ -1,24 +0,0 @@ -#include -#include - -using namespace metal; - -struct SSBO -{ - int s32; - uint u32; -}; - -constant long _162 = {}; - -kernel void main0(device SSBO& _96 [[buffer(0)]]) -{ - long4 _137; - ulong4 _141; - _137 = abs((_137 + long4(30l, 40l, 50l, 60l)) + long4(_141 + ulong4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul))); - _141 += ulong4(long4(999999999999999999l, 8888888888888888l, 77777777777777777l, 6666666666666666l)); - ulong _109 = ulong(_162); - _96.s32 = int(uint(((ulong(_137.x) + _141.y) + _109) + _109)); - _96.u32 = uint(((ulong(_137.y) + _141.z) + ulong(_162 + 1l)) + 
_109); -} - diff --git a/reference/opt/shaders-msl/comp/inverse.comp b/reference/opt/shaders-msl/comp/inverse.comp index f2f499b91eb..0a1d298b0da 100644 --- a/reference/opt/shaders-msl/comp/inverse.comp +++ b/reference/opt/shaders-msl/comp/inverse.comp @@ -5,34 +5,23 @@ using namespace metal; -struct MatrixOut -{ - float2x2 m2out; - float3x3 m3out; - float4x4 m4out; -}; - -struct MatrixIn -{ - float2x2 m2in; - float3x3 m3in; - float4x4 m4in; -}; - // Returns the determinant of a 2x2 matrix. -inline float spvDet2x2(float a1, float a2, float b1, float b2) +static inline __attribute__((always_inline)) +float spvDet2x2(float a1, float a2, float b1, float b2) { return a1 * b2 - b1 * a2; } // Returns the determinant of a 3x3 matrix. -inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +static inline __attribute__((always_inline)) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) { return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) float4x4 spvInverse4x4(float4x4 m) { float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -68,6 +57,7 @@ float4x4 spvInverse4x4(float4x4 m) // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. 
+static inline __attribute__((always_inline)) float3x3 spvInverse3x3(float3x3 m) { float3x3 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -95,6 +85,7 @@ float3x3 spvInverse3x3(float3x3 m) // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) float2x2 spvInverse2x2(float2x2 m) { float2x2 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -114,6 +105,22 @@ float2x2 spvInverse2x2(float2x2 m) return (det != 0.0f) ? (adj * (1.0f / det)) : m; } +struct MatrixOut +{ + float2x2 m2out; + float3x3 m3out; + float4x4 m4out; +}; + +struct MatrixIn +{ + float2x2 m2in; + float3x3 m3in; + float4x4 m4in; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device MatrixOut& _15 [[buffer(0)]], const device MatrixIn& _20 [[buffer(1)]]) { _15.m2out = spvInverse2x2(_20.m2in); diff --git a/reference/opt/shaders-msl/comp/local-invocation-id.comp b/reference/opt/shaders-msl/comp/local-invocation-id.comp index 772e5e0d867..45059905881 100644 --- a/reference/opt/shaders-msl/comp/local-invocation-id.comp +++ b/reference/opt/shaders-msl/comp/local-invocation-id.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/opt/shaders-msl/comp/local-invocation-index.comp b/reference/opt/shaders-msl/comp/local-invocation-index.comp index 41adbdca5cf..67426dd3f6b 100644 --- 
a/reference/opt/shaders-msl/comp/local-invocation-index.comp +++ b/reference/opt/shaders-msl/comp/local-invocation-index.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp b/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp new file mode 100644 index 00000000000..cf26178ee87 --- /dev/null +++ b/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct model_t +{ + float3x3 mtx_rm; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device model_t& model [[buffer(0)]]) +{ + if ((transpose(model.mtx_rm) * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0)))[0].x != 0.0) + { + model.mtx_rm = transpose(float3x3(float3(-5.0, -3.0, -5.0), float3(-2.0, 2.0, -5.0), float3(6.0, 3.0, -8.0))); + } +} + diff --git a/reference/opt/shaders-msl/comp/mat3.comp b/reference/opt/shaders-msl/comp/mat3.comp index 72f08dd85ed..31351ba57be 100644 --- a/reference/opt/shaders-msl/comp/mat3.comp +++ b/reference/opt/shaders-msl/comp/mat3.comp @@ -8,6 +8,8 @@ struct SSBO2 float3x3 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _22.out_data[gl_GlobalInvocationID.x] = float3x3(float3(10.0), float3(20.0), float3(40.0)); diff --git a/reference/opt/shaders-msl/comp/mod.comp 
b/reference/opt/shaders-msl/comp/mod.comp index 8574f87b7e2..94d739fe6c3 100644 --- a/reference/opt/shaders-msl/comp/mod.comp +++ b/reference/opt/shaders-msl/comp/mod.comp @@ -5,6 +5,13 @@ using namespace metal; +// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() +template +inline Tx mod(Tx x, Ty y) +{ + return x - y * floor(x / y); +} + struct SSBO { float4 in_data[1]; @@ -15,12 +22,7 @@ struct SSBO2 float4 out_data[1]; }; -// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() -template -Tx mod(Tx x, Ty y) -{ - return x - y * floor(x / y); -} +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _33 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { diff --git a/reference/opt/shaders-msl/comp/modf.comp b/reference/opt/shaders-msl/comp/modf.comp index 39e402337f8..df19cae502f 100644 --- a/reference/opt/shaders-msl/comp/modf.comp +++ b/reference/opt/shaders-msl/comp/modf.comp @@ -13,6 +13,8 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _35 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { float4 i; diff --git a/reference/opt/shaders-msl/comp/outer-product.comp b/reference/opt/shaders-msl/comp/outer-product.comp index 8e32db392ea..e589642dbda 100644 --- a/reference/opt/shaders-msl/comp/outer-product.comp +++ b/reference/opt/shaders-msl/comp/outer-product.comp @@ -23,6 +23,8 @@ struct ReadSSBO float4 v4; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]]) { _21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y); diff --git 
a/reference/opt/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp b/reference/opt/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp new file mode 100644 index 00000000000..c119186663d --- /dev/null +++ b/reference/opt/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp @@ -0,0 +1,116 @@ +#include +#include + +using namespace metal; + +struct SSBO_A +{ + float data[1]; +}; + +struct UBO_C +{ + float4 data[1024]; +}; + +struct Registers +{ + float reg; +}; + +struct SSBO_B +{ + uint2 data[1]; +}; + +struct UBO_D +{ + uint4 data[1024]; +}; + +struct SSBO_BRO +{ + uint2 data[1]; +}; + +struct SSBO_As +{ + float data[1]; +}; + +struct UBO_Cs +{ + float4 data[1024]; +}; + +struct SSBO_Bs +{ + uint2 data[1024]; +}; + +struct UBO_Ds +{ + uint4 data[1024]; +}; + +struct SSBO_BsRO +{ + uint2 data[1024]; +}; + +struct SSBO_E +{ + float data[1]; +}; + +struct UBO_G +{ + float4 data[1024]; +}; + +struct SSBO_F +{ + uint2 data[1]; +}; + +struct UBO_H +{ + uint4 data[1024]; +}; + +struct SSBO_I +{ + uint2 data[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +struct spvDescriptorSetBuffer0 +{ + device SSBO_A* ssbo_a [[id(0)]]; + constant UBO_C* ubo_c [[id(1)]]; + device SSBO_As* ssbo_as [[id(2)]][4]; + constant UBO_Cs* ubo_cs [[id(6)]][4]; +}; + +kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& _42 [[buffer(1)]], device void* spvBufferAliasSet2Binding0 [[buffer(2)]], constant void* spvBufferAliasSet2Binding1 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + device auto& ssbo_e = *(device SSBO_E*)spvBufferAliasSet2Binding0; + constant auto& ubo_g = *(constant UBO_G*)spvBufferAliasSet2Binding1; + device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding0; + 
constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding1; + const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding0; + device auto& ssbo_b = (device SSBO_B&)(*spvDescriptorSet0.ssbo_a); + constant auto& ubo_d = (constant UBO_D&)(*spvDescriptorSet0.ubo_c); + const device auto& ssbo_b_readonly = (const device SSBO_BRO&)(*spvDescriptorSet0.ssbo_a); + const device auto& ssbo_bs = (device SSBO_Bs* const device (&)[4])spvDescriptorSet0.ssbo_as; + const device auto& ubo_ds = (constant UBO_Ds* const device (&)[4])spvDescriptorSet0.ubo_cs; + const device auto& ssbo_bs_readonly = (const device SSBO_BsRO* const device (&)[4])spvDescriptorSet0.ssbo_as; + (*spvDescriptorSet0.ssbo_a).data[gl_GlobalInvocationID.x] = (*spvDescriptorSet0.ubo_c).data[gl_WorkGroupID.x].x + _42.reg; + ssbo_b.data[gl_GlobalInvocationID.x] = ubo_d.data[gl_WorkGroupID.y].xy + ssbo_b_readonly.data[gl_GlobalInvocationID.x]; + spvDescriptorSet0.ssbo_as[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = spvDescriptorSet0.ubo_cs[gl_WorkGroupID.x]->data[0].x; + ssbo_bs[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_ds[gl_WorkGroupID.x]->data[0].xy + ssbo_bs_readonly[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x]; + ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x].x; + ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y].xy + ssbo_i.data[gl_GlobalInvocationID.x]; +} + diff --git a/reference/opt/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp b/reference/opt/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp new file mode 100644 index 00000000000..9cef6b208f4 --- /dev/null +++ b/reference/opt/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp @@ -0,0 +1,116 @@ +#include +#include + +using namespace metal; + +struct SSBO_A +{ + float data[1]; +}; + +struct UBO_C +{ + float4 data[1024]; +}; + +struct Registers +{ + float reg; +}; + +struct SSBO_B +{ + uint2 
data[1]; +}; + +struct UBO_D +{ + uint4 data[1024]; +}; + +struct SSBO_BRO +{ + uint2 data[1]; +}; + +struct SSBO_As +{ + float data[1]; +}; + +struct UBO_Cs +{ + float4 data[1024]; +}; + +struct SSBO_Bs +{ + uint2 data[1024]; +}; + +struct UBO_Ds +{ + uint4 data[1024]; +}; + +struct SSBO_BsRO +{ + uint2 data[1024]; +}; + +struct SSBO_E +{ + float data[1]; +}; + +struct UBO_G +{ + float4 data[1024]; +}; + +struct SSBO_F +{ + uint2 data[1]; +}; + +struct UBO_H +{ + uint4 data[1024]; +}; + +struct SSBO_I +{ + uint2 data[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +struct spvDescriptorSetBuffer0 +{ + device SSBO_A* ssbo_a [[id(0)]]; + constant UBO_C* ubo_c [[id(1)]]; + device SSBO_As* ssbo_as [[id(2)]][4]; + constant UBO_Cs* ubo_cs [[id(6)]][4]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& _42 [[buffer(1)]], device void* spvBufferAliasSet2Binding0 [[buffer(2)]], constant void* spvBufferAliasSet2Binding1 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + device auto& ssbo_e = *(device SSBO_E*)spvBufferAliasSet2Binding0; + constant auto& ubo_g = *(constant UBO_G*)spvBufferAliasSet2Binding1; + device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding0; + constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding1; + const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding0; + device auto& ssbo_b = (device SSBO_B&)(*spvDescriptorSet0.ssbo_a); + constant auto& ubo_d = (constant UBO_D&)(*spvDescriptorSet0.ubo_c); + const device auto& ssbo_b_readonly = (const device SSBO_BRO&)(*spvDescriptorSet0.ssbo_a); + constant auto& ssbo_bs = (device SSBO_Bs* constant (&)[4])spvDescriptorSet0.ssbo_as; + constant auto& ubo_ds = (constant UBO_Ds* constant (&)[4])spvDescriptorSet0.ubo_cs; + constant auto& ssbo_bs_readonly = (const device SSBO_BsRO* constant 
(&)[4])spvDescriptorSet0.ssbo_as; + (*spvDescriptorSet0.ssbo_a).data[gl_GlobalInvocationID.x] = (*spvDescriptorSet0.ubo_c).data[gl_WorkGroupID.x].x + _42.reg; + ssbo_b.data[gl_GlobalInvocationID.x] = ubo_d.data[gl_WorkGroupID.y].xy + ssbo_b_readonly.data[gl_GlobalInvocationID.x]; + spvDescriptorSet0.ssbo_as[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = spvDescriptorSet0.ubo_cs[gl_WorkGroupID.x]->data[0].x; + ssbo_bs[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_ds[gl_WorkGroupID.x]->data[0].xy + ssbo_bs_readonly[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x]; + ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x].x; + ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y].xy + ssbo_i.data[gl_GlobalInvocationID.x]; +} + diff --git a/reference/opt/shaders-msl/comp/ray-query.nocompat.spv14.vk.comp b/reference/opt/shaders-msl/comp/ray-query.nocompat.spv14.vk.comp new file mode 100644 index 00000000000..b03d524c25a --- /dev/null +++ b/reference/opt/shaders-msl/comp/ray-query.nocompat.spv14.vk.comp @@ -0,0 +1,91 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include +using namespace metal::raytracing; + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Params +{ + uint ray_flags; + uint cull_mask; + char _m2_pad[8]; + packed_float3 origin; + float tmin; + packed_float3 dir; + float tmax; + float thit; +}; + +kernel void main0(constant Params& _18 [[buffer(1)]], acceleration_structure AS0 [[buffer(0)]], acceleration_structure AS1 [[buffer(2)]]) +{ + intersection_query q; + intersection_params _intersection_params_; + q.reset(ray(_18.origin, _18.dir, _18.tmin, _18.tmax), AS0, _intersection_params_); + spvUnsafeArray, 2> q2; + intersection_params _intersection_params_; + q2[1].reset(ray(_18.origin, _18.dir, _18.tmin, _18.tmax), AS1, _intersection_params_); + bool _63 = q.next(); + q2[0].abort(); + q.commit_bounding_box_intersection(_18.thit); + _14.commit_triangle_intersection(); + float _71 = q.get_ray_min_distance(); + float3 _74 = q.get_world_space_ray_origin(); + float3 _75 = q.get_world_space_ray_direction(); + uint _80 = (uint)q2[1].get_committed_intersection_type(); + uint _83 = (uint)q2[0].get_committed_intersection_type(); + bool _85 = q2[1].is_candidate_non_opaque_bounding_box(); + float _87 = q2[1].get_committed_distance(); + float _89 = q2[1].get_committed_distance(); + int _92 = q.get_committed_user_instance_id(); + int _94 = q2[0].get_committed_instance_id(); + int _96 = q2[1].get_committed_geometry_id(); + int _97 = 
q.get_committed_primitive_id(); + float2 _100 = q2[0].get_committed_triangle_barycentric_coord(); + bool _103 = q.is_committed_triangle_front_facing(); + float3 _104 = q.get_committed_ray_direction(); + float3 _106 = q2[0].get_committed_ray_origin(); + float4x3 _110 = q.get_committed_object_to_world_transform(); + float4x3 _112 = q2[1].get_committed_world_to_object_transform(); +} + diff --git a/reference/opt/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp b/reference/opt/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp new file mode 100644 index 00000000000..dde7f47b085 --- /dev/null +++ b/reference/opt/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp @@ -0,0 +1,71 @@ +#include +#include +#if __METAL_VERSION__ >= 230 +#include +using namespace metal::raytracing; +#endif + +using namespace metal; + +struct Params +{ + uint ray_flags; + uint cull_mask; + char _m2_pad[8]; + packed_float3 origin; + float tmin; + packed_float3 dir; + float tmax; + float thit; +}; + +kernel void main0(constant Params& _18 [[buffer(1)]], raytracing::acceleration_structure AS0 [[buffer(0)]], raytracing::acceleration_structure AS1 [[buffer(2)]]) +{ + raytracing::intersection_query q; + q.reset(ray(_18.origin, _18.dir, _18.tmin, _18.tmax), AS0, intersection_params()); + raytracing::intersection_query q2[2]; + q2[1].reset(ray(_18.origin, _18.dir, _18.tmin, _18.tmax), AS1, intersection_params()); + bool _63 = q.next(); + bool res = _63; + q2[0].abort(); + q.commit_bounding_box_intersection(_18.thit); + q2[1].commit_triangle_intersection(); + float _71 = q.get_ray_min_distance(); + float fval = _71; + float3 _74 = q.get_world_space_ray_direction(); + float3 fvals = _74; + float3 _75 = q.get_world_space_ray_origin(); + fvals = _75; + uint _80 = uint(q2[1].get_committed_intersection_type()); + uint type = _80; + uint _83 = uint(q2[0].get_candidate_intersection_type()) - 1; + type = _83; + bool _85 = q2[1].is_candidate_non_opaque_bounding_box(); + res = _85; + 
float _87 = q2[1].get_committed_distance(); + fval = _87; + float _89 = q2[1].get_candidate_triangle_distance(); + fval = _89; + int _92 = q.get_committed_user_instance_id(); + int ival = _92; + int _94 = q2[0].get_candidate_instance_id(); + ival = _94; + int _96 = q2[1].get_candidate_geometry_id(); + ival = _96; + int _97 = q.get_committed_primitive_id(); + ival = _97; + float2 _100 = q2[0].get_candidate_triangle_barycentric_coord(); + fvals.x = _100.x; + fvals.y = _100.y; + bool _107 = q.is_committed_triangle_front_facing(); + res = _107; + float3 _108 = q.get_candidate_ray_direction(); + fvals = _108; + float3 _110 = q2[0].get_committed_ray_origin(); + fvals = _110; + float4x3 _114 = q.get_candidate_object_to_world_transform(); + float4x3 matrices = _114; + float4x3 _116 = q2[1].get_committed_world_to_object_transform(); + matrices = _116; +} + diff --git a/reference/opt/shaders-msl/comp/read-write-only.comp b/reference/opt/shaders-msl/comp/read-write-only.comp index 7547b417d8f..0cf8d8e3215 100644 --- a/reference/opt/shaders-msl/comp/read-write-only.comp +++ b/reference/opt/shaders-msl/comp/read-write-only.comp @@ -21,6 +21,8 @@ struct SSBO1 float4 data3; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _10 [[buffer(0)]], const device SSBO0& _15 [[buffer(1)]], device SSBO1& _21 [[buffer(2)]]) { _10.data4 = _15.data0 + _21.data2; diff --git a/reference/opt/shaders-msl/comp/rmw-matrix.comp b/reference/opt/shaders-msl/comp/rmw-matrix.comp index 150db7ede98..b53a3a75c27 100644 --- a/reference/opt/shaders-msl/comp/rmw-matrix.comp +++ b/reference/opt/shaders-msl/comp/rmw-matrix.comp @@ -13,6 +13,8 @@ struct SSBO float4x4 c1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _11 [[buffer(0)]]) { _11.a *= _11.a1; diff --git a/reference/opt/shaders-msl/comp/rmw-opt.comp b/reference/opt/shaders-msl/comp/rmw-opt.comp index 05e1f6f283c..f93967da538 100644 --- 
a/reference/opt/shaders-msl/comp/rmw-opt.comp +++ b/reference/opt/shaders-msl/comp/rmw-opt.comp @@ -8,6 +8,8 @@ struct SSBO int a; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.a += 10; diff --git a/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp index 312a6f9453a..9bf87817747 100644 --- a/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp @@ -10,12 +10,16 @@ struct SSBO float c; float d; float e; + float f; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.c = abs(_9.a - _9.b); _9.d = abs(_9.a); _9.e = sign(_9.a); + _9.f = abs((_9.a - 1.0) - (_9.b - 2.0)); } diff --git a/reference/opt/shaders-msl/comp/shared-matrix-array-of-array.comp b/reference/opt/shaders-msl/comp/shared-matrix-array-of-array.comp new file mode 100644 index 00000000000..0e17f95cb85 --- /dev/null +++ b/reference/opt/shaders-msl/comp/shared-matrix-array-of-array.comp @@ -0,0 +1,1353 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data 
spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + 
spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + 
+ spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator 
matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) 
threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const 
object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + 
threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data 
spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant 
matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < 
Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + 
{ + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& 
operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct S1 +{ + 
spvStorage_float4x3 a[2]; + float b; + spvUnsafeArray c; +}; + +struct S2 +{ + int4 a; + spvUnsafeArray, 1>, 3> b; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device block& _383 [[buffer(0)]]) +{ + threadgroup S1 s1; + threadgroup S2 s2; + s1.a[0] = spvStorage_float4x3(float4x3(float3(0.0, 2.0, -8.0), float3(6.0, 7.0, 5.0), float3(-6.0, 1.0, 9.0), float3(-4.0, -3.0, 4.0))); + s1.a[1] = spvStorage_float4x3(float4x3(float3(4.0, 9.0, -9.0), float3(-8.0, -9.0, 8.0), float3(0.0, 4.0, -4.0), float3(7.0, 2.0, -1.0))); + s1.b = 7.0; + s1.c[0] = float2(-5.0, -4.0); + s1.c[1] = float2(3.0, -5.0); + s1.c[2] = float2(-3.0, -1.0); + s2.a = int4(1, 0, -3, 1); + s2.b[0][0][0] = short(true); + s2.b[0][0][1] = short(false); + s2.b[0][0][2] = short(false); + s2.b[1][0][0] = short(true); + s2.b[1][0][1] = short(false); + s2.b[1][0][2] = short(true); + s2.b[2][0][0] = short(false); + s2.b[2][0][1] = short(true); + s2.b[2][0][2] = short(true); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool _464 = abs(-float4x3(s1.a[0])[0].x) < 0.0500000007450580596923828125; + bool _449; + if (_464) + { + _449 = abs(2.0 - float4x3(s1.a[0])[0].y) < 0.0500000007450580596923828125; + } + else + { + _449 = _464; + } + bool _457; + if (_449) + { + _457 = abs((-8.0) - float4x3(s1.a[0])[0].z) < 0.0500000007450580596923828125; + } + else + { + _457 = _449; + } + bool _412; + if (_457) + { + bool _514 = abs(6.0 - float4x3(s1.a[0])[1].x) < 0.0500000007450580596923828125; + bool _499; + if (_514) + { + _499 = abs(7.0 - float4x3(s1.a[0])[1].y) < 0.0500000007450580596923828125; + } + else + { + _499 = _514; + } + bool _507; + if (_499) + { + _507 = abs(5.0 - float4x3(s1.a[0])[1].z) < 0.0500000007450580596923828125; + } + else + { + _507 = _499; + } + _412 = _507; + } + else + { + _412 = _457; + } + bool _420; + if (_412) + 
{ + bool _564 = abs((-6.0) - float4x3(s1.a[0])[2].x) < 0.0500000007450580596923828125; + bool _549; + if (_564) + { + _549 = abs(1.0 - float4x3(s1.a[0])[2].y) < 0.0500000007450580596923828125; + } + else + { + _549 = _564; + } + bool _557; + if (_549) + { + _557 = abs(9.0 - float4x3(s1.a[0])[2].z) < 0.0500000007450580596923828125; + } + else + { + _557 = _549; + } + _420 = _557; + } + else + { + _420 = _412; + } + bool _428; + if (_420) + { + bool _614 = abs((-4.0) - float4x3(s1.a[0])[3].x) < 0.0500000007450580596923828125; + bool _599; + if (_614) + { + _599 = abs((-3.0) - float4x3(s1.a[0])[3].y) < 0.0500000007450580596923828125; + } + else + { + _599 = _614; + } + bool _607; + if (_599) + { + _607 = abs(4.0 - float4x3(s1.a[0])[3].z) < 0.0500000007450580596923828125; + } + else + { + _607 = _599; + } + _428 = _607; + } + else + { + _428 = _420; + } + bool _251; + if (_428) + { + bool _703 = abs(4.0 - float4x3(s1.a[1])[0].x) < 0.0500000007450580596923828125; + bool _688; + if (_703) + { + _688 = abs(9.0 - float4x3(s1.a[1])[0].y) < 0.0500000007450580596923828125; + } + else + { + _688 = _703; + } + bool _696; + if (_688) + { + _696 = abs((-9.0) - float4x3(s1.a[1])[0].z) < 0.0500000007450580596923828125; + } + else + { + _696 = _688; + } + bool _651; + if (_696) + { + bool _753 = abs((-8.0) - float4x3(s1.a[1])[1].x) < 0.0500000007450580596923828125; + bool _738; + if (_753) + { + _738 = abs((-9.0) - float4x3(s1.a[1])[1].y) < 0.0500000007450580596923828125; + } + else + { + _738 = _753; + } + bool _746; + if (_738) + { + _746 = abs(8.0 - float4x3(s1.a[1])[1].z) < 0.0500000007450580596923828125; + } + else + { + _746 = _738; + } + _651 = _746; + } + else + { + _651 = _696; + } + bool _659; + if (_651) + { + bool _803 = abs(-float4x3(s1.a[1])[2].x) < 0.0500000007450580596923828125; + bool _788; + if (_803) + { + _788 = abs(4.0 - float4x3(s1.a[1])[2].y) < 0.0500000007450580596923828125; + } + else + { + _788 = _803; + } + bool _796; + if (_788) + { + _796 = abs((-4.0) - 
float4x3(s1.a[1])[2].z) < 0.0500000007450580596923828125; + } + else + { + _796 = _788; + } + _659 = _796; + } + else + { + _659 = _651; + } + bool _667; + if (_659) + { + bool _853 = abs(7.0 - float4x3(s1.a[1])[3].x) < 0.0500000007450580596923828125; + bool _838; + if (_853) + { + _838 = abs(2.0 - float4x3(s1.a[1])[3].y) < 0.0500000007450580596923828125; + } + else + { + _838 = _853; + } + bool _846; + if (_838) + { + _846 = abs((-1.0) - float4x3(s1.a[1])[3].z) < 0.0500000007450580596923828125; + } + else + { + _846 = _838; + } + _667 = _846; + } + else + { + _667 = _659; + } + _251 = _667; + } + else + { + _251 = _428; + } + bool _260; + if (_251) + { + _260 = abs(7.0 - s1.b) < 0.0500000007450580596923828125; + } + else + { + _260 = _251; + } + bool _269; + if (_260) + { + bool _900 = abs((-5.0) - s1.c[0].x) < 0.0500000007450580596923828125; + bool _893; + if (_900) + { + _893 = abs((-4.0) - s1.c[0].y) < 0.0500000007450580596923828125; + } + else + { + _893 = _900; + } + _269 = _893; + } + else + { + _269 = _260; + } + bool _278; + if (_269) + { + bool _933 = abs(3.0 - s1.c[1].x) < 0.0500000007450580596923828125; + bool _926; + if (_933) + { + _926 = abs((-5.0) - s1.c[1].y) < 0.0500000007450580596923828125; + } + else + { + _926 = _933; + } + _278 = _926; + } + else + { + _278 = _269; + } + bool _287; + if (_278) + { + bool _966 = abs((-3.0) - s1.c[2].x) < 0.0500000007450580596923828125; + bool _959; + if (_966) + { + _959 = abs((-1.0) - s1.c[2].y) < 0.0500000007450580596923828125; + } + else + { + _959 = _966; + } + _287 = _959; + } + else + { + _287 = _278; + } + bool _296; + if (_287) + { + _296 = all(int4(1, 0, -3, 1) == s2.a); + } + else + { + _296 = _287; + } + bool _305; + if (_296) + { + _305 = true == bool(s2.b[0][0][0]); + } + else + { + _305 = _296; + } + bool _314; + if (_305) + { + _314 = false == bool(s2.b[0][0][1]); + } + else + { + _314 = _305; + } + bool _323; + if (_314) + { + _323 = false == bool(s2.b[0][0][2]); + } + else + { + _323 = _314; + 
} + bool _332; + if (_323) + { + _332 = true == bool(s2.b[1][0][0]); + } + else + { + _332 = _323; + } + bool _341; + if (_332) + { + _341 = false == bool(s2.b[1][0][1]); + } + else + { + _341 = _332; + } + bool _350; + if (_341) + { + _350 = true == bool(s2.b[1][0][2]); + } + else + { + _350 = _341; + } + bool _359; + if (_350) + { + _359 = false == bool(s2.b[2][0][0]); + } + else + { + _359 = _350; + } + bool _368; + if (_359) + { + _368 = true == bool(s2.b[2][0][1]); + } + else + { + _368 = _359; + } + bool _377; + if (_368) + { + _377 = true == bool(s2.b[2][0][2]); + } + else + { + _377 = _368; + } + if (_377) + { + _383.passed++; + } +} + diff --git a/reference/opt/shaders-msl/comp/shared-matrix-cast.comp b/reference/opt/shaders-msl/comp/shared-matrix-cast.comp new file mode 100644 index 00000000000..32c8e823d4e --- /dev/null +++ b/reference/opt/shaders-msl/comp/shared-matrix-cast.comp @@ -0,0 +1,1017 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& 
operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < 
Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const 
threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + 
columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + + spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device 
spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i 
= 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + 
threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data 
spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) 
ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread 
spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const 
threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; 
+typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct S1 +{ + float4 a; + spvStorage_float3x2 b; + short4 c; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device block& _212 [[buffer(0)]]) +{ + threadgroup S1 s1; + s1.a = float4(1.0, -5.0, -9.0, -5.0); + s1.b = spvStorage_float3x2(float3x2(float2(1.0, -7.0), float2(1.0, 2.0), float2(8.0, 7.0))); + s1.c = short4(bool4(false, true, false, false)); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool _264 = abs(1.0 - s1.a.x) < 0.0500000007450580596923828125; + bool _241; + if (_264) + { + _241 = abs((-5.0) - s1.a.y) < 0.0500000007450580596923828125; + } + else + { + _241 = _264; + } + bool _249; + if (_241) + { + _249 = abs((-9.0) - s1.a.z) < 0.0500000007450580596923828125; + } + else + { + _249 = _241; + } + bool _257; + if (_249) + { + _257 = abs((-5.0) - s1.a.w) < 0.0500000007450580596923828125; + } + else + { + _257 = _249; + } + bool _197; + if (_257) + { + bool _340 = abs(1.0 - float3x2(s1.b)[0].x) < 0.0500000007450580596923828125; + bool _333; + if (_340) + { + _333 = abs((-7.0) - float3x2(s1.b)[0].y) < 0.0500000007450580596923828125; + } + else + { + _333 = _340; + } + bool _306; + if (_333) + { + bool _373 = abs(1.0 - float3x2(s1.b)[1].x) < 0.0500000007450580596923828125; + bool _366; + if (_373) + { + _366 = abs(2.0 - float3x2(s1.b)[1].y) < 0.0500000007450580596923828125; + } + else + { + _366 = _373; + } + _306 = _366; + } + else + { + _306 = _333; + } + bool _314; + if (_306) + { + bool _406 = abs(8.0 - float3x2(s1.b)[2].x) < 
0.0500000007450580596923828125; + bool _399; + if (_406) + { + _399 = abs(7.0 - float3x2(s1.b)[2].y) < 0.0500000007450580596923828125; + } + else + { + _399 = _406; + } + _314 = _399; + } + else + { + _314 = _306; + } + _197 = _314; + } + else + { + _197 = _257; + } + bool _206; + if (_197) + { + _206 = all(bool4(false, true, false, false) == bool4(s1.c)); + } + else + { + _206 = _197; + } + if (_206) + { + _212.passed++; + } +} + diff --git a/reference/opt/shaders-msl/comp/shared-matrix-nested-struct-array.comp b/reference/opt/shaders-msl/comp/shared-matrix-nested-struct-array.comp new file mode 100644 index 00000000000..dfbd7a76664 --- /dev/null +++ b/reference/opt/shaders-msl/comp/shared-matrix-nested-struct-array.comp @@ -0,0 +1,1369 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for 
(size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + 
thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + 
spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& 
operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + + spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + 
spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& 
m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef 
__HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 
0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ 
+ spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + 
ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data 
spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return 
*this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } 
+ #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct sA +{ + spvStorage_float2x3 mA; +}; + +struct sB +{ + spvStorage_float2x2 mA; + spvStorage_float3x2 mB; + uint3 mC; +}; + +struct sC +{ + sA mA; + sB mB; +}; + +struct sD +{ + sC mA; +}; + +struct sE +{ + spvStorage_float3x2 mA; + spvStorage_float4x3 mB; +}; + +struct sF +{ + sE mA; +}; + +struct sG +{ + sF mA; +}; + +struct sH +{ + spvUnsafeArray mA; +}; + +struct S1 +{ + sD a; + sG b; + spvUnsafeArray c; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device block& _424 [[buffer(0)]]) +{ + threadgroup S1 s1; + s1.a.mA.mA.mA = spvStorage_float2x3(float2x3(float3(6.0, 8.0, 8.0), float3(0.0, -4.0, -5.0))); + s1.a.mA.mB.mA = spvStorage_float2x2(float2x2(float2(9.0, -4.0), float2(-6.0, -1.0))); + s1.a.mA.mB.mB = spvStorage_float3x2(float3x2(float2(-1.0, -2.0), float2(1.0, 6.0), float2(5.0, 7.0))); + s1.a.mA.mB.mC = uint3(3u, 1u, 5u); + s1.b.mA.mA.mA = spvStorage_float3x2(float3x2(float2(8.0, 3.0), float2(0.0, 2.0), float2(1.0, 8.0))); + s1.b.mA.mA.mB = 
spvStorage_float4x3(float4x3(float3(0.0, 9.0, -1.0), float3(-1.0, -7.0, 7.0), float3(-4.0, -3.0, 1.0), float3(-4.0, -9.0, 1.0))); + s1.c[0].mA[0] = short3(bool3(true, false, false)); + s1.c[0].mA[1] = short3(bool3(true, false, false)); + s1.c[1].mA[0] = short3(bool3(false)); + s1.c[1].mA[1] = short3(bool3(false)); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool _484 = abs(6.0 - float2x3(s1.a.mA.mA.mA)[0].x) < 0.0500000007450580596923828125; + bool _469; + if (_484) + { + _469 = abs(8.0 - float2x3(s1.a.mA.mA.mA)[0].y) < 0.0500000007450580596923828125; + } + else + { + _469 = _484; + } + bool _477; + if (_469) + { + _477 = abs(8.0 - float2x3(s1.a.mA.mA.mA)[0].z) < 0.0500000007450580596923828125; + } + else + { + _477 = _469; + } + bool _448; + if (_477) + { + bool _534 = abs(-float2x3(s1.a.mA.mA.mA)[1].x) < 0.0500000007450580596923828125; + bool _519; + if (_534) + { + _519 = abs((-4.0) - float2x3(s1.a.mA.mA.mA)[1].y) < 0.0500000007450580596923828125; + } + else + { + _519 = _534; + } + bool _527; + if (_519) + { + _527 = abs((-5.0) - float2x3(s1.a.mA.mA.mA)[1].z) < 0.0500000007450580596923828125; + } + else + { + _527 = _519; + } + _448 = _527; + } + else + { + _448 = _477; + } + bool _346; + if (_448) + { + bool _593 = abs(9.0 - float2x2(s1.a.mA.mB.mA)[0].x) < 0.0500000007450580596923828125; + bool _586; + if (_593) + { + _586 = abs((-4.0) - float2x2(s1.a.mA.mB.mA)[0].y) < 0.0500000007450580596923828125; + } + else + { + _586 = _593; + } + bool _567; + if (_586) + { + bool _626 = abs((-6.0) - float2x2(s1.a.mA.mB.mA)[1].x) < 0.0500000007450580596923828125; + bool _619; + if (_626) + { + _619 = abs((-1.0) - float2x2(s1.a.mA.mB.mA)[1].y) < 0.0500000007450580596923828125; + } + else + { + _619 = _626; + } + _567 = _619; + } + else + { + _567 = _586; + } + _346 = _567; + } + else + { + _346 = _448; + } + bool _355; + if (_346) + { + bool _688 = abs((-1.0) - 
float3x2(s1.a.mA.mB.mB)[0].x) < 0.0500000007450580596923828125; + bool _681; + if (_688) + { + _681 = abs((-2.0) - float3x2(s1.a.mA.mB.mB)[0].y) < 0.0500000007450580596923828125; + } + else + { + _681 = _688; + } + bool _654; + if (_681) + { + bool _721 = abs(1.0 - float3x2(s1.a.mA.mB.mB)[1].x) < 0.0500000007450580596923828125; + bool _714; + if (_721) + { + _714 = abs(6.0 - float3x2(s1.a.mA.mB.mB)[1].y) < 0.0500000007450580596923828125; + } + else + { + _714 = _721; + } + _654 = _714; + } + else + { + _654 = _681; + } + bool _662; + if (_654) + { + bool _754 = abs(5.0 - float3x2(s1.a.mA.mB.mB)[2].x) < 0.0500000007450580596923828125; + bool _747; + if (_754) + { + _747 = abs(7.0 - float3x2(s1.a.mA.mB.mB)[2].y) < 0.0500000007450580596923828125; + } + else + { + _747 = _754; + } + _662 = _747; + } + else + { + _662 = _654; + } + _355 = _662; + } + else + { + _355 = _346; + } + bool _364; + if (_355) + { + _364 = all(uint3(3u, 1u, 5u) == s1.a.mA.mB.mC); + } + else + { + _364 = _355; + } + bool _373; + if (_364) + { + bool _822 = abs(8.0 - float3x2(s1.b.mA.mA.mA)[0].x) < 0.0500000007450580596923828125; + bool _815; + if (_822) + { + _815 = abs(3.0 - float3x2(s1.b.mA.mA.mA)[0].y) < 0.0500000007450580596923828125; + } + else + { + _815 = _822; + } + bool _788; + if (_815) + { + bool _855 = abs(-float3x2(s1.b.mA.mA.mA)[1].x) < 0.0500000007450580596923828125; + bool _848; + if (_855) + { + _848 = abs(2.0 - float3x2(s1.b.mA.mA.mA)[1].y) < 0.0500000007450580596923828125; + } + else + { + _848 = _855; + } + _788 = _848; + } + else + { + _788 = _815; + } + bool _796; + if (_788) + { + bool _888 = abs(1.0 - float3x2(s1.b.mA.mA.mA)[2].x) < 0.0500000007450580596923828125; + bool _881; + if (_888) + { + _881 = abs(8.0 - float3x2(s1.b.mA.mA.mA)[2].y) < 0.0500000007450580596923828125; + } + else + { + _881 = _888; + } + _796 = _881; + } + else + { + _796 = _788; + } + _373 = _796; + } + else + { + _373 = _364; + } + bool _382; + if (_373) + { + bool _970 = 
abs(-float4x3(s1.b.mA.mA.mB)[0].x) < 0.0500000007450580596923828125; + bool _955; + if (_970) + { + _955 = abs(9.0 - float4x3(s1.b.mA.mA.mB)[0].y) < 0.0500000007450580596923828125; + } + else + { + _955 = _970; + } + bool _963; + if (_955) + { + _963 = abs((-1.0) - float4x3(s1.b.mA.mA.mB)[0].z) < 0.0500000007450580596923828125; + } + else + { + _963 = _955; + } + bool _918; + if (_963) + { + bool _1020 = abs((-1.0) - float4x3(s1.b.mA.mA.mB)[1].x) < 0.0500000007450580596923828125; + bool _1005; + if (_1020) + { + _1005 = abs((-7.0) - float4x3(s1.b.mA.mA.mB)[1].y) < 0.0500000007450580596923828125; + } + else + { + _1005 = _1020; + } + bool _1013; + if (_1005) + { + _1013 = abs(7.0 - float4x3(s1.b.mA.mA.mB)[1].z) < 0.0500000007450580596923828125; + } + else + { + _1013 = _1005; + } + _918 = _1013; + } + else + { + _918 = _963; + } + bool _926; + if (_918) + { + bool _1070 = abs((-4.0) - float4x3(s1.b.mA.mA.mB)[2].x) < 0.0500000007450580596923828125; + bool _1055; + if (_1070) + { + _1055 = abs((-3.0) - float4x3(s1.b.mA.mA.mB)[2].y) < 0.0500000007450580596923828125; + } + else + { + _1055 = _1070; + } + bool _1063; + if (_1055) + { + _1063 = abs(1.0 - float4x3(s1.b.mA.mA.mB)[2].z) < 0.0500000007450580596923828125; + } + else + { + _1063 = _1055; + } + _926 = _1063; + } + else + { + _926 = _918; + } + bool _934; + if (_926) + { + bool _1120 = abs((-4.0) - float4x3(s1.b.mA.mA.mB)[3].x) < 0.0500000007450580596923828125; + bool _1105; + if (_1120) + { + _1105 = abs((-9.0) - float4x3(s1.b.mA.mA.mB)[3].y) < 0.0500000007450580596923828125; + } + else + { + _1105 = _1120; + } + bool _1113; + if (_1105) + { + _1113 = abs(1.0 - float4x3(s1.b.mA.mA.mB)[3].z) < 0.0500000007450580596923828125; + } + else + { + _1113 = _1105; + } + _934 = _1113; + } + else + { + _934 = _926; + } + _382 = _934; + } + else + { + _382 = _373; + } + bool _391; + if (_382) + { + _391 = all(bool3(true, false, false) == bool3(s1.c[0].mA[0])); + } + else + { + _391 = _382; + } + bool _400; + if (_391) + { + 
_400 = all(bool3(true, false, false) == bool3(s1.c[0].mA[1])); + } + else + { + _400 = _391; + } + bool _409; + if (_400) + { + _409 = all(bool3(false) == bool3(s1.c[1].mA[0])); + } + else + { + _409 = _400; + } + bool _418; + if (_409) + { + _418 = all(bool3(false) == bool3(s1.c[1].mA[1])); + } + else + { + _418 = _409; + } + if (_418) + { + _424.passed++; + } +} + diff --git a/reference/opt/shaders-msl/comp/shared-matrix-nested-struct.comp b/reference/opt/shaders-msl/comp/shared-matrix-nested-struct.comp new file mode 100644 index 00000000000..6565536651d --- /dev/null +++ b/reference/opt/shaders-msl/comp/shared-matrix-nested-struct.comp @@ -0,0 +1,1443 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device 
matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for 
(size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const 
ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + 
device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + + spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& 
operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device 
spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + 
columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; 
+ } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + 
return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant 
spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data 
spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data 
= default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; 
+typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct S1 +{ + uint a; + float4 b; +}; + +struct sA +{ + spvStorage_float4x4 mA; + short3 mB; + short4 mC; +}; + +struct sB +{ + short2 mA; +}; + +struct sC +{ + float mA; + uint4 mB; + float mC; +}; + +struct sD +{ + sA mA; + sB mB; + sC mC; +}; + +struct sE +{ + sD mA; +}; + +struct sF +{ + uint3 mA; + short mB; +}; + +struct sG +{ + sF mA; + spvStorage_float3x2 mB; +}; + +struct sH +{ + sG mA; + float2 mB; +}; + +struct sI +{ + spvStorage_float2x2 mA; + short3 mB; + short4 mC; +}; + +struct sJ +{ + sI mA; + short3 mB; +}; + +struct sK +{ + short2 mA; + sJ mB; + int2 mC; +}; + +struct S2 +{ + sE a; + int3 b; + sH c; + sK d; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device block& _612 [[buffer(0)]]) +{ + threadgroup S1 s1; + threadgroup S2 s2; + s1.a = 0u; + s1.b = float4(8.0, 8.0, 0.0, -4.0); + s2.a.mA.mA.mA = spvStorage_float4x4(float4x4(float4(-5.0, 9.0, -4.0, -6.0), float4(-1.0, -1.0, -2.0, 1.0), float4(6.0, 5.0, 7.0, -2.0), float4(-4.0, -9.0, 8.0, 3.0))); + s2.a.mA.mA.mB = short3(bool3(true, false, false)); + s2.a.mA.mA.mC = short4(bool4(true, true, true, false)); + s2.a.mA.mB.mA = short2(bool2(true)); + s2.a.mA.mC.mA = 7.0; + s2.a.mA.mC.mB = uint4(8u, 6u, 2u, 0u); + s2.a.mA.mC.mC = -9.0; + s2.b = int3(1, -4, 0); + s2.c.mA.mA.mA = uint3(4u, 9u, 1u); + s2.c.mA.mA.mB = short(false); + s2.c.mA.mB = spvStorage_float3x2(float3x2(float2(3.0, -5.0), float2(-1.0, -5.0), float2(-1.0, -9.0))); + s2.c.mB = float2(-6.0, -9.0); + s2.d.mA = short2(bool2(true, false)); + s2.d.mB.mA.mA = spvStorage_float2x2(float2x2(float2(-2.0, 3.0), float2(7.0, 2.0))); + s2.d.mB.mA.mB = short3(bool3(false)); + s2.d.mB.mA.mC = short4(bool4(false, false, false, true)); + s2.d.mB.mB = short3(bool3(true, false, false)); + s2.d.mC = int2(-9, 0); + threadgroup_barrier(mem_flags::mem_threadgroup); + 
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool _622 = 0u == s1.a; + bool _444; + if (_622) + { + bool _668 = abs(8.0 - s1.b.x) < 0.0500000007450580596923828125; + bool _645; + if (_668) + { + _645 = abs(8.0 - s1.b.y) < 0.0500000007450580596923828125; + } + else + { + _645 = _668; + } + bool _653; + if (_645) + { + _653 = abs(-s1.b.z) < 0.0500000007450580596923828125; + } + else + { + _653 = _645; + } + bool _661; + if (_653) + { + _661 = abs((-4.0) - s1.b.w) < 0.0500000007450580596923828125; + } + else + { + _661 = _653; + } + _444 = _661; + } + else + { + _444 = _622; + } + bool _453; + if (_444) + { + bool _774 = abs((-5.0) - float4x4(s2.a.mA.mA.mA)[0].x) < 0.0500000007450580596923828125; + bool _751; + if (_774) + { + _751 = abs(9.0 - float4x4(s2.a.mA.mA.mA)[0].y) < 0.0500000007450580596923828125; + } + else + { + _751 = _774; + } + bool _759; + if (_751) + { + _759 = abs((-4.0) - float4x4(s2.a.mA.mA.mA)[0].z) < 0.0500000007450580596923828125; + } + else + { + _759 = _751; + } + bool _767; + if (_759) + { + _767 = abs((-6.0) - float4x4(s2.a.mA.mA.mA)[0].w) < 0.0500000007450580596923828125; + } + else + { + _767 = _759; + } + bool _712; + if (_767) + { + bool _841 = abs((-1.0) - float4x4(s2.a.mA.mA.mA)[1].x) < 0.0500000007450580596923828125; + bool _818; + if (_841) + { + _818 = abs((-1.0) - float4x4(s2.a.mA.mA.mA)[1].y) < 0.0500000007450580596923828125; + } + else + { + _818 = _841; + } + bool _826; + if (_818) + { + _826 = abs((-2.0) - float4x4(s2.a.mA.mA.mA)[1].z) < 0.0500000007450580596923828125; + } + else + { + _826 = _818; + } + bool _834; + if (_826) + { + _834 = abs(1.0 - float4x4(s2.a.mA.mA.mA)[1].w) < 0.0500000007450580596923828125; + } + else + { + _834 = _826; + } + _712 = _834; + } + else + { + _712 = _767; + } + bool _720; + if (_712) + { + bool _908 = abs(6.0 - float4x4(s2.a.mA.mA.mA)[2].x) < 0.0500000007450580596923828125; + bool _885; + if (_908) + { + _885 = abs(5.0 - 
float4x4(s2.a.mA.mA.mA)[2].y) < 0.0500000007450580596923828125; + } + else + { + _885 = _908; + } + bool _893; + if (_885) + { + _893 = abs(7.0 - float4x4(s2.a.mA.mA.mA)[2].z) < 0.0500000007450580596923828125; + } + else + { + _893 = _885; + } + bool _901; + if (_893) + { + _901 = abs((-2.0) - float4x4(s2.a.mA.mA.mA)[2].w) < 0.0500000007450580596923828125; + } + else + { + _901 = _893; + } + _720 = _901; + } + else + { + _720 = _712; + } + bool _728; + if (_720) + { + bool _975 = abs((-4.0) - float4x4(s2.a.mA.mA.mA)[3].x) < 0.0500000007450580596923828125; + bool _952; + if (_975) + { + _952 = abs((-9.0) - float4x4(s2.a.mA.mA.mA)[3].y) < 0.0500000007450580596923828125; + } + else + { + _952 = _975; + } + bool _960; + if (_952) + { + _960 = abs(8.0 - float4x4(s2.a.mA.mA.mA)[3].z) < 0.0500000007450580596923828125; + } + else + { + _960 = _952; + } + bool _968; + if (_960) + { + _968 = abs(3.0 - float4x4(s2.a.mA.mA.mA)[3].w) < 0.0500000007450580596923828125; + } + else + { + _968 = _960; + } + _728 = _968; + } + else + { + _728 = _720; + } + _453 = _728; + } + else + { + _453 = _444; + } + bool _462; + if (_453) + { + _462 = all(bool3(true, false, false) == bool3(s2.a.mA.mA.mB)); + } + else + { + _462 = _453; + } + bool _471; + if (_462) + { + _471 = all(bool4(true, true, true, false) == bool4(s2.a.mA.mA.mC)); + } + else + { + _471 = _462; + } + bool _480; + if (_471) + { + _480 = all(bool2(true) == bool2(s2.a.mA.mB.mA)); + } + else + { + _480 = _471; + } + bool _489; + if (_480) + { + _489 = abs(7.0 - s2.a.mA.mC.mA) < 0.0500000007450580596923828125; + } + else + { + _489 = _480; + } + bool _498; + if (_489) + { + _498 = all(uint4(8u, 6u, 2u, 0u) == s2.a.mA.mC.mB); + } + else + { + _498 = _489; + } + bool _507; + if (_498) + { + _507 = abs((-9.0) - s2.a.mA.mC.mC) < 0.0500000007450580596923828125; + } + else + { + _507 = _498; + } + bool _516; + if (_507) + { + _516 = all(int3(1, -4, 0) == s2.b); + } + else + { + _516 = _507; + } + bool _525; + if (_516) + { + _525 = 
all(uint3(4u, 9u, 1u) == s2.c.mA.mA.mA); + } + else + { + _525 = _516; + } + bool _534; + if (_525) + { + _534 = false == bool(s2.c.mA.mA.mB); + } + else + { + _534 = _525; + } + bool _543; + if (_534) + { + bool _1106 = abs(3.0 - float3x2(s2.c.mA.mB)[0].x) < 0.0500000007450580596923828125; + bool _1099; + if (_1106) + { + _1099 = abs((-5.0) - float3x2(s2.c.mA.mB)[0].y) < 0.0500000007450580596923828125; + } + else + { + _1099 = _1106; + } + bool _1072; + if (_1099) + { + bool _1139 = abs((-1.0) - float3x2(s2.c.mA.mB)[1].x) < 0.0500000007450580596923828125; + bool _1132; + if (_1139) + { + _1132 = abs((-5.0) - float3x2(s2.c.mA.mB)[1].y) < 0.0500000007450580596923828125; + } + else + { + _1132 = _1139; + } + _1072 = _1132; + } + else + { + _1072 = _1099; + } + bool _1080; + if (_1072) + { + bool _1172 = abs((-1.0) - float3x2(s2.c.mA.mB)[2].x) < 0.0500000007450580596923828125; + bool _1165; + if (_1172) + { + _1165 = abs((-9.0) - float3x2(s2.c.mA.mB)[2].y) < 0.0500000007450580596923828125; + } + else + { + _1165 = _1172; + } + _1080 = _1165; + } + else + { + _1080 = _1072; + } + _543 = _1080; + } + else + { + _543 = _534; + } + bool _552; + if (_543) + { + bool _1205 = abs((-6.0) - s2.c.mB.x) < 0.0500000007450580596923828125; + bool _1198; + if (_1205) + { + _1198 = abs((-9.0) - s2.c.mB.y) < 0.0500000007450580596923828125; + } + else + { + _1198 = _1205; + } + _552 = _1198; + } + else + { + _552 = _543; + } + bool _561; + if (_552) + { + _561 = all(bool2(true, false) == bool2(s2.d.mA)); + } + else + { + _561 = _552; + } + bool _570; + if (_561) + { + bool _1263 = abs((-2.0) - float2x2(s2.d.mB.mA.mA)[0].x) < 0.0500000007450580596923828125; + bool _1256; + if (_1263) + { + _1256 = abs(3.0 - float2x2(s2.d.mB.mA.mA)[0].y) < 0.0500000007450580596923828125; + } + else + { + _1256 = _1263; + } + bool _1237; + if (_1256) + { + bool _1296 = abs(7.0 - float2x2(s2.d.mB.mA.mA)[1].x) < 0.0500000007450580596923828125; + bool _1289; + if (_1296) + { + _1289 = abs(2.0 - 
float2x2(s2.d.mB.mA.mA)[1].y) < 0.0500000007450580596923828125; + } + else + { + _1289 = _1296; + } + _1237 = _1289; + } + else + { + _1237 = _1256; + } + _570 = _1237; + } + else + { + _570 = _561; + } + bool _579; + if (_570) + { + _579 = all(bool3(false) == bool3(s2.d.mB.mA.mB)); + } + else + { + _579 = _570; + } + bool _588; + if (_579) + { + _588 = all(bool4(false, false, false, true) == bool4(s2.d.mB.mA.mC)); + } + else + { + _588 = _579; + } + bool _597; + if (_588) + { + _597 = all(bool3(true, false, false) == bool3(s2.d.mB.mB)); + } + else + { + _597 = _588; + } + bool _606; + if (_597) + { + _606 = all(int2(-9, 0) == s2.d.mC); + } + else + { + _606 = _597; + } + if (_606) + { + _612.passed++; + } +} + diff --git a/reference/opt/shaders-msl/comp/shared-struct-bool-cast.comp b/reference/opt/shaders-msl/comp/shared-struct-bool-cast.comp new file mode 100644 index 00000000000..538ab0bd69c --- /dev/null +++ b/reference/opt/shaders-msl/comp/shared-struct-bool-cast.comp @@ -0,0 +1,63 @@ +#include +#include + +using namespace metal; + +struct S1 +{ + int3 a; + uint2 b; + short4 c; + uint d; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device block& _132 [[buffer(0)]]) +{ + threadgroup S1 s1; + s1.a = int3(6, 8, 8); + s1.b = uint2(4u); + s1.c = short4(bool4(false, false, false, true)); + s1.d = 6u; + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool _144 = all(int3(6, 8, 8) == s1.a); + bool _108; + if (_144) + { + _108 = all(uint2(4u) == s1.b); + } + else + { + _108 = _144; + } + bool _117; + if (_108) + { + _117 = all(bool4(false, false, false, true) == bool4(s1.c)); + } + else + { + _117 = _108; + } + bool _126; + if (_117) + { + _126 = 6u == s1.d; + } + else + { + _126 = _117; + } + if (_126) + { + _132.passed++; + } +} + diff --git 
a/reference/opt/shaders-msl/comp/spec-constant-op-member-array.comp b/reference/opt/shaders-msl/comp/spec-constant-op-member-array.comp index d3c8b7dc4a3..8f54f0528dc 100644 --- a/reference/opt/shaders-msl/comp/spec-constant-op-member-array.comp +++ b/reference/opt/shaders-msl/comp/spec-constant-op-member-array.comp @@ -40,6 +40,7 @@ struct SSBO constant int e_tmp [[function_constant(3)]]; constant int e = is_function_constant_defined(e_tmp) ? e_tmp : 400; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(device SSBO& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { diff --git a/reference/opt/shaders-msl/comp/spec-constant-work-group-size.comp b/reference/opt/shaders-msl/comp/spec-constant-work-group-size.comp index bb796ab95d7..de30edec155 100644 --- a/reference/opt/shaders-msl/comp/spec-constant-work-group-size.comp +++ b/reference/opt/shaders-msl/comp/spec-constant-work-group-size.comp @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + #ifndef SPIRV_CROSS_CONSTANT_ID_1 #define SPIRV_CROSS_CONSTANT_ID_1 2 #endif @@ -27,7 +68,7 @@ constant int _32 = (1 - a); kernel void main0(device SSBO& _17 [[buffer(0)]]) { - int spec_const_array_size[b]; + spvUnsafeArray spec_const_array_size; spec_const_array_size[a] = a; _17.v[_30] = b + spec_const_array_size[_32]; } diff --git a/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp b/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp index 905222d398d..b584f307ef2 100644 --- a/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp +++ b/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Sub { float4 f[2]; @@ -16,33 +57,35 @@ struct SSBO Sub sub[2]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - float _153[2]; - _153[0] = _27.sub[gl_WorkGroupID.x].f[0].x; - _153[1] = _27.sub[gl_WorkGroupID.x].f[1].x; - float2 _154[2]; - _154[0] = _27.sub[gl_WorkGroupID.x].f2[0].xy; - _154[1] = _27.sub[gl_WorkGroupID.x].f2[1].xy; - float3 _155[2]; - _155[0] = _27.sub[gl_WorkGroupID.x].f3[0]; - _155[1] = _27.sub[gl_WorkGroupID.x].f3[1]; - float4 _156[2]; - _156[0] = _27.sub[gl_WorkGroupID.x].f4[0]; - _156[1] = _27.sub[gl_WorkGroupID.x].f4[1]; - _153[gl_GlobalInvocationID.x] += 1.0; - _154[gl_GlobalInvocationID.x] += float2(2.0); - _155[gl_GlobalInvocationID.x] += float3(3.0); - _156[gl_GlobalInvocationID.x] += float4(4.0); - _27.sub[gl_WorkGroupID.x].f[0].x = _153[0]; - _27.sub[gl_WorkGroupID.x].f[1].x = _153[1]; - _27.sub[gl_WorkGroupID.x].f2[0].xy = _154[0]; - _27.sub[gl_WorkGroupID.x].f2[1].xy = _154[1]; - _27.sub[gl_WorkGroupID.x].f3[0] = _155[0]; - _27.sub[gl_WorkGroupID.x].f3[1] = _155[1]; - _27.sub[gl_WorkGroupID.x].f4[0] = _156[0]; - _27.sub[gl_WorkGroupID.x].f4[1] = _156[1]; - _27.sub[0].f[0].x += 5.0; - 
_27.sub[0].f2[1].xy += float2(5.0); + spvUnsafeArray _155; + _155[0] = _27.sub[gl_WorkGroupID.x].f[0].x; + _155[1] = _27.sub[gl_WorkGroupID.x].f[1].x; + spvUnsafeArray _156; + _156[0] = _27.sub[gl_WorkGroupID.x].f2[0].xy; + _156[1] = _27.sub[gl_WorkGroupID.x].f2[1].xy; + spvUnsafeArray _157; + _157[0] = _27.sub[gl_WorkGroupID.x].f3[0]; + _157[1] = _27.sub[gl_WorkGroupID.x].f3[1]; + spvUnsafeArray _158; + _158[0] = _27.sub[gl_WorkGroupID.x].f4[0]; + _158[1] = _27.sub[gl_WorkGroupID.x].f4[1]; + _155[gl_GlobalInvocationID.x] += 1.0; + _156[gl_GlobalInvocationID.x] += float2(2.0); + _157[gl_GlobalInvocationID.x] += float3(3.0); + _158[gl_GlobalInvocationID.x] += float4(4.0); + (device float&)_27.sub[gl_WorkGroupID.x].f[0] = _155[0]; + (device float&)_27.sub[gl_WorkGroupID.x].f[1] = _155[1]; + (device float2&)_27.sub[gl_WorkGroupID.x].f2[0] = _156[0]; + (device float2&)_27.sub[gl_WorkGroupID.x].f2[1] = _156[1]; + _27.sub[gl_WorkGroupID.x].f3[0] = _157[0]; + _27.sub[gl_WorkGroupID.x].f3[1] = _157[1]; + _27.sub[gl_WorkGroupID.x].f4[0] = _158[0]; + _27.sub[gl_WorkGroupID.x].f4[1] = _158[1]; + (device float&)_27.sub[0].f[0] = _27.sub[0].f[0].x + 5.0; + (device float2&)_27.sub[0].f2[1] = _27.sub[0].f2[1].xy + float2(5.0); } diff --git a/reference/opt/shaders-msl/comp/struct-layout.comp b/reference/opt/shaders-msl/comp/struct-layout.comp index d4413625271..0445f5aef9c 100644 --- a/reference/opt/shaders-msl/comp/struct-layout.comp +++ b/reference/opt/shaders-msl/comp/struct-layout.comp @@ -18,6 +18,8 @@ struct SSBO Foo in_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _23 [[buffer(0)]], const device SSBO& _30 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _23.out_data[gl_GlobalInvocationID.x].m = _30.in_data[gl_GlobalInvocationID.x].m * _30.in_data[gl_GlobalInvocationID.x].m; diff --git a/reference/opt/shaders-msl/comp/struct-nested.comp b/reference/opt/shaders-msl/comp/struct-nested.comp 
index 6a1419cecbf..ad706c59095 100644 --- a/reference/opt/shaders-msl/comp/struct-nested.comp +++ b/reference/opt/shaders-msl/comp/struct-nested.comp @@ -18,6 +18,8 @@ struct dstbuffer s2 test[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device dstbuffer& _19 [[buffer(0)]]) { _19.test[0].b.a = 0; diff --git a/reference/opt/shaders-msl/comp/struct-packing.comp b/reference/opt/shaders-msl/comp/struct-packing.comp index 35cf1b22cb4..dc1654399d3 100644 --- a/reference/opt/shaders-msl/comp/struct-packing.comp +++ b/reference/opt/shaders-msl/comp/struct-packing.comp @@ -3,12 +3,11 @@ using namespace metal; -typedef packed_float2 packed_rm_float2x3[3]; - struct S0 { float2 a[1]; float b; + char _m0_final_padding[4]; }; struct S1 @@ -21,6 +20,7 @@ struct S2 { float3 a[1]; float b; + char _m0_final_padding[12]; }; struct S3 @@ -45,6 +45,7 @@ struct Content S3 m3; float m4; S4 m3s[8]; + char _m0_final_padding[8]; }; struct SSBO1 @@ -58,17 +59,17 @@ struct SSBO1 float3x2 m3; float2x2 m4; float2x2 m5[9]; - packed_rm_float2x3 m6[4][2]; - char _m10_pad[8]; - float3x2 m7; - char _m11_pad[8]; + float3x2 m6[4][2]; + float2x3 m7; float array[1]; }; struct S0_1 { - float4 a[1]; + float2 a[1]; + char _m1_pad[8]; float b; + char _m0_final_padding[12]; }; struct S1_1 @@ -81,6 +82,7 @@ struct S2_1 { float3 a[1]; float b; + char _m0_final_padding[12]; }; struct S3_1 @@ -92,6 +94,7 @@ struct S3_1 struct S4_1 { float2 c; + char _m0_final_padding[8]; }; struct Content_1 @@ -104,8 +107,8 @@ struct Content_1 S2_1 m2; S3_1 m3; float m4; - char _m8_pad[12]; - /* FIXME: A padded struct is needed here. If you see this message, file a bug! 
*/ S4_1 m3s[8]; + char _m8_pad[8]; + S4_1 m3s[8]; }; struct SSBO0 @@ -116,16 +119,18 @@ struct SSBO0 float4 array[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]]) { Content_1 _60 = ssbo_140.content; - ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0].xy; + ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0]; ssbo_430.content.m0s[0].b = _60.m0s[0].b; ssbo_430.content.m1s[0].a = float3(_60.m1s[0].a); ssbo_430.content.m1s[0].b = _60.m1s[0].b; ssbo_430.content.m2s[0].a[0] = _60.m2s[0].a[0]; ssbo_430.content.m2s[0].b = _60.m2s[0].b; - ssbo_430.content.m0.a[0] = _60.m0.a[0].xy; + ssbo_430.content.m0.a[0] = _60.m0.a[0]; ssbo_430.content.m0.b = _60.m0.b; ssbo_430.content.m1.a = float3(_60.m1.a); ssbo_430.content.m1.b = _60.m1.b; @@ -142,6 +147,6 @@ kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [ ssbo_430.content.m3s[5].c = _60.m3s[5].c; ssbo_430.content.m3s[6].c = _60.m3s[6].c; ssbo_430.content.m3s[7].c = _60.m3s[7].c; - ssbo_430.content.m1.a = ssbo_430.content.m3.a * float3x2(float2(ssbo_430.m6[1][1][0]), float2(ssbo_430.m6[1][1][1]), float2(ssbo_430.m6[1][1][2])); + ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1]; } diff --git a/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp b/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp new file mode 100644 index 00000000000..c1eccf27805 --- /dev/null +++ b/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 values[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u); + +kernel void main0(device SSBO& _23 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + threadgroup short4 foo[4]; + foo[gl_LocalInvocationIndex] = 
short4(_23.values[gl_GlobalInvocationID.x] != float4(10.0)); + threadgroup_barrier(mem_flags::mem_threadgroup); + _23.values[gl_GlobalInvocationID.x] = select(float4(40.0), float4(30.0), bool4(foo[gl_LocalInvocationIndex ^ 3u])); +} + diff --git a/reference/opt/shaders-msl/comp/torture-loop.comp b/reference/opt/shaders-msl/comp/torture-loop.comp index 4c367d3e6da..ff7e02e2022 100644 --- a/reference/opt/shaders-msl/comp/torture-loop.comp +++ b/reference/opt/shaders-msl/comp/torture-loop.comp @@ -14,29 +14,31 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _89 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - float4 _99; - _99 = _24.in_data[gl_GlobalInvocationID.x]; - for (int _93 = 0; (_93 + 1) < 10; ) + float4 _101; + _101 = _24.in_data[gl_GlobalInvocationID.x]; + for (int _95 = 0; (_95 + 1) < 10; ) { - _99 *= 2.0; - _93 += 2; + _101 *= 2.0; + _95 += 2; continue; } - float4 _98; - _98 = _99; - float4 _103; - for (uint _94 = 0u; _94 < 16u; _98 = _103, _94++) + float4 _100; + _100 = _101; + float4 _105; + for (uint _96 = 0u; _96 < 16u; _100 = _105, _96++) { - _103 = _98; - for (uint _100 = 0u; _100 < 30u; ) + _105 = _100; + for (uint _102 = 0u; _102 < 30u; ) { - _103 = _24.mvp * _103; - _100++; + _105 = _24.mvp * _105; + _102++; continue; } } - _89.out_data[gl_GlobalInvocationID.x] = _98; + _89.out_data[gl_GlobalInvocationID.x] = _100; } diff --git a/reference/opt/shaders-msl/comp/type-alias.comp b/reference/opt/shaders-msl/comp/type-alias.comp index 8a68933d82d..2f6a0b7ba8b 100644 --- a/reference/opt/shaders-msl/comp/type-alias.comp +++ b/reference/opt/shaders-msl/comp/type-alias.comp @@ -28,6 +28,8 @@ struct SSBO2 float4 outputs[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO0& _36 [[buffer(0)]], device SSBO1& _55 [[buffer(1)]], device SSBO2& _66 [[buffer(2)]], 
uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _66.outputs[gl_GlobalInvocationID.x] = _36.s0s[gl_GlobalInvocationID.x].a + _55.s1s[gl_GlobalInvocationID.x].a; diff --git a/reference/opt/shaders-msl/comp/type_casting_i64.msl22.comp b/reference/opt/shaders-msl/comp/type_casting_i64.msl22.comp new file mode 100644 index 00000000000..6820b077a1a --- /dev/null +++ b/reference/opt/shaders-msl/comp/type_casting_i64.msl22.comp @@ -0,0 +1,27 @@ +#include +#include + +using namespace metal; + +struct dst_buff_t +{ + int m0[1]; +}; + +struct src_buff_t +{ + int m0[1]; +}; + +constant int base_val_tmp [[function_constant(0)]]; +constant int base_val = is_function_constant_defined(base_val_tmp) ? base_val_tmp : 0; +constant long shift_val_tmp [[function_constant(1)]]; +constant long shift_val = is_function_constant_defined(shift_val_tmp) ? shift_val_tmp : 0l; +constant int offset = (base_val >> int(shift_val)); +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device dst_buff_t& dst_buff [[buffer(0)]], device src_buff_t& src_buff [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + dst_buff.m0[gl_GlobalInvocationID.x] = src_buff.m0[gl_GlobalInvocationID.x] + offset; +} + diff --git a/reference/opt/shaders-msl/comp/udiv.comp b/reference/opt/shaders-msl/comp/udiv.comp index 32874ad7879..7f7315b882a 100644 --- a/reference/opt/shaders-msl/comp/udiv.comp +++ b/reference/opt/shaders-msl/comp/udiv.comp @@ -13,6 +13,8 @@ struct SSBO uint inputs[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _10 [[buffer(0)]], device SSBO& _23 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _10.outputs[gl_GlobalInvocationID.x] = _23.inputs[gl_GlobalInvocationID.x] / 29u; diff --git a/reference/opt/shaders-msl/comp/writable-ssbo.comp b/reference/opt/shaders-msl/comp/writable-ssbo.comp index 9dc53b6dd5d..310cda7fef9 100644 --- 
a/reference/opt/shaders-msl/comp/writable-ssbo.comp +++ b/reference/opt/shaders-msl/comp/writable-ssbo.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/opt/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp b/reference/opt/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp index a37fe519a55..cea12980c67 100644 --- a/reference/opt/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp +++ b/reference/opt/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp @@ -91,6 +91,8 @@ struct ResType_7 int4 _m1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBOUint& u [[buffer(0)]], device SSBOInt& i [[buffer(1)]]) { ResType _25; diff --git a/reference/opt/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc b/reference/opt/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc index a5e30b6de1a..01fceeb6c7b 100644 --- a/reference/opt/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc +++ b/reference/opt/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float3 vVertex; @@ -10,7 +51,7 @@ struct main0_out struct main0_patchOut { - float3 vPatch[2]; + spvUnsafeArray vPatch; }; struct main0_in @@ -28,7 +69,7 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ if (gl_InvocationID >= 4) return; gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput + gl_in[gl_InvocationID ^ 1].vInput; - threadgroup_barrier(mem_flags::mem_device); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); if (gl_InvocationID == 0) { patchOut.vPatch[0] = float3(10.0); diff --git a/reference/opt/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc b/reference/opt/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc new file mode 100644 index 00000000000..8ebde9d9d76 --- /dev/null +++ b/reference/opt/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc @@ -0,0 +1,39 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + float3 vFoo; +}; + +struct main0_in +{ + uint3 m_86; + ushort2 m_90; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* 
spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 1]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 1]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 1; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625); + patchOut.vFoo = float3(1.0); + gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position; +} + diff --git a/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc b/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc new file mode 100644 index 00000000000..184a4a6f9b3 --- /dev/null +++ b/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc @@ -0,0 +1,36 @@ +#include +#include + +using namespace metal; + +struct Boo +{ + float3 a; + uint3 b; +}; + +struct main0_out +{ + Boo vVertex; +}; + +struct main0_in +{ + Boo vInput; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device 
main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput; + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(2.0); +} + diff --git a/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc b/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc index cd4d8d80e52..f5fd60a9f71 100644 --- a/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc +++ b/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc @@ -16,8 +16,8 @@ struct main0_out struct main0_in { - float3 Boo_a [[attribute(0)]]; - float3 Boo_b [[attribute(1)]]; + float3 vInput_a [[attribute(0)]]; + float3 vInput_b [[attribute(1)]]; }; kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) @@ -28,10 +28,8 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ threadgroup_barrier(mem_flags::mem_threadgroup); if (gl_InvocationID >= 4) return; - Boo vInput_24; - 
vInput_24.a = gl_in[gl_InvocationID].Boo_a; - vInput_24.b = gl_in[gl_InvocationID].Boo_b; - gl_out[gl_InvocationID].vVertex = vInput_24; + Boo _25 = Boo{ gl_in[gl_InvocationID].vInput_a, gl_in[gl_InvocationID].vInput_b }; + gl_out[gl_InvocationID].vVertex = _25; spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0); diff --git a/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert b/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert new file mode 100644 index 00000000000..a414c98542c --- /dev/null +++ b/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.gl_Position = float4(10.0); + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 4.0; + return out; +} + diff --git a/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert b/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert index a414c98542c..2d98929051b 100644 --- a/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert +++ b/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert @@ -7,6 +7,8 @@ struct main0_out { float4 gl_Position [[position]]; float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; }; vertex main0_out main0() @@ -15,6 +17,8 @@ vertex main0_out main0() out.gl_Position = float4(10.0); out.gl_ClipDistance[0] = 1.0; out.gl_ClipDistance[1] = 4.0; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = 
out.gl_ClipDistance[1]; return out; } diff --git a/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert b/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert new file mode 100644 index 00000000000..b3c8b6bb278 --- /dev/null +++ b/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], uint3 spvDispatchBase [[grid_origin]], device main0_out* spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + uint gl_BaseVertex = spvDispatchBase.x; + uint gl_BaseInstance = spvDispatchBase.y; + out.gl_Position = float4(float(int(gl_BaseVertex)), float(int(gl_BaseInstance)), 0.0, 1.0); +} + diff --git a/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert b/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert index 1d203ba98bc..a32c1948f88 100644 --- a/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert +++ b/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert @@ -11,7 +11,7 @@ struct main0_out vertex main0_out main0(uint gl_BaseVertex [[base_vertex]], uint gl_BaseInstance [[base_instance]]) { main0_out out = {}; - out.gl_Position = float4(float(gl_BaseVertex), float(gl_BaseInstance), 0.0, 1.0); + out.gl_Position = float4(float(int(gl_BaseVertex)), float(int(gl_BaseInstance)), 0.0, 1.0); return out; } diff --git a/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert b/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert index 387fe0a8353..b40528115f0 100644 --- 
a/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert +++ b/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert @@ -7,7 +7,7 @@ struct UBO { float4x4 uMVPR; float4x4 uMVPC; - float2x4 uMVP; + float4x4 uMVP; }; struct main0_out diff --git a/reference/opt/shaders-msl/flatten/struct.flatten.vert b/reference/opt/shaders-msl/flatten/struct.flatten.vert index d97a34a859f..dc96ceae3ee 100644 --- a/reference/opt/shaders-msl/flatten/struct.flatten.vert +++ b/reference/opt/shaders-msl/flatten/struct.flatten.vert @@ -34,7 +34,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] out.gl_Position = _18.uMVP * in.aVertex; out.vColor = float4(0.0); float3 _39 = in.aVertex.xyz - float3(_18.light.Position); - out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(_39) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_39))); + out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(_39) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(_39))); return out; } diff --git a/reference/opt/shaders-msl/frag/array-component-io.frag b/reference/opt/shaders-msl/frag/array-component-io.frag new file mode 100644 index 00000000000..9b4c5b5204f --- /dev/null +++ b/reference/opt/shaders-msl/frag/array-component-io.frag @@ -0,0 +1,99 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 m_location_0 [[color(0)]]; + float4 m_location_1 [[color(1)]]; + float4 m_location_2 [[color(2)]]; +}; + +struct main0_in +{ + float InC_0 [[user(locn0_1), flat]]; + float InA_0 [[user(locn1), flat]]; + float InC_1 [[user(locn1_1), flat]]; + float2 InB_0 [[user(locn1_2), flat]]; + float InA_1 [[user(locn2), flat]]; + float InC_2 [[user(locn2_1), flat]]; + float2 InB_1 [[user(locn2_2), flat]]; + float InD [[user(locn3_1), sample_perspective]]; + float InE [[user(locn4_2), center_no_perspective]]; + float InF [[user(locn5_3), centroid_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray A = {}; + spvUnsafeArray B = {}; + spvUnsafeArray C = {}; + float D = {}; + spvUnsafeArray InA = {}; + spvUnsafeArray InB = {}; + spvUnsafeArray InC = {}; + InA[0] = in.InA_0; + InA[1] = in.InA_1; + InB[0] = in.InB_0; + InB[1] = in.InB_1; + InC[0] = in.InC_0; + InC[1] = in.InC_1; + InC[2] = in.InC_2; + A = InA; + B = InB; + C = InC; + D = (in.InD + in.InE) + in.InF; + out.m_location_1.x = A[0]; + out.m_location_2.x = A[1]; + out.m_location_1.zw = B[0]; + out.m_location_2.zw = B[1]; + out.m_location_0.y = C[0]; + out.m_location_1.y = C[1]; + out.m_location_2.y = C[2]; + out.m_location_0.w = D; + return out; +} + diff --git 
a/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag index 9b757b6a31c..79f9025a78d 100644 --- a/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag +++ b/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag @@ -1,9 +1,50 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; -constant float _17[5] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _17 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0, 5.0 }); struct main0_out { diff --git a/reference/opt/shaders-msl/frag/array-of-array-lut.frag b/reference/opt/shaders-msl/frag/array-of-array-lut.frag new file mode 100644 index 00000000000..ba553824e79 --- /dev/null +++ b/reference/opt/shaders-msl/frag/array-of-array-lut.frag @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _17 = spvUnsafeArray({ 1.0, 2.0, 3.0 }); +constant spvUnsafeArray _21 = spvUnsafeArray({ 4.0, 5.0, 6.0 }); +constant spvUnsafeArray, 2> _22 = spvUnsafeArray, 2>({ spvUnsafeArray({ 1.0, 2.0, 3.0 }), spvUnsafeArray({ 4.0, 5.0, 6.0 }) }); + +struct main0_out +{ + float vOutput [[color(0)]]; +}; + +struct main0_in +{ + int vIndex1 [[user(locn0)]]; + int vIndex2 [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.vOutput = _22[in.vIndex1][in.vIndex2]; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag b/reference/opt/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag new file mode 100644 index 00000000000..936b11dc853 --- /dev/null +++ b/reference/opt/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag @@ -0,0 +1,105 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread 
typename spvRemoveReference::type& x) +{ + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +{ + return static_cast(x); +} + +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + +template +inline T spvGetSwizzle(vec x, T c, spvSwizzle s) +{ + switch (s) + { + case spvSwizzle::none: + return c; + case spvSwizzle::zero: + return 0; + case spvSwizzle::one: + return 1; + case spvSwizzle::red: + return x.r; + case spvSwizzle::green: + return x.g; + case spvSwizzle::blue: + return x.b; + case spvSwizzle::alpha: + return x.a; + } +} + +// Wrapper function that swizzles texture samples and fetches. +template +inline vec spvTextureSwizzle(vec x, uint s) +{ + if (!s) + return x; + return vec(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) & 0xFF)), spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF))); +} + +template +inline T spvTextureSwizzle(T x, uint s) +{ + return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; +} + +struct UBO +{ + uint index; +}; + +struct UBO2 +{ + uint index2; +}; + +struct spvDescriptorSetBuffer0 +{ + array, 4> uSampler [[id(0)]]; + array uSamplerSmplr [[id(4)]]; + constant UBO* uUBO [[id(8)]]; + constant UBO2* m_50 [[id(9)]]; + constant uint* spvSwizzleConstants [[id(10)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(30)]]) +{ + main0_out out = {}; + constant uint* spvDescriptorSet0_uSamplerSwzl = &spvDescriptorSet0.spvSwizzleConstants[0]; + out.FragColor = spvTextureSwizzle(spvDescriptorSet0.uSampler[(*spvDescriptorSet0.uUBO).index].sample(spvDescriptorSet0.uSamplerSmplr[(*spvDescriptorSet0.uUBO).index], 
in.vUV), spvDescriptorSet0_uSamplerSwzl[(*spvDescriptorSet0.uUBO).index]); + out.FragColor += spvTextureSwizzle(spvDescriptorSet0.uSampler[(*spvDescriptorSet0.m_50).index2].sample(spvDescriptorSet0.uSamplerSmplr[(*spvDescriptorSet0.m_50).index2], in.vUV), spvDescriptorSet0_uSamplerSwzl[(*spvDescriptorSet0.m_50).index2]); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag b/reference/opt/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag new file mode 100644 index 00000000000..c680f04b573 --- /dev/null +++ b/reference/opt/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag @@ -0,0 +1,96 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +{ + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +{ + return static_cast(x); +} + +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + +template +inline T spvGetSwizzle(vec x, T c, spvSwizzle s) +{ + switch (s) + { + case spvSwizzle::none: + return c; + case spvSwizzle::zero: + return 0; + case spvSwizzle::one: + return 1; + case spvSwizzle::red: + return x.r; + case spvSwizzle::green: + return x.g; + case spvSwizzle::blue: + return x.b; + case spvSwizzle::alpha: + return x.a; + } +} + +// Wrapper function that swizzles texture samples and fetches. 
+template +inline vec spvTextureSwizzle(vec x, uint s) +{ + if (!s) + return x; + return vec(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) & 0xFF)), spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF))); +} + +template +inline T spvTextureSwizzle(T x, uint s) +{ + return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; +} + +struct UBO +{ + uint index; +}; + +struct UBO2 +{ + uint index2; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(30)]], constant UBO& uUBO [[buffer(0)]], constant UBO2& _50 [[buffer(1)]], array, 4> uSampler [[texture(0)]], array uSamplerSmplr [[sampler(0)]]) +{ + main0_out out = {}; + constant uint* uSamplerSwzl = &spvSwizzleConstants[0]; + out.FragColor = spvTextureSwizzle(uSampler[uUBO.index].sample(uSamplerSmplr[uUBO.index], in.vUV), uSamplerSwzl[uUBO.index]); + out.FragColor += spvTextureSwizzle(uSampler[_50.index2].sample(uSamplerSmplr[_50.index2], in.vUV), uSamplerSwzl[_50.index2]); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag b/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag index 2160e0be61e..cc503e86957 100644 --- a/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag +++ b/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag @@ -5,22 +5,17 @@ using namespace metal; -struct spvDescriptorSetBuffer0 -{ - array, 4> uSampler0 [[id(0)]]; - array uSampler0Smplr [[id(4)]]; - constant uint* spvSwizzleConstants [[id(8)]]; -}; - -struct main0_out +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct 
spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { - float4 FragColor [[color(0)]]; -}; - -struct main0_in + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { - float2 vUV [[user(locn0)]]; -}; + return static_cast(x); +} enum class spvSwizzle : uint { @@ -33,18 +28,6 @@ enum class spvSwizzle : uint alpha }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) -{ - return static_cast(x); -} -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) -{ - return static_cast(x); -} - template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -82,65 +65,22 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) +struct spvDescriptorSetBuffer0 { - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} + array, 4> uSampler0 [[id(0)]]; + array uSampler0Smplr [[id(4)]]; + constant uint* spvSwizzleConstants [[id(8)]]; +}; -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) +struct main0_out { - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(30)]], texture2d uSampler1 [[texture(0)]], sampler uSampler1Smplr [[sampler(0)]]) { @@ -148,9 +88,10 @@ fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuff constant uint* spvDescriptorSet0_uSampler0Swzl = &spvDescriptorSet0.spvSwizzleConstants[0]; constant uint& uSampler1Swzl = spvSwizzleConstants[0]; out.FragColor = spvTextureSwizzle(spvDescriptorSet0.uSampler0[2].sample(spvDescriptorSet0.uSampler0Smplr[2], in.vUV), spvDescriptorSet0_uSampler0Swzl[2]); - out.FragColor += spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, in.vUV), uSampler1Swzl); + float4 _73 = spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, in.vUV), uSampler1Swzl); + out.FragColor += _73; out.FragColor += spvTextureSwizzle(spvDescriptorSet0.uSampler0[1].sample(spvDescriptorSet0.uSampler0Smplr[1], in.vUV), spvDescriptorSet0_uSampler0Swzl[1]); - out.FragColor += spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, in.vUV), uSampler1Swzl); + out.FragColor += _73; return out; } diff --git a/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag b/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag index 337abb99d8d..5b1d17c56cc 100644 --- a/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag +++ b/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag @@ -5,15 
+5,17 @@ using namespace metal; -struct main0_out +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { - float4 FragColor [[color(0)]]; -}; - -struct main0_in + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { - float2 vUV [[user(locn0)]]; -}; + return static_cast(x); +} enum class spvSwizzle : uint { @@ -26,18 +28,6 @@ enum class spvSwizzle : uint alpha }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) -{ - return static_cast(x); -} -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) -{ - return static_cast(x); -} - template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -75,65 +65,15 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) +struct main0_out { - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} + float4 FragColor [[color(0)]]; +}; -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) +struct main0_in { - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} + float2 vUV [[user(locn0)]]; +}; fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(30)]], array, 4> uSampler [[texture(0)]], array uSamplerSmplr [[sampler(0)]]) { diff --git a/reference/opt/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag b/reference/opt/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag index 53b8a74388b..012d99b5b76 100644 --- a/reference/opt/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag +++ b/reference/opt/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag @@ -15,14 +15,14 @@ struct main0_out struct main0_in { - float3 gl_BaryCoordNoPerspNV [[barycentric_coord, center_no_perspective]]; + float3 gl_BaryCoordNoPerspEXT [[barycentric_coord, center_no_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[buffer(0)]], uint gl_PrimitiveID [[primitive_id]]) { main0_out out = {}; int _23 = 3 * int(gl_PrimitiveID); - out.value = ((_19.uvs[_23] * in.gl_BaryCoordNoPerspNV.x) + (_19.uvs[_23 + 1] * in.gl_BaryCoordNoPerspNV.y)) + (_19.uvs[_23 + 2] * in.gl_BaryCoordNoPerspNV.z); + out.value = ((_19.uvs[_23] * in.gl_BaryCoordNoPerspEXT.x) + (_19.uvs[_23 + 1] * in.gl_BaryCoordNoPerspEXT.y)) + (_19.uvs[_23 + 2] * in.gl_BaryCoordNoPerspEXT.z); return out; } diff --git a/reference/opt/shaders-msl/frag/barycentric-nv.msl22.frag b/reference/opt/shaders-msl/frag/barycentric-nv.msl22.frag index ae2c704d055..d6e9dcdbf96 100644 --- a/reference/opt/shaders-msl/frag/barycentric-nv.msl22.frag +++ b/reference/opt/shaders-msl/frag/barycentric-nv.msl22.frag @@ -15,14 +15,14 @@ struct main0_out struct 
main0_in { - float3 gl_BaryCoordNV [[barycentric_coord, center_perspective]]; + float3 gl_BaryCoordEXT [[barycentric_coord, center_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[buffer(0)]], uint gl_PrimitiveID [[primitive_id]]) { main0_out out = {}; int _23 = 3 * int(gl_PrimitiveID); - out.value = ((_19.uvs[_23] * in.gl_BaryCoordNV.x) + (_19.uvs[_23 + 1] * in.gl_BaryCoordNV.y)) + (_19.uvs[_23 + 2] * in.gl_BaryCoordNV.z); + out.value = ((_19.uvs[_23] * in.gl_BaryCoordEXT.x) + (_19.uvs[_23 + 1] * in.gl_BaryCoordEXT.y)) + (_19.uvs[_23 + 2] * in.gl_BaryCoordEXT.z); return out; } diff --git a/reference/opt/shaders-msl/frag/basic.force-sample.frag b/reference/opt/shaders-msl/frag/basic.force-sample.frag new file mode 100644 index 00000000000..b9706b73f56 --- /dev/null +++ b/reference/opt/shaders-msl/frag/basic.force-sample.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; + float2 vTex [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + out.FragColor = in.vColor * uTex.sample(uTexSmplr, in.vTex); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/bitcasting.1d-as-2d.frag b/reference/opt/shaders-msl/frag/bitcasting.1d-as-2d.frag new file mode 100644 index 00000000000..d341397f4c0 --- /dev/null +++ b/reference/opt/shaders-msl/frag/bitcasting.1d-as-2d.frag @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor0 [[color(0)]]; + float4 FragColor1 [[color(1)]]; +}; + +struct main0_in +{ + float4 VertGeom [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d TextureBase [[texture(0)]], texture2d TextureDetail [[texture(1)]], sampler TextureBaseSmplr 
[[sampler(0)]], sampler TextureDetailSmplr [[sampler(1)]]) +{ + main0_out out = {}; + float4 _22 = TextureBase.sample(TextureBaseSmplr, float2(in.VertGeom.x, 0.5)); + float4 _30 = TextureDetail.sample(TextureDetailSmplr, float2(in.VertGeom.x, 0.5), int2(3, 0)); + out.FragColor0 = as_type(as_type(_22)) * as_type(as_type(_30)); + out.FragColor1 = as_type(as_type(_22)) * as_type(as_type(_30)); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/buffer-read-write.frag b/reference/opt/shaders-msl/frag/buffer-read-write.frag index 2b2ac7f0608..4f114ed7247 100644 --- a/reference/opt/shaders-msl/frag/buffer-read-write.frag +++ b/reference/opt/shaders-msl/frag/buffer-read-write.frag @@ -5,17 +5,18 @@ using namespace metal; -struct main0_out -{ - float4 FragColor [[color(0)]]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + fragment main0_out main0(texture2d buf [[texture(0)]], texture2d bufOut [[texture(1)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; diff --git a/reference/opt/shaders-msl/frag/clip-distance-varying.frag b/reference/opt/shaders-msl/frag/clip-distance-varying.frag new file mode 100644 index 00000000000..9a72d5ba39f --- /dev/null +++ b/reference/opt/shaders-msl/frag/clip-distance-varying.frag @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_ClipDistance = {}; + gl_ClipDistance[0] = in.gl_ClipDistance_0; + gl_ClipDistance[1] = in.gl_ClipDistance_1; + out.FragColor = float4((1.0 - gl_ClipDistance[0]) - gl_ClipDistance[1]); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/constant-array.frag b/reference/opt/shaders-msl/frag/constant-array.frag index a0b830daae3..ca7efc5341d 100644 --- a/reference/opt/shaders-msl/frag/constant-array.frag +++ b/reference/opt/shaders-msl/frag/constant-array.frag @@ -1,19 +1,59 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Foobar { float a; float b; }; -constant float4 _37[3] = { float4(1.0), float4(2.0), float4(3.0) }; -constant float4 _49[2] = { float4(1.0), float4(2.0) }; -constant float4 _54[2] = { float4(8.0), float4(10.0) }; -constant float4 _55[2][2] = { { float4(1.0), float4(2.0) }, { float4(8.0), float4(10.0) } }; -constant Foobar _75[2] = { Foobar{ 10.0, 40.0 }, Foobar{ 90.0, 70.0 } }; +constant spvUnsafeArray _37 = spvUnsafeArray({ float4(1.0), float4(2.0), float4(3.0) }); +constant spvUnsafeArray _49 = spvUnsafeArray({ float4(1.0), float4(2.0) }); +constant spvUnsafeArray _54 = spvUnsafeArray({ float4(8.0), float4(10.0) }); +constant spvUnsafeArray, 2> _55 = spvUnsafeArray, 2>({ spvUnsafeArray({ float4(1.0), float4(2.0) }), spvUnsafeArray({ float4(8.0), float4(10.0) }) }); struct main0_out { @@ -27,6 +67,8 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]]) { + spvUnsafeArray _75 = spvUnsafeArray({ Foobar{ 10.0, 40.0 }, Foobar{ 90.0, 70.0 } }); + main0_out out = {}; out.FragColor = ((_37[in.index] + _55[in.index][in.index + 1]) + float4(30.0)) + float4(_75[in.index].a + _75[in.index].b); return out; diff --git a/reference/opt/shaders-msl/frag/constant-composites.frag b/reference/opt/shaders-msl/frag/constant-composites.frag index 335cbcd796c..e0fa980fb85 100644 --- 
a/reference/opt/shaders-msl/frag/constant-composites.frag +++ b/reference/opt/shaders-msl/frag/constant-composites.frag @@ -1,16 +1,56 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Foo { float a; float b; }; -constant float _16[4] = { 1.0, 4.0, 3.0, 2.0 }; -constant Foo _28[2] = { Foo{ 10.0, 20.0 }, Foo{ 30.0, 40.0 } }; +constant spvUnsafeArray _16 = spvUnsafeArray({ 1.0, 4.0, 3.0, 2.0 }); struct main0_out { @@ -24,6 +64,8 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]]) { + spvUnsafeArray _28 = spvUnsafeArray({ Foo{ 10.0, 20.0 }, Foo{ 30.0, 40.0 } }); + main0_out out = {}; out.FragColor = float4(_16[in.line]); out.FragColor += float4(_28[in.line].a * _28[1 - in.line].a); diff --git a/reference/opt/shaders-msl/frag/cull-distance-varying.frag b/reference/opt/shaders-msl/frag/cull-distance-varying.frag new file mode 100644 index 00000000000..708a295710d --- /dev/null +++ b/reference/opt/shaders-msl/frag/cull-distance-varying.frag @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct 
spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float gl_CullDistance_0 [[user(cull0)]]; + float gl_CullDistance_1 [[user(cull1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_CullDistance = {}; + gl_CullDistance[0] = in.gl_CullDistance_0; + gl_CullDistance[1] = in.gl_CullDistance_1; + out.FragColor = float4((1.0 - gl_CullDistance[0]) - gl_CullDistance[1]); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/depth-out-early-frag-tests.frag b/reference/opt/shaders-msl/frag/depth-out-early-frag-tests.frag new file mode 100644 index 00000000000..21884d81c5b --- /dev/null +++ b/reference/opt/shaders-msl/frag/depth-out-early-frag-tests.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color_out [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0() +{ + float gl_FragDepth; + main0_out out = {}; + out.color_out = float4(1.0, 0.0, 0.0, 1.0); + gl_FragDepth = 0.699999988079071044921875; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/depth-out-no-early-frag-tests.frag b/reference/opt/shaders-msl/frag/depth-out-no-early-frag-tests.frag new file mode 100644 index 00000000000..57d810fafcb --- 
/dev/null +++ b/reference/opt/shaders-msl/frag/depth-out-no-early-frag-tests.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color_out [[color(0)]]; + float gl_FragDepth [[depth(less)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.color_out = float4(1.0, 0.0, 0.0, 1.0); + out.gl_FragDepth = 0.699999988079071044921875; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/disable-frag-output.frag-output.frag b/reference/opt/shaders-msl/frag/disable-frag-output.frag-output.frag new file mode 100644 index 00000000000..63bc45b8af7 --- /dev/null +++ b/reference/opt/shaders-msl/frag/disable-frag-output.frag-output.frag @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 buf1 [[color(1)]]; + float4 buf3 [[color(3)]]; + float4 buf6 [[color(6)]]; + float4 buf7 [[color(7)]]; +}; + +fragment main0_out main0() +{ + float4 buf0; + float4 buf2; + float4 buf4; + float4 buf5; + float gl_FragDepth; + int gl_FragStencilRefARB; + main0_out out = {}; + buf0 = float4(0.0, 0.0, 0.0, 1.0); + out.buf1 = float4(1.0, 0.0, 0.0, 1.0); + buf2 = float4(0.0, 1.0, 0.0, 1.0); + out.buf3 = float4(0.0, 0.0, 1.0, 1.0); + buf4 = float4(1.0, 0.0, 1.0, 0.5); + buf5 = float4(0.25); + out.buf6 = float4(0.75); + out.buf7 = float4(1.0); + gl_FragDepth = 0.89999997615814208984375; + gl_FragStencilRefARB = uint(127); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/for-loop-init.frag b/reference/opt/shaders-msl/frag/for-loop-init.frag index cef6e11d37e..1a42f6e7bec 100644 --- a/reference/opt/shaders-msl/frag/for-loop-init.frag +++ b/reference/opt/shaders-msl/frag/for-loop-init.frag @@ -11,63 +11,61 @@ struct main0_out fragment main0_out main0() { main0_out out = {}; - int _145; - for (;;) + do { out.FragColor = 16; - _145 = 0; - for (; _145 < 25; ) + for (int _143 = 0; _143 < 25; ) { out.FragColor += 10; - _145++; + _143++; continue; } - for (int _146 = 1; _146 < 30; ) + 
for (int _144 = 1; _144 < 30; ) { out.FragColor += 11; - _146++; + _144++; continue; } - int _147; - _147 = 0; - for (; _147 < 20; ) + int _145; + _145 = 0; + for (; _145 < 20; ) { out.FragColor += 12; - _147++; + _145++; continue; } - int _62 = _147 + 3; + int _62 = _145 + 3; out.FragColor += _62; if (_62 == 40) { - for (int _151 = 0; _151 < 40; ) + for (int _149 = 0; _149 < 40; ) { out.FragColor += 13; - _151++; + _149++; continue; } break; } out.FragColor += _62; - int2 _148; - _148 = int2(0); - for (; _148.x < 10; ) + int2 _146; + _146 = int2(0); + for (; _146.x < 10; ) { - out.FragColor += _148.y; - int2 _144 = _148; - _144.x = _148.x + 4; - _148 = _144; + out.FragColor += _146.y; + int2 _142 = _146; + _142.x = _146.x + 4; + _146 = _142; continue; } - for (int _150 = _62; _150 < 40; ) + for (int _148 = _62; _148 < 40; ) { - out.FragColor += _150; - _150++; + out.FragColor += _148; + _148++; continue; } out.FragColor += _62; break; - } + } while(false); return out; } diff --git a/reference/opt/shaders-msl/frag/fp16.desktop.invalid.frag b/reference/opt/shaders-msl/frag/fp16.desktop.invalid.frag deleted file mode 100644 index d9a0390e1f2..00000000000 --- a/reference/opt/shaders-msl/frag/fp16.desktop.invalid.frag +++ /dev/null @@ -1,16 +0,0 @@ -#include -#include - -using namespace metal; - -struct main0_in -{ - half4 v4 [[user(locn3)]]; -}; - -fragment void main0(main0_in in [[stage_in]]) -{ - half4 _491; - half4 _563 = modf(in.v4, _491); -} - diff --git a/reference/opt/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag b/reference/opt/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag new file mode 100644 index 00000000000..25c62448943 --- /dev/null +++ b/reference/opt/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag @@ -0,0 +1,65 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment 
of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct foo_t +{ + float x; + uint y; +}; + +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d bar [[texture(0)]], device atomic_uint* bar_atomic [[buffer(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + if (!gl_HelperInvocation) + { + foo.x = 1.0; + } + uint _91 = (!gl_HelperInvocation ? atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + if (int(gl_FragCoord.x) == 3) + { + gl_HelperInvocation = true, discard_fragment(); + } + int2 _101 = int2(gl_FragCoord.xy); + (gl_HelperInvocation ? ((void)0) : bar.write(uint4(1u), uint2(_101))); + uint _103 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _108 = (!gl_HelperInvocation ? atomic_fetch_or_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], 62u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], memory_order_relaxed)); + uint _110 = (!gl_HelperInvocation ? 
atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _112 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _114 = (!gl_HelperInvocation ? atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _119 = (!gl_HelperInvocation ? atomic_fetch_max_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], 100u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], memory_order_relaxed)); + uint _124; + if (!gl_HelperInvocation) + { + do + { + _124 = 100u; + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], &_124, 42u, memory_order_relaxed, memory_order_relaxed) && _124 == 100u); + } + else + { + _124 = atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], memory_order_relaxed); + } + bool _125 = gl_HelperInvocation; + out.fragColor = float4(1.0, float(_125), 0.0, 1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag b/reference/opt/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag new file mode 100644 index 00000000000..541096a1fa3 --- /dev/null +++ b/reference/opt/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag @@ -0,0 +1,40 @@ +#include +#include + +using namespace metal; + +struct foo +{ + int x; +}; + +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +fragment main0_out main0(device foo& _24 [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + bool 
gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + if (gl_FragCoord.y == 7.0) + { + gl_HelperInvocation = true, discard_fragment(); + } + if (!gl_HelperInvocation) + { + _24.x = 0; + } + for (; float(_24.x) < gl_FragCoord.x; ) + { + if (!gl_HelperInvocation) + { + _24.x++; + } + continue; + } + out.fragColor = float4(float(_24.x), 0.0, 0.0, 1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag b/reference/opt/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag new file mode 100644 index 00000000000..2b2e0853897 --- /dev/null +++ b/reference/opt/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag @@ -0,0 +1,64 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct foo_t +{ + float x; + uint y; +}; + +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d bar [[texture(0)]], device atomic_uint* bar_atomic [[buffer(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + if (!gl_HelperInvocation) + { + foo.x = 1.0; + } + uint _90 = (!gl_HelperInvocation ? 
atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + if (int(gl_FragCoord.x) == 3) + { + gl_HelperInvocation = true, discard_fragment(); + } + int2 _100 = int2(gl_FragCoord.xy); + (gl_HelperInvocation ? ((void)0) : bar.write(uint4(1u), uint2(_100))); + uint _102 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _107 = (!gl_HelperInvocation ? atomic_fetch_or_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], 62u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], memory_order_relaxed)); + uint _109 = (!gl_HelperInvocation ? atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _111 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _113 = (!gl_HelperInvocation ? atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _118 = (!gl_HelperInvocation ? 
atomic_fetch_max_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], 100u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], memory_order_relaxed)); + uint _123; + if (!gl_HelperInvocation) + { + do + { + _123 = 100u; + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], &_123, 42u, memory_order_relaxed, memory_order_relaxed) && _123 == 100u); + } + else + { + _123 = atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], memory_order_relaxed); + } + out.fragColor = float4(1.0, 0.0, 0.0, 1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/fragment-component-padding.pad-fragment.frag b/reference/opt/shaders-msl/frag/fragment-component-padding.pad-fragment.frag index 53aafa5f7f2..19840fa434f 100644 --- a/reference/opt/shaders-msl/frag/fragment-component-padding.pad-fragment.frag +++ b/reference/opt/shaders-msl/frag/fragment-component-padding.pad-fragment.frag @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 FragColors_0 [[color(0)]]; @@ -19,7 +60,7 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float FragColors[2] = {}; + spvUnsafeArray FragColors = {}; float2 FragColor2 = {}; float3 FragColor3 = {}; FragColors[0] = in.vColor.x; @@ -28,8 +69,8 @@ fragment main0_out main0(main0_in in [[stage_in]]) FragColor3 = in.vColor.zzz; out.FragColors_0 = float4(FragColors[0]); out.FragColors_1 = float4(FragColors[1]); - out.FragColor2 = FragColor2.xyyy; - out.FragColor3 = FragColor3.xyzz; + out.FragColor2.xy = FragColor2; + out.FragColor3.xyz = FragColor3; return out; } diff --git a/reference/opt/shaders-msl/frag/helper-invocation.msl21.frag b/reference/opt/shaders-msl/frag/helper-invocation.msl21.frag index bacf6fa12a0..9d876df1a23 100644 --- a/reference/opt/shaders-msl/frag/helper-invocation.msl21.frag +++ b/reference/opt/shaders-msl/frag/helper-invocation.msl21.frag @@ -16,17 +16,16 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], texture2d uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]]) { main0_out out = {}; - bool gl_HelperInvocation = simd_is_helper_thread(); - float4 _51; - if (!gl_HelperInvocation) + float4 _52; + if (!simd_is_helper_thread()) { - _51 = uSampler.sample(uSamplerSmplr, in.vUV, level(0.0)); + _52 
= uSampler.sample(uSamplerSmplr, in.vUV, level(0.0)); } else { - _51 = float4(1.0); + _52 = float4(1.0); } - out.FragColor = _51; + out.FragColor = _52; return out; } diff --git a/reference/opt/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag b/reference/opt/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag new file mode 100644 index 00000000000..0c6e6f49915 --- /dev/null +++ b/reference/opt/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag @@ -0,0 +1,44 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4 v; +}; + +struct spvDescriptorSetBuffer0 +{ + array, 10000> uSamplers [[id(0)]]; + array uSamplersSmplr [[id(10000)]]; +}; + +struct spvDescriptorSetBuffer1 +{ + constant UBO* vs [[id(0)]][10000]; +}; + +struct spvDescriptorSetBuffer2 +{ + texture2d uSampler [[id(0)]]; + sampler uSamplerSmplr [[id(1)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], const device spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant spvDescriptorSetBuffer2& spvDescriptorSet2 [[buffer(2)]]) +{ + main0_out out = {}; + out.FragColor = (spvDescriptorSet0.uSamplers[9999].sample(spvDescriptorSet0.uSamplersSmplr[9999], in.vUV) + spvDescriptorSet1.vs[5000]->v) + spvDescriptorSet2.uSampler.sample(spvDescriptorSet2.uSamplerSmplr, in.vUV); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/image-query-lod.msl22.frag b/reference/opt/shaders-msl/frag/image-query-lod.msl22.frag index a2b8262e20e..de7a60cc5a2 100644 --- a/reference/opt/shaders-msl/frag/image-query-lod.msl22.frag +++ b/reference/opt/shaders-msl/frag/image-query-lod.msl22.frag @@ -41,30 +41,30 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d uSampler2D [ _111.x = 
uTextureCube.calculate_clamped_lod(uSampler, in.vUV); _111.y = uTextureCube.calculate_unclamped_lod(uSampler, in.vUV); out.FragColor += _111; - float2 _118; - _118.x = uSampler2D.calculate_clamped_lod(uSampler2DSmplr, in.vUV.xy); - _118.y = uSampler2D.calculate_unclamped_lod(uSampler2DSmplr, in.vUV.xy); - out.FragColor += _118; - float2 _123; - _123.x = uSampler3D.calculate_clamped_lod(uSampler3DSmplr, in.vUV); - _123.y = uSampler3D.calculate_unclamped_lod(uSampler3DSmplr, in.vUV); - out.FragColor += _123; - float2 _128; - _128.x = uSamplerCube.calculate_clamped_lod(uSamplerCubeSmplr, in.vUV); - _128.y = uSamplerCube.calculate_unclamped_lod(uSamplerCubeSmplr, in.vUV); - out.FragColor += _128; - float2 _136; - _136.x = uTexture2D.calculate_clamped_lod(uSampler, in.vUV.xy); - _136.y = uTexture2D.calculate_unclamped_lod(uSampler, in.vUV.xy); - out.FragColor += _136; - float2 _143; - _143.x = uTexture3D.calculate_clamped_lod(uSampler, in.vUV); - _143.y = uTexture3D.calculate_unclamped_lod(uSampler, in.vUV); - out.FragColor += _143; - float2 _150; - _150.x = uTextureCube.calculate_clamped_lod(uSampler, in.vUV); - _150.y = uTextureCube.calculate_unclamped_lod(uSampler, in.vUV); - out.FragColor += _150; + float2 _119; + _119.x = uSampler2D.calculate_clamped_lod(uSampler2DSmplr, in.vUV.xy); + _119.y = uSampler2D.calculate_unclamped_lod(uSampler2DSmplr, in.vUV.xy); + out.FragColor += _119; + float2 _124; + _124.x = uSampler3D.calculate_clamped_lod(uSampler3DSmplr, in.vUV); + _124.y = uSampler3D.calculate_unclamped_lod(uSampler3DSmplr, in.vUV); + out.FragColor += _124; + float2 _129; + _129.x = uSamplerCube.calculate_clamped_lod(uSamplerCubeSmplr, in.vUV); + _129.y = uSamplerCube.calculate_unclamped_lod(uSamplerCubeSmplr, in.vUV); + out.FragColor += _129; + float2 _137; + _137.x = uTexture2D.calculate_clamped_lod(uSampler, in.vUV.xy); + _137.y = uTexture2D.calculate_unclamped_lod(uSampler, in.vUV.xy); + out.FragColor += _137; + float2 _144; + _144.x = 
uTexture3D.calculate_clamped_lod(uSampler, in.vUV); + _144.y = uTexture3D.calculate_unclamped_lod(uSampler, in.vUV); + out.FragColor += _144; + float2 _151; + _151.x = uTextureCube.calculate_clamped_lod(uSampler, in.vUV); + _151.y = uTextureCube.calculate_unclamped_lod(uSampler, in.vUV); + out.FragColor += _151; return out; } diff --git a/reference/opt/shaders-msl/frag/in_block.frag b/reference/opt/shaders-msl/frag/in_block.frag index 8178c9a4ed6..efb0cbd4296 100644 --- a/reference/opt/shaders-msl/frag/in_block.frag +++ b/reference/opt/shaders-msl/frag/in_block.frag @@ -16,16 +16,16 @@ struct main0_out struct main0_in { - float4 VertexOut_color [[user(locn2)]]; - float4 VertexOut_color2 [[user(locn3)]]; + float4 inputs_color [[user(locn2)]]; + float4 inputs_color2 [[user(locn3)]]; }; fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; VertexOut inputs = {}; - inputs.color = in.VertexOut_color; - inputs.color2 = in.VertexOut_color2; + inputs.color = in.inputs_color; + inputs.color2 = in.inputs_color2; out.FragColor = inputs.color + inputs.color2; return out; } diff --git a/reference/opt/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag b/reference/opt/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag new file mode 100644 index 00000000000..7b011ffb580 --- /dev/null +++ b/reference/opt/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag @@ -0,0 +1,105 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Foo +{ + float a; + float b; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float foos_0_a [[user(locn1)]]; + float foos_0_b [[user(locn2)]]; + float foos_1_a [[user(locn3)]]; + float foos_1_b [[user(locn4)]]; + float foos_2_a [[user(locn5)]]; + float foos_2_b [[user(locn6)]]; + float foos_3_a [[user(locn7)]]; + float foos_3_b [[user(locn8)]]; + float bars_0_a [[user(locn10)]]; + float bars_0_b [[user(locn11)]]; + float bars_1_a [[user(locn12)]]; + float bars_1_b [[user(locn13)]]; + float bars_2_a [[user(locn14)]]; + float bars_2_b [[user(locn15)]]; + float bars_3_a [[user(locn16)]]; + float bars_3_b [[user(locn17)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray foos = {}; + spvUnsafeArray bars = {}; + foos[0].a = in.foos_0_a; + foos[0].b = in.foos_0_b; + foos[1].a = in.foos_1_a; + foos[1].b = in.foos_1_b; + foos[2].a = in.foos_2_a; + foos[2].b = in.foos_2_b; + foos[3].a = in.foos_3_a; + foos[3].b = in.foos_3_b; + bars[0].a = in.bars_0_a; + bars[0].b = in.bars_0_b; + bars[1].a = in.bars_1_a; + bars[1].b = in.bars_1_b; + bars[2].a = in.bars_2_a; + bars[2].b = in.bars_2_b; + bars[3].a = in.bars_3_a; + bars[3].b = in.bars_3_b; + out.FragColor.x = foos[0].a; + out.FragColor.y = foos[1].b; + out.FragColor.z = 
foos[2].a; + out.FragColor.w = bars[3].b; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/in_mat.frag b/reference/opt/shaders-msl/frag/in_mat.frag index 83ed9b5ea32..5d0b44eb525 100644 --- a/reference/opt/shaders-msl/frag/in_mat.frag +++ b/reference/opt/shaders-msl/frag/in_mat.frag @@ -27,11 +27,11 @@ fragment main0_out main0(main0_in in [[stage_in]], texturecube samplerCol inInvModelView[1] = in.inInvModelView_1; inInvModelView[2] = in.inInvModelView_2; inInvModelView[3] = in.inInvModelView_3; - float4 _31 = inInvModelView * float4(reflect(normalize(in.inPos), normalize(in.inNormal)), 0.0); + float4 _31 = inInvModelView * float4(reflect(fast::normalize(in.inPos), fast::normalize(in.inNormal)), 0.0); float _33 = _31.x; - float3 _59 = float3(_33, _31.yz); - _59.x = _33 * (-1.0); - out.outFragColor = samplerColor.sample(samplerColorSmplr, _59, bias(in.inLodBias)); + float3 _36 = float3(_33, _31.yz); + _36.x = _33 * (-1.0); + out.outFragColor = samplerColor.sample(samplerColorSmplr, _36, bias(in.inLodBias)); return out; } diff --git a/reference/opt/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag b/reference/opt/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag new file mode 100644 index 00000000000..52a78cf93ac --- /dev/null +++ b/reference/opt/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d_ms_array uSubpass0 [[texture(0)]], texture2d_ms_array uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), gl_Layer, 1) + uSubpass1.read(uint2(gl_FragCoord.xy), gl_Layer, 2)) + uSubpass0.read(uint2(gl_FragCoord.xy), gl_Layer, gl_SampleID); 
+ return out; +} + diff --git a/reference/opt/shaders-msl/frag/input-attachment-ms.frag b/reference/opt/shaders-msl/frag/input-attachment-ms.frag index 906cabbf474..0c47348d6e8 100644 --- a/reference/opt/shaders-msl/frag/input-attachment-ms.frag +++ b/reference/opt/shaders-msl/frag/input-attachment-ms.frag @@ -11,6 +11,7 @@ struct main0_out fragment main0_out main0(texture2d_ms uSubpass0 [[texture(0)]], texture2d_ms uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]]) { main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), 1) + uSubpass1.read(uint2(gl_FragCoord.xy), 2)) + uSubpass0.read(uint2(gl_FragCoord.xy), gl_SampleID); return out; } diff --git a/reference/opt/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag b/reference/opt/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag new file mode 100644 index 00000000000..e27b24adf39 --- /dev/null +++ b/reference/opt/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant uint* spvViewMask [[buffer(24)]], texture2d_ms_array uSubpass0 [[texture(0)]], texture2d_ms_array uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]], uint gl_ViewIndex [[render_target_array_index]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + gl_ViewIndex += spvViewMask[0]; + out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), gl_ViewIndex, 1) + uSubpass1.read(uint2(gl_FragCoord.xy), gl_ViewIndex, 2)) + uSubpass0.read(uint2(gl_FragCoord.xy), gl_ViewIndex, gl_SampleID); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/input-attachment.arrayed-subpass.frag b/reference/opt/shaders-msl/frag/input-attachment.arrayed-subpass.frag new file mode 100644 index 
00000000000..5d5ee43104d --- /dev/null +++ b/reference/opt/shaders-msl/frag/input-attachment.arrayed-subpass.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d_array uSubpass0 [[texture(0)]], texture2d_array uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy), gl_Layer) + uSubpass1.read(uint2(gl_FragCoord.xy), gl_Layer); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/input-attachment.frag b/reference/opt/shaders-msl/frag/input-attachment.frag index 122190648a2..790dce3c1bf 100644 --- a/reference/opt/shaders-msl/frag/input-attachment.frag +++ b/reference/opt/shaders-msl/frag/input-attachment.frag @@ -11,7 +11,7 @@ struct main0_out fragment main0_out main0(texture2d uSubpass0 [[texture(0)]], texture2d uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; - out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy), 0) + uSubpass1.read(uint2(gl_FragCoord.xy), 0); + out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy)) + uSubpass1.read(uint2(gl_FragCoord.xy)); return out; } diff --git a/reference/opt/shaders-msl/frag/input-attachment.multiview.frag b/reference/opt/shaders-msl/frag/input-attachment.multiview.frag new file mode 100644 index 00000000000..7cf06d2d6b9 --- /dev/null +++ b/reference/opt/shaders-msl/frag/input-attachment.multiview.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant uint* spvViewMask [[buffer(24)]], texture2d_array uSubpass0 [[texture(0)]], texture2d_array uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]], uint gl_ViewIndex [[render_target_array_index]]) +{ + main0_out out = {}; + gl_ViewIndex += spvViewMask[0]; + out.FragColor = 
uSubpass0.read(uint2(gl_FragCoord.xy), gl_ViewIndex) + uSubpass1.read(uint2(gl_FragCoord.xy), gl_ViewIndex); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/interpolation-qualifiers-block.frag b/reference/opt/shaders-msl/frag/interpolation-qualifiers-block.frag index 2b420195ffc..b3bab04319e 100644 --- a/reference/opt/shaders-msl/frag/interpolation-qualifiers-block.frag +++ b/reference/opt/shaders-msl/frag/interpolation-qualifiers-block.frag @@ -21,27 +21,27 @@ struct main0_out struct main0_in { - float2 Input_v0 [[user(locn0), centroid_no_perspective]]; - float2 Input_v1 [[user(locn1), centroid_no_perspective]]; - float3 Input_v2 [[user(locn2), centroid_no_perspective]]; - float4 Input_v3 [[user(locn3), centroid_no_perspective]]; - float Input_v4 [[user(locn4), centroid_no_perspective]]; - float Input_v5 [[user(locn5), centroid_no_perspective]]; - float Input_v6 [[user(locn6), centroid_no_perspective]]; + float2 inp_v0 [[user(locn0), centroid_no_perspective]]; + float2 inp_v1 [[user(locn1), centroid_no_perspective]]; + float3 inp_v2 [[user(locn2), centroid_no_perspective]]; + float4 inp_v3 [[user(locn3), centroid_no_perspective]]; + float inp_v4 [[user(locn4), centroid_no_perspective]]; + float inp_v5 [[user(locn5), centroid_no_perspective]]; + float inp_v6 [[user(locn6), centroid_no_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; Input inp = {}; - inp.v0 = in.Input_v0; - inp.v1 = in.Input_v1; - inp.v2 = in.Input_v2; - inp.v3 = in.Input_v3; - inp.v4 = in.Input_v4; - inp.v5 = in.Input_v5; - inp.v6 = in.Input_v6; - out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, ((inp.v3.w * inp.v4) + inp.v5) - inp.v6); + inp.v0 = in.inp_v0; + inp.v1 = in.inp_v1; + inp.v2 = in.inp_v2; + inp.v3 = in.inp_v3; + inp.v4 = in.inp_v4; + inp.v5 = in.inp_v5; + inp.v6 = in.inp_v6; + out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, fma(inp.v3.w, inp.v4, inp.v5) - inp.v6); return out; } diff --git 
a/reference/opt/shaders-msl/frag/interpolation-qualifiers.frag b/reference/opt/shaders-msl/frag/interpolation-qualifiers.frag index aff6e1b0f70..208e8806b24 100644 --- a/reference/opt/shaders-msl/frag/interpolation-qualifiers.frag +++ b/reference/opt/shaders-msl/frag/interpolation-qualifiers.frag @@ -22,7 +22,7 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - out.FragColor = float4(in.v0.x + in.v1.y, in.v2.xy, ((in.v3.w * in.v4) + in.v5) - in.v6); + out.FragColor = float4(in.v0.x + in.v1.y, in.v2.xy, fma(in.v3.w, in.v4, in.v5) - in.v6); return out; } diff --git a/reference/opt/shaders-msl/frag/lut-promotion.frag b/reference/opt/shaders-msl/frag/lut-promotion.frag index c9169b790d3..e24bcd6d111 100644 --- a/reference/opt/shaders-msl/frag/lut-promotion.frag +++ b/reference/opt/shaders-msl/frag/lut-promotion.frag @@ -1,13 +1,52 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -constant float _16[16] = { 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }; -constant float4 _60[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; -constant float4 _104[4] = { float4(20.0), float4(30.0), float4(50.0), float4(60.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _16 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }); +constant spvUnsafeArray _60 = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); +constant spvUnsafeArray _104 = spvUnsafeArray({ float4(20.0), float4(30.0), float4(50.0), float4(60.0) }); struct main0_out { @@ -19,19 +58,6 @@ struct main0_in int index [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; @@ -53,7 +79,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) { out.FragColor += _60[in.index & 1].x; } - float4 foobar[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; + spvUnsafeArray foobar = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); if (_63) { foobar[1].z = 20.0; diff --git a/reference/opt/shaders-msl/frag/modf-access-tracking-function.frag b/reference/opt/shaders-msl/frag/modf-access-tracking-function.frag new file mode 100644 index 00000000000..612dd4e92d1 --- /dev/null +++ b/reference/opt/shaders-msl/frag/modf-access-tracking-function.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 vo0 [[color(0)]]; + float4 vo1 [[color(1)]]; +}; + +struct main0_in +{ + float4 v [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 _25 = modf(in.v, out.vo1); + out.vo0 = _25; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/mrt-array.frag b/reference/opt/shaders-msl/frag/mrt-array.frag index d7cea6baf94..d7fccdedc86 100644 --- a/reference/opt/shaders-msl/frag/mrt-array.frag +++ b/reference/opt/shaders-msl/frag/mrt-array.frag @@ -1,10 +1,56 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() +template +inline Tx mod(Tx x, Ty y) +{ + return x - y * floor(x / y); +} + struct main0_out { float4 FragColor_0 [[color(0)]]; @@ -19,17 +65,10 @@ struct main0_in float4 vB [[user(locn1)]]; }; -// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() -template -Tx mod(Tx x, Ty y) -{ - return x - y * floor(x / y); -} - fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 FragColor[4] = {}; + spvUnsafeArray FragColor = {}; FragColor[0] = mod(in.vA, in.vB); FragColor[1] = in.vA + in.vB; FragColor[2] = in.vA - in.vB; diff --git a/reference/opt/shaders-msl/frag/nonuniform-qualifier.msl2.frag b/reference/opt/shaders-msl/frag/nonuniform-qualifier.msl2.frag index 14a6999f339..bdd8d1419be 100644 --- a/reference/opt/shaders-msl/frag/nonuniform-qualifier.msl2.frag +++ b/reference/opt/shaders-msl/frag/nonuniform-qualifier.msl2.frag @@ -39,12 +39,15 @@ fragment main0_out main0(main0_in in [[stage_in]], constant UBO* ubos_0 [[buffer }; main0_out out = {}; - int _24 = in.vIndex + 10; - int _35 = in.vIndex + 40; - out.FragColor = uSamplers[_24].sample(uSamps[_35], in.vUV); - out.FragColor = uCombinedSamplers[_24].sample(uCombinedSamplersSmplr[_24], in.vUV); - 
out.FragColor += ubos[(in.vIndex + 20)]->v[_35]; - out.FragColor += ssbos[(in.vIndex + 50)]->v[in.vIndex + 60]; + int _25 = in.vIndex + 10; + int _37 = in.vIndex + 40; + out.FragColor = uSamplers[_25].sample(uSamps[_37], in.vUV); + out.FragColor = uCombinedSamplers[_25].sample(uCombinedSamplersSmplr[_25], in.vUV); + int _69 = in.vIndex + 20; + out.FragColor += ubos[_69]->v[_37]; + int _87 = in.vIndex + 50; + int _91 = in.vIndex + 60; + out.FragColor += ssbos[_87]->v[_91]; return out; } diff --git a/reference/opt/shaders-msl/frag/packed-expression-vector-shuffle.frag b/reference/opt/shaders-msl/frag/packed-expression-vector-shuffle.frag index dd319af5552..a12c44912f7 100644 --- a/reference/opt/shaders-msl/frag/packed-expression-vector-shuffle.frag +++ b/reference/opt/shaders-msl/frag/packed-expression-vector-shuffle.frag @@ -17,7 +17,11 @@ struct main0_out fragment main0_out main0(constant UBO& _15 [[buffer(0)]]) { main0_out out = {}; - out.FragColor = float4(_15.color[0], _15.color[1], _15.color[2], float4(1.0).w); + float4 _36 = float4(1.0); + _36.x = _15.color[0]; + _36.y = _15.color[1]; + _36.z = _15.color[2]; + out.FragColor = _36; return out; } diff --git a/reference/opt/shaders-msl/frag/packing-test-3.frag b/reference/opt/shaders-msl/frag/packing-test-3.frag index 8cc5f5c75d7..1f696e94ef0 100644 --- a/reference/opt/shaders-msl/frag/packing-test-3.frag +++ b/reference/opt/shaders-msl/frag/packing-test-3.frag @@ -19,10 +19,10 @@ struct main0_out float4 _entryPointOutput [[color(0)]]; }; -fragment main0_out main0(constant CB0& _26 [[buffer(0)]]) +fragment main0_out main0(constant CB0& _RESERVED_IDENTIFIER_FIXUP_24 [[buffer(0)]]) { main0_out out = {}; - out._entryPointOutput = float4(_26.CB0[1].position[0], _26.CB0[1].position[1], _26.CB0[1].position[2], _26.CB0[1].radius); + out._entryPointOutput = float4(_RESERVED_IDENTIFIER_FIXUP_24.CB0[1].position[0], _RESERVED_IDENTIFIER_FIXUP_24.CB0[1].position[1], _RESERVED_IDENTIFIER_FIXUP_24.CB0[1].position[2], 
_RESERVED_IDENTIFIER_FIXUP_24.CB0[1].radius); return out; } diff --git a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag new file mode 100644 index 00000000000..1bfaff53bf8 --- /dev/null +++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -0,0 +1,53 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +struct spvDescriptorSetBuffer0 +{ + device Buffer3* m_9 [[id(0)]]; + texture2d img4 [[id(1)]]; + texture2d img [[id(2), raster_order_group(0)]]; + texture2d img3 [[id(3), raster_order_group(0)]]; + texture2d img2 [[id(4), raster_order_group(0)]]; + device atomic_uint* img2_atomic [[id(5), raster_order_group(0)]]; + volatile device Buffer* m_42 [[id(6), raster_order_group(0)]]; + device Buffer2* m_52 [[id(7), raster_order_group(0)]]; +}; + +fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) +{ + (*spvDescriptorSet0.m_9).baz = 0; + spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0))); + uint _39 = 
atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.img2_atomic[spvImage2DAtomicCoord(int2(0), spvDescriptorSet0.img2)], 1u, memory_order_relaxed); + (*spvDescriptorSet0.m_42).foo += 42; + uint _55 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_42).bar, (*spvDescriptorSet0.m_52).quux, memory_order_relaxed); +} + diff --git a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag new file mode 100644 index 00000000000..6a300e8c589 --- /dev/null +++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -0,0 +1,41 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _42 [[buffer(2), raster_order_group(0)]], device Buffer2& _52 [[buffer(3), raster_order_group(0)]], texture2d img4 [[texture(0)]], texture2d img [[texture(1), raster_order_group(0)]], texture2d img3 [[texture(2), raster_order_group(0)]], texture2d img2 [[texture(3), raster_order_group(0)]], device atomic_uint* img2_atomic [[buffer(1), raster_order_group(0)]]) +{ + _9.baz = 0; + img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + img.write(img3.read(uint2(int2(0))), uint2(int2(0))); + uint _39 = atomic_fetch_add_explicit((device atomic_uint*)&img2_atomic[spvImage2DAtomicCoord(int2(0), img2)], 1u, memory_order_relaxed); + _42.foo += 42; + uint _55 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_42.bar, _52.quux, memory_order_relaxed); +} + diff --git a/reference/opt/shaders-msl/frag/post-depth-coverage.ios.msl2.frag b/reference/opt/shaders-msl/frag/post-depth-coverage.ios.msl2.frag new file mode 100644 index 00000000000..3b2885e2e2a --- /dev/null +++ b/reference/opt/shaders-msl/frag/post-depth-coverage.ios.msl2.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask, post_depth_coverage]]) +{ + main0_out out = {}; + out.FragColor = float4(float(gl_SampleMaskIn)); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/post-depth-coverage.msl23.frag 
b/reference/opt/shaders-msl/frag/post-depth-coverage.msl23.frag new file mode 100644 index 00000000000..3b2885e2e2a --- /dev/null +++ b/reference/opt/shaders-msl/frag/post-depth-coverage.msl23.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask, post_depth_coverage]]) +{ + main0_out out = {}; + out.FragColor = float4(float(gl_SampleMaskIn)); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag b/reference/opt/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag new file mode 100644 index 00000000000..859ace2cd48 --- /dev/null +++ b/reference/opt/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag @@ -0,0 +1,45 @@ +#include +#include +#if __METAL_VERSION__ >= 230 +#include +using namespace metal::raytracing; +#endif + +using namespace metal; + +struct main0_out +{ + float4 outColor [[color(0)]]; +}; + +struct main0_in +{ + float4 inPos [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], raytracing::acceleration_structure topLevelAS [[buffer(0)]]) +{ + main0_out out = {}; + raytracing::intersection_query rayQuery; + rayQuery.reset(ray(float3((in.inPos.xy * 4.0) - float2(2.0), 1.0), float3(0.0, 0.0, -1.0), 0.001000000047497451305389404296875, 2.0), topLevelAS, intersection_params()); + for (;;) + { + bool _88 = rayQuery.next(); + if (_88) + { + continue; + } + else + { + break; + } + } + uint _92 = uint(rayQuery.get_committed_intersection_type()); + if (_92 == 0u) + { + discard_fragment(); + } + out.outColor = in.inPos; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/read-cull-clip-distance-in-function.frag b/reference/opt/shaders-msl/frag/read-cull-clip-distance-in-function.frag new file mode 100644 index 00000000000..3c9757ebd26 --- /dev/null +++ 
b/reference/opt/shaders-msl/frag/read-cull-clip-distance-in-function.frag @@ -0,0 +1,72 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; + float gl_CullDistance_0 [[user(cull0)]]; + float gl_CullDistance_1 [[user(cull1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_CullDistance = {}; + spvUnsafeArray gl_ClipDistance = {}; + gl_CullDistance[0] = in.gl_CullDistance_0; + gl_CullDistance[1] = in.gl_CullDistance_1; + gl_ClipDistance[0] = in.gl_ClipDistance_0; + gl_ClipDistance[1] = in.gl_ClipDistance_1; + out.FragColor = float4(gl_CullDistance[0], gl_CullDistance[1], gl_ClipDistance[0], gl_ClipDistance[1]); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/return-value-after-discard-terminator.frag b/reference/opt/shaders-msl/frag/return-value-after-discard-terminator.frag new file mode 100644 index 00000000000..92097dfa4ad --- /dev/null +++ 
b/reference/opt/shaders-msl/frag/return-value-after-discard-terminator.frag @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct buff_t +{ + int m0[1024]; +}; + +struct main0_out +{ + float4 frag_clr [[color(0)]]; +}; + +fragment main0_out main0(device buff_t& buff [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + int4 _16 = int4(gl_FragCoord); + out.frag_clr = float4(0.0, 0.0, 1.0, 1.0); + buff.m0[(_16.y * 32) + _16.x] = 1; + discard_fragment(); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag b/reference/opt/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag new file mode 100644 index 00000000000..4f7e9b53b24 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], depth2d uTexture [[texture(0)]], sampler uSampler [[sampler(0)]], sampler uSamplerShadow [[sampler(1)]]) +{ + main0_out out = {}; + out.FragColor = float4(uTexture.sample(uSampler, in.vUV.xy)).x; + out.FragColor += uTexture.sample_compare(uSamplerShadow, in.vUV.xy, in.vUV.z); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag b/reference/opt/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag new file mode 100644 index 00000000000..626fe4c79c2 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask]], uint gl_SampleID [[sample_id]]) +{ + main0_out 
out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = (gl_SampleMaskIn & 0x22 & (1 << gl_SampleID)); + out.gl_SampleMask &= 0x22; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag b/reference/opt/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag new file mode 100644 index 00000000000..f478901b6be --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask]]) +{ + main0_out out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = (gl_SampleMaskIn & 0x22); + out.gl_SampleMask &= 0x22; + return out; +} + diff --git a/reference/shaders-msl/frag/16bit-constants.frag b/reference/opt/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag similarity index 53% rename from reference/shaders-msl/frag/16bit-constants.frag rename to reference/opt/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag index 56c7ea5df4a..d04f2033bb5 100644 --- a/reference/shaders-msl/frag/16bit-constants.frag +++ b/reference/opt/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag @@ -5,17 +5,15 @@ using namespace metal; struct main0_out { - half foo [[color(0)]]; - short bar [[color(1)]]; - ushort baz [[color(2)]]; + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; }; fragment main0_out main0() { main0_out out = {}; - out.foo = half(1.0); - out.bar = 2; - out.baz = 3u; + out.FragColor = float4(1.0); + out.gl_SampleMask = 0x22; return out; } diff --git a/reference/opt/shaders-msl/frag/sample-mask.fixed-sample-mask.frag b/reference/opt/shaders-msl/frag/sample-mask.fixed-sample-mask.frag new file mode 100644 index 00000000000..76306b5ade8 --- /dev/null +++ 
b/reference/opt/shaders-msl/frag/sample-mask.fixed-sample-mask.frag @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = 0; + out.gl_SampleMask &= 0x22; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag b/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag new file mode 100644 index 00000000000..5df60f909e5 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d_array tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + float3 _28 = float3(gl_FragCoord.xy, float(gl_SampleID)); + out.FragColor = tex.sample(texSmplr, _28.xy, uint(round(_28.z))); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag b/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag new file mode 100644 index 00000000000..386230ef0c3 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float foo [[user(locn0), sample_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d_array tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + float3 _26 = 
float3(gl_FragCoord.xy, in.foo); + out.FragColor = tex.sample(texSmplr, _26.xy, uint(round(_26.z))); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag b/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag new file mode 100644 index 00000000000..f8f357fe7c9 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + float2 gl_SamplePosition = get_sample_position(gl_SampleID); + out.FragColor = tex.sample(texSmplr, (gl_FragCoord.xy - gl_SamplePosition)); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag b/reference/opt/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag new file mode 100644 index 00000000000..1ed8148d4c0 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + out.FragColor = tex.sample(texSmplr, gl_FragCoord.xy); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag b/reference/opt/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag new file mode 100644 index 00000000000..70278b12907 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag @@ -0,0 +1,22 @@ +#include +#include + 
+using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float vTex [[user(locn0), flat]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor += ((uSampler.sample(uSamplerSmplr, float2(in.vTex, 0.5), bias(2.0)) + uSampler.sample(uSamplerSmplr, float2(in.vTex, 0.5), level(3.0))) + uSampler.sample(uSamplerSmplr, float2(in.vTex, 0.5), gradient2d(5.0, 8.0))); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag b/reference/opt/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag new file mode 100644 index 00000000000..6aaffe532ba --- /dev/null +++ b/reference/opt/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], depth2d_array uTex [[texture(0)]], sampler uShadow [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = uTex.sample_compare(uShadow, float2(in.vUV.x, 0.5), uint(round(in.vUV.y)), in.vUV.z, bias(1.0)); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag b/reference/opt/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag new file mode 100644 index 00000000000..07845691942 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], depth2d_array uTex [[texture(0)]], sampler uShadow [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = uTex.sample_compare(uShadow, 
in.vUV.xy, uint(round(in.vUV.z)), in.vUV.w, level(0)) + uTex.sample_compare(uShadow, in.vUV.xy, uint(round(in.vUV.z)), in.vUV.w, gradient2d(float2(1.0), float2(1.0))); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag b/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag index 592d445810f..fc908cb3e18 100644 --- a/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag +++ b/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag @@ -5,18 +5,8 @@ using namespace metal; -struct main0_out -{ - float FragColor [[color(0)]]; -}; - -struct main0_in -{ - float3 vRefract [[user(locn0)]]; -}; - template -inline T spvReflect(T i, T n) +[[clang::optnone]] T spvReflect(T i, T n) { return i - T(2) * i * n * n; } @@ -37,6 +27,16 @@ inline T spvRefract(T i, T n, T eta) } } +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vRefract [[user(locn0)]]; +}; + fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; diff --git a/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag b/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag index 30b28d21311..fff932c0371 100644 --- a/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag +++ b/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag @@ -35,43 +35,19 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], device SSBO& ssbo [[buffer(0)]], constant Push& registers [[buffer(1)]], constant UBO& ubo [[buffer(2)]]) { main0_out out = {}; - short _196 = 10; - int _197 = 20; - char2 _198 = as_type(_196); - char4 _199 = as_type(_197); - _196 = as_type(_198); - _197 = as_type(_199); - ssbo.i8[0] = _199.x; - ssbo.i8[1] = _199.y; - ssbo.i8[2] = _199.z; - ssbo.i8[3] = _199.w; - ushort _220 = 10u; - uint _221 = 20u; - uchar2 _222 = as_type(_220); - uchar4 _223 = as_type(_221); - _220 = as_type(_222); - _221 = as_type(_223); - ssbo.u8[0] = _223.x; - ssbo.u8[1] = _223.y; - ssbo.u8[2] = _223.z; - 
ssbo.u8[3] = _223.w; - char4 _246 = char4(in.vColor); - char4 _244 = _246; - _244 += char4(registers.i8); - _244 += char4(-40); - _244 += char4(-50); - _244 += char4(char(10), char(20), char(30), char(40)); - _244 += char4(ssbo.i8[4]); - _244 += char4(ubo.i8); - out.FragColorInt = int4(_244); - uchar4 _271 = uchar4(_246); - _271 += uchar4(registers.u8); - _271 += uchar4(216); - _271 += uchar4(206); - _271 += uchar4(uchar(10), uchar(20), uchar(30), uchar(40)); - _271 += uchar4(ssbo.u8[4]); - _271 += uchar4(ubo.u8); - out.FragColorUint = uint4(_271); + char4 _204 = as_type(20); + ssbo.i8[0] = _204.x; + ssbo.i8[1] = _204.y; + ssbo.i8[2] = _204.z; + ssbo.i8[3] = _204.w; + uchar4 _229 = as_type(20u); + ssbo.u8[0] = _229.x; + ssbo.u8[1] = _229.y; + ssbo.u8[2] = _229.z; + ssbo.u8[3] = _229.w; + char4 _249 = char4(in.vColor); + out.FragColorInt = int4((((((_249 + char4(registers.i8)) + char4(-40)) + char4(-50)) + char4(char(10), char(20), char(30), char(40))) + char4(ssbo.i8[4])) + char4(ubo.i8)); + out.FragColorUint = uint4((((((uchar4(_249) + uchar4(registers.u8)) + uchar4(216)) + uchar4(206)) + uchar4(uchar(10), uchar(20), uchar(30), uchar(40))) + uchar4(ssbo.u8[4])) + uchar4(ubo.u8)); return out; } diff --git a/reference/opt/shaders-msl/frag/shadow-compare-global-alias.invalid.frag b/reference/opt/shaders-msl/frag/shadow-compare-global-alias.invalid.frag deleted file mode 100644 index a58f13fc402..00000000000 --- a/reference/opt/shaders-msl/frag/shadow-compare-global-alias.invalid.frag +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include - -using namespace metal; - -struct main0_out -{ - float FragColor [[color(0)]]; -}; - -struct main0_in -{ - float3 vUV [[user(locn0)]]; -}; - -fragment main0_out main0(main0_in in [[stage_in]], depth2d uTex [[texture(0)]], depth2d uSampler [[texture(1)]], sampler uSamp [[sampler(0)]], sampler uSamplerSmplr [[sampler(1)]]) -{ - main0_out out = {}; - out.FragColor = uSampler.sample_compare(uSamplerSmplr, in.vUV.xy, in.vUV.z); - 
out.FragColor += uTex.sample_compare(uSamp, in.vUV.xy, in.vUV.z); - out.FragColor += uTex.sample_compare(uSamp, in.vUV.xy, in.vUV.z); - out.FragColor += uSampler.sample_compare(uSamplerSmplr, in.vUV.xy, in.vUV.z); - out.FragColor += uTex.sample_compare(uSamp, in.vUV.xy, in.vUV.z); - out.FragColor += uSampler.sample_compare(uSamplerSmplr, in.vUV.xy, in.vUV.z); - return out; -} - diff --git a/reference/opt/shaders-msl/frag/subgroup-globals-extract.msl22.frag b/reference/opt/shaders-msl/frag/subgroup-globals-extract.msl22.frag new file mode 100644 index 00000000000..b2cfddf6429 --- /dev/null +++ b/reference/opt/shaders-msl/frag/subgroup-globals-extract.msl22.frag @@ -0,0 +1,56 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 
32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +struct main0_out +{ + uint2 FragColor [[color(0)]]; +}; + +fragment main0_out main0(uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]], uint gl_SubgroupSize [[threads_per_simdgroup]]) +{ + main0_out out = {}; + out.FragColor.x = (((spvSubgroupBallotFindLSB(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize) + spvSubgroupBallotFindMSB(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize)) + spvSubgroupBallotBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize)) + spvSubgroupBallotInclusiveBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupInvocationID)) + spvSubgroupBallotExclusiveBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupInvocationID); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/switch-unreachable-break.frag b/reference/opt/shaders-msl/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..8d7903b79b4 --- /dev/null +++ b/reference/opt/shaders-msl/frag/switch-unreachable-break.frag @@ -0,0 +1,43 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + int cond; + int cond2; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant UBO& _15 [[buffer(0)]]) +{ + main0_out out = {}; + out.FragColor = float4(10.0); + switch (_15.cond) + { + case 1: + { + if 
(_15.cond2 < 50) + { + break; + } + else + { + discard_fragment(); + } + break; // unreachable workaround + } + default: + { + out.FragColor = float4(20.0); + break; + } + } + return out; +} + diff --git a/reference/opt/shaders-msl/frag/swizzle.frag b/reference/opt/shaders-msl/frag/swizzle.frag index 7a0494e064a..cda23096c54 100644 --- a/reference/opt/shaders-msl/frag/swizzle.frag +++ b/reference/opt/shaders-msl/frag/swizzle.frag @@ -17,9 +17,11 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], texture2d samp [[texture(0)]], sampler sampSmplr [[sampler(0)]]) { main0_out out = {}; - out.FragColor = float4(samp.sample(sampSmplr, in.vUV).xyz, 1.0); - out.FragColor = float4(samp.sample(sampSmplr, in.vUV).xz, 1.0, 4.0); - out.FragColor = float4(samp.sample(sampSmplr, in.vUV).xx, samp.sample(sampSmplr, (in.vUV + float2(0.100000001490116119384765625))).yy); + float4 _19 = samp.sample(sampSmplr, in.vUV); + float _23 = _19.x; + out.FragColor = float4(_23, _19.yz, 1.0); + out.FragColor = float4(_23, _19.z, 1.0, 4.0); + out.FragColor = float4(_23, _23, samp.sample(sampSmplr, (in.vUV + float2(0.100000001490116119384765625))).yy); out.FragColor = float4(in.vNormal, 1.0); out.FragColor = float4(in.vNormal + float3(1.7999999523162841796875), 1.0); out.FragColor = float4(in.vUV, in.vUV + float2(1.7999999523162841796875)); diff --git a/reference/opt/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag b/reference/opt/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag new file mode 100644 index 00000000000..98b9bb7ef80 --- /dev/null +++ b/reference/opt/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d uTexture [[texture(0)]], texture2d uTexture2 [[texture(1)]], sampler uTextureSmplr [[sampler(0)]], sampler uTexture2Smplr [[sampler(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + 
out.FragColor = uTexture.read(uint2(int2(gl_FragCoord.xy)) + uint2(int2(1)), 0); + out.FragColor += uTexture2.read(uint2(uint(int(gl_FragCoord.x)), 0) + uint2(uint(-1), 0), 0); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/texture-cube-array.frag b/reference/opt/shaders-msl/frag/texture-cube-array.frag new file mode 100644 index 00000000000..0af8a047b3f --- /dev/null +++ b/reference/opt/shaders-msl/frag/texture-cube-array.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texturecube cubeSampler [[texture(0)]], texturecube_array cubeArraySampler [[texture(1)]], texture2d_array texArraySampler [[texture(2)]], sampler cubeSamplerSmplr [[sampler(0)]], sampler cubeArraySamplerSmplr [[sampler(1)]], sampler texArraySamplerSmplr [[sampler(2)]]) +{ + main0_out out = {}; + out.FragColor = (cubeSampler.sample(cubeSamplerSmplr, in.vUV.xyz) + cubeArraySampler.sample(cubeArraySamplerSmplr, in.vUV.xyz, uint(round(in.vUV.w)))) + texArraySampler.sample(texArraySamplerSmplr, in.vUV.xyz.xy, uint(round(in.vUV.xyz.z))); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag b/reference/opt/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag new file mode 100644 index 00000000000..c057e217e21 --- /dev/null +++ b/reference/opt/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag @@ -0,0 +1,58 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +static inline __attribute__((always_inline)) +float3 spvCubemapTo2DArrayFace(float3 P) +{ + float3 Coords = abs(P.xyz); + float CubeFace = 0; + float ProjectionAxis = 0; + float u = 0; + float v = 0; + if (Coords.x >= Coords.y && Coords.x >= Coords.z) + { + CubeFace = P.x >= 0 ? 
0 : 1; + ProjectionAxis = Coords.x; + u = P.x >= 0 ? -P.z : P.z; + v = -P.y; + } + else if (Coords.y >= Coords.x && Coords.y >= Coords.z) + { + CubeFace = P.y >= 0 ? 2 : 3; + ProjectionAxis = Coords.y; + u = P.x; + v = P.y >= 0 ? P.z : -P.z; + } + else + { + CubeFace = P.z >= 0 ? 4 : 5; + ProjectionAxis = Coords.z; + u = P.z >= 0 ? P.x : -P.x; + v = -P.y; + } + u = 0.5 * (u/ProjectionAxis + 1); + v = 0.5 * (v/ProjectionAxis + 1); + return float3(u, v, CubeFace); +} + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texturecube cubeSampler [[texture(0)]], texture2d_array cubeArraySampler [[texture(1)]], texture2d_array texArraySampler [[texture(2)]], sampler cubeSamplerSmplr [[sampler(0)]], sampler cubeArraySamplerSmplr [[sampler(1)]], sampler texArraySamplerSmplr [[sampler(2)]]) +{ + main0_out out = {}; + out.FragColor = (cubeSampler.sample(cubeSamplerSmplr, in.vUV.xyz) + cubeArraySampler.sample(cubeArraySamplerSmplr, spvCubemapTo2DArrayFace(in.vUV.xyz).xy, uint(spvCubemapTo2DArrayFace(in.vUV.xyz).z) + (uint(round(in.vUV.w)) * 6u))) + texArraySampler.sample(texArraySamplerSmplr, in.vUV.xyz.xy, uint(round(in.vUV.xyz.z))); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/texture-proj-shadow.frag b/reference/opt/shaders-msl/frag/texture-proj-shadow.frag index 52d4a026d2d..6d465ce9f9f 100644 --- a/reference/opt/shaders-msl/frag/texture-proj-shadow.frag +++ b/reference/opt/shaders-msl/frag/texture-proj-shadow.frag @@ -18,12 +18,13 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], depth2d uShadow2D [[texture(0)]], texture1d uSampler1D [[texture(1)]], texture2d uSampler2D [[texture(2)]], texture3d uSampler3D [[texture(3)]], sampler uShadow2DSmplr [[sampler(0)]], sampler uSampler1DSmplr [[sampler(1)]], sampler uSampler2DSmplr [[sampler(2)]], sampler uSampler3DSmplr [[sampler(3)]]) { main0_out out = {}; - float4 _20 = 
in.vClip4; - _20.z = in.vClip4.w; - out.FragColor = uShadow2D.sample_compare(uShadow2DSmplr, _20.xy / _20.z, in.vClip4.z / _20.z); + float4 _17 = in.vClip4; + float4 _20 = _17; + _20.z = _17.w; + out.FragColor = uShadow2D.sample_compare(uShadow2DSmplr, _20.xy / _20.z, _17.z / _20.z); out.FragColor = uSampler1D.sample(uSampler1DSmplr, in.vClip2.x / in.vClip2.y).x; out.FragColor = uSampler2D.sample(uSampler2DSmplr, in.vClip3.xy / in.vClip3.z).x; - out.FragColor = uSampler3D.sample(uSampler3DSmplr, in.vClip4.xyz / in.vClip4.w).x; + out.FragColor = uSampler3D.sample(uSampler3DSmplr, _17.xyz / _17.w).x; return out; } diff --git a/reference/opt/shaders-msl/frag/ubo_layout.frag b/reference/opt/shaders-msl/frag/ubo_layout.frag index 0bc27462b2e..4ca603d6431 100644 --- a/reference/opt/shaders-msl/frag/ubo_layout.frag +++ b/reference/opt/shaders-msl/frag/ubo_layout.frag @@ -31,7 +31,7 @@ struct main0_out fragment main0_out main0(constant UBO1& ubo1 [[buffer(0)]], constant UBO2& ubo0 [[buffer(1)]]) { main0_out out = {}; - out.FragColor = transpose(ubo1.foo.foo)[0] + ubo0.foo.foo[0]; + out.FragColor = float4(ubo1.foo.foo[0][0], ubo1.foo.foo[1][0], ubo1.foo.foo[2][0], ubo1.foo.foo[3][0]) + ubo0.foo.foo[0]; return out; } diff --git a/reference/opt/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag b/reference/opt/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag new file mode 100644 index 00000000000..1cb7aa70328 --- /dev/null +++ b/reference/opt/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag @@ -0,0 +1,75 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + ushort2 a [[user(locn0)]]; + uint3 b [[user(locn1)]]; + ushort c_0 [[user(locn2)]]; + ushort c_1 [[user(locn3)]]; + uint4 e_0 [[user(locn4)]]; + uint4 e_1 [[user(locn5)]]; + float4 d [[user(locn6)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray c = {}; + spvUnsafeArray e = {}; + c[0] = in.c_0; + c[1] = in.c_1; + e[0] = in.e_0; + e[1] = in.e_1; + out.FragColor = float4(float(int(short(in.a.x))), float(int(in.b.x)), float2(float(uint(c[1])), float(e[0].w)) + in.d.xy); + return out; +} + diff --git a/reference/opt/shaders-msl/intel/shader-integer-functions2.asm.comp b/reference/opt/shaders-msl/intel/shader-integer-functions2.asm.comp new file mode 100644 index 00000000000..1e5d889d462 --- /dev/null +++ b/reference/opt/shaders-msl/intel/shader-integer-functions2.asm.comp @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct foo +{ + uint a; + uint b; + int c; + int d; +}; + +kernel void main0(device foo& _4 [[buffer(0)]]) +{ + _4.a = clz(_4.a); + _4.a = ctz(_4.a); + _4.a = absdiff(_4.c, _4.d); + _4.a = absdiff(_4.a, _4.b); + _4.c = addsat(_4.c, _4.d); + _4.a = addsat(_4.a, _4.b); + _4.c = hadd(_4.c, _4.d); + _4.a = hadd(_4.a, _4.b); + _4.c = rhadd(_4.c, _4.d); 
+ _4.a = rhadd(_4.a, _4.b); + _4.c = subsat(_4.c, _4.d); + _4.a = subsat(_4.a, _4.b); + _4.c = int(short(_4.c)) * int(short(_4.d)); + _4.a = uint(ushort(_4.a)) * uint(ushort(_4.b)); +} + diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc new file mode 100644 index 00000000000..24928da01df --- /dev/null +++ b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc @@ -0,0 +1,188 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void 
spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + spvUnsafeArray iFoo; + float4 ipFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device 
main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup float4 spvStorageFoo[8][4][2]; + threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements); + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..a08364e2b34 --- /dev/null +++ b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc @@ -0,0 +1,191 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; 
i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + float4 iFoo_0 [[attribute(0)]]; + float4 iFoo_1 [[attribute(1)]]; + float4 ipFoo [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup float4 Foo[4][2]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvUnsafeArray _38 = spvUnsafeArray({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 }); + spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements); + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ 
gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc new file mode 100644 index 00000000000..abc95ca899e --- /dev/null +++ b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc @@ -0,0 +1,79 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray Foo; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + spvUnsafeArray iFoo; + float4 ipFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + 
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].Foo = gl_in[gl_InvocationID].iFoo; + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..3da1d18c61d --- /dev/null +++ b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc @@ -0,0 +1,83 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray Foo; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + float4 iFoo_0 [[attribute(0)]]; + float4 iFoo_1 [[attribute(1)]]; + float4 ipFoo [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvUnsafeArray _38 = spvUnsafeArray({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 }); + gl_out[gl_InvocationID].Foo = _38; + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git 
a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert new file mode 100644 index 00000000000..2bf5c257d6b --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_b; + float4 m_22_c; + float4 m_22_d; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + V _22 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_b = _22.b; + out.m_22_c = _22.c; + out.m_22_d = _22.d; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..18596302eb3 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc @@ -0,0 +1,41 @@ +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_a; + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_b; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + 
threadgroup P _11; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + _11.a = 1.0; + patchOut.m_11_b = 2.0; + gl_out[gl_InvocationID].c_a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc new file mode 100644 index 00000000000..f2f17bad37e --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc @@ -0,0 +1,44 @@ +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_a; + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_b; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + threadgroup P spvStorage_11[8]; + threadgroup P (&_11) = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + _11.a = 1.0; + patchOut.m_11_b = 2.0; + gl_out[gl_InvocationID].c_a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.vert b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.vert new file 
mode 100644 index 00000000000..ad6079061ec --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.vert @@ -0,0 +1,34 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_b [[user(locn1)]]; + float4 m_22_c [[user(locn2)]]; + float4 m_22_d [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + V _22 = {}; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_b = _22.b; + out.m_22_c = _22.c; + out.m_22_d = _22.d; + return out; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert new file mode 100644 index 00000000000..2b535c312ef --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_a; + float4 m_22_c; + float4 m_22_d; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + V _22 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_a = _22.a; + out.m_22_c = _22.c; + out.m_22_d = _22.d; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..7db78a9ba5a --- /dev/null +++ 
b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc @@ -0,0 +1,41 @@ +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_a; + float m_11_b; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup C c[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.m_11_a = 1.0; + patchOut.m_11_b = 2.0; + c[gl_InvocationID].a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc new file mode 100644 index 00000000000..ad793918e2f --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc @@ -0,0 +1,44 @@ +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_a; + float m_11_b; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x 
- gl_GlobalInvocationID.x % 4]; + threadgroup C spvStoragec[8][4]; + threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + patchOut.m_11_a = 1.0; + patchOut.m_11_b = 2.0; + c[gl_InvocationID].a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.vert b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.vert new file mode 100644 index 00000000000..3b830290f7c --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.vert @@ -0,0 +1,34 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_a [[user(locn0)]]; + float4 m_22_c [[user(locn2)]]; + float4 m_22_d [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + V _22 = {}; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_a = _22.a; + out.m_22_c = _22.c; + out.m_22_d = _22.d; + return out; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-clip-distance.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-clip-distance.vert new file mode 100644 index 00000000000..1f56f34a76d --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-clip-distance.vert @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v0 [[user(locn0)]]; + float4 v1 [[user(locn1)]]; + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + spvUnsafeArray gl_ClipDistance = {}; + out.v0 = float4(1.0); + out.v1 = float4(2.0); + out.gl_Position = float4(3.0); + out.gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; + return out; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert new file mode 100644 index 00000000000..1c0aab5037b --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v1; + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + float4 v0 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + v0 = float4(1.0); + out.v1 = float4(2.0); + out.gl_Position = float4(3.0); + out.gl_PointSize = 4.0; + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 0.5; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..7c8e3878248 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc @@ -0,0 +1,34 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 v1; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams 
[[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v0[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + v0[gl_InvocationID] = float4(1.0); + v0[gl_InvocationID].x = 2.0; + if (gl_InvocationID == 0) + { + patchOut.v1 = float4(2.0); + ((device float*)&patchOut.v1)[3u] = 4.0; + } + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; + gl_out[gl_InvocationID].gl_Position.z = 5.0; + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc new file mode 100644 index 00000000000..7a5e183a26d --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc @@ -0,0 +1,81 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup float4 spvStoragev0[8][4]; + threadgroup float4 (&v0)[4] = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + v0[gl_InvocationID] = float4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + patchOut.v1[0] = float4(2.0); + ((device float*)&patchOut.v1[0])[0u] = 3.0; + patchOut.v1[1] = float4(2.0); + ((device float*)&patchOut.v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git 
a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.tesc new file mode 100644 index 00000000000..ef3ff9c2210 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.tesc @@ -0,0 +1,78 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v0[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + v0[gl_InvocationID] = float4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + patchOut.v1[0] = float4(2.0); + ((device float*)&patchOut.v1[0])[0u] = 3.0; + 
patchOut.v1[1] = float4(2.0); + ((device float*)&patchOut.v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.vert new file mode 100644 index 00000000000..88c6bb6facc --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.vert @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v1 [[user(locn1)]]; + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + float4 v0 = {}; + v0 = float4(1.0); + out.v1 = float4(2.0); + out.gl_Position = float4(3.0); + out.gl_PointSize = 4.0; + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 0.5; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert new file mode 100644 index 00000000000..d558b7aed7c --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + float4 v1 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.v0 = float4(1.0); + v1 = float4(2.0); + out.gl_Position = float4(3.0); + out.gl_PointSize = 4.0; + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 0.5; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..bb87ced750d --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ +}; +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams 
[[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v1; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.x = 2.0; + if (gl_InvocationID == 0) + { + v1 = float4(2.0); + ((threadgroup float*)&v1)[3u] = 4.0; + } + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; + gl_out[gl_InvocationID].gl_Position.z = 5.0; + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc new file mode 100644 index 00000000000..28ec0be0c65 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc @@ -0,0 +1,40 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 v3; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + threadgroup float4 spvStoragev1[8][2]; + threadgroup float4 (&v1)[2] = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if 
(gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((threadgroup float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((threadgroup float*)&v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.tesc new file mode 100644 index 00000000000..1673d523298 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.tesc @@ -0,0 +1,37 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 v3; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v1[2]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((threadgroup float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((threadgroup float*)&v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.vert new file mode 100644 index 00000000000..cc7d41794d2 --- /dev/null +++ 
b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.vert @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0 [[user(locn0)]]; + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + float4 v1 = {}; + out.v0 = float4(1.0); + v1 = float4(2.0); + out.gl_Position = float4(3.0); + out.gl_PointSize = 4.0; + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 0.5; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert new file mode 100644 index 00000000000..463ecc87b56 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v0; + float4 v1; + float4 gl_Position; + spvUnsafeArray gl_ClipDistance; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + float gl_PointSize = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.v0 = float4(1.0); + out.v1 = float4(2.0); + out.gl_Position = float4(3.0); + gl_PointSize = 4.0; + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 0.5; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc new file mode 100644 index 00000000000..694cdbb7ff4 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc @@ -0,0 +1,89 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + patchOut.v1[0] = float4(2.0); + ((device float*)&patchOut.v1[0])[0u] = 3.0; + patchOut.v1[1] = float4(2.0); + ((device float*)&patchOut.v1[1])[0u] = 5.0; + } + patchOut.v3 = 
float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out_masked[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.tesc new file mode 100644 index 00000000000..da976c9a8a2 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.tesc @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup 
gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + patchOut.v1[0] = float4(2.0); + ((device float*)&patchOut.v1[0])[0u] = 3.0; + patchOut.v1[1] = float4(2.0); + ((device float*)&patchOut.v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out_masked[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.vert new file mode 100644 index 00000000000..ffdfdaaff4b --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.vert @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0 [[user(locn0)]]; + float4 v1 [[user(locn1)]]; + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + float gl_PointSize = {}; + out.v0 = float4(1.0); + out.v1 = float4(2.0); + out.gl_Position = float4(3.0); + gl_PointSize = 4.0; + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 0.5; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc new file mode 100644 index 00000000000..c55e3376828 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc @@ -0,0 +1,89 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" 
+#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + patchOut.v1[0] = 
float4(2.0); + ((device float*)&patchOut.v1[0])[0u] = 3.0; + patchOut.v1[1] = float4(2.0); + ((device float*)&patchOut.v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(10.0); + gl_out_masked[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-position.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-position.tesc new file mode 100644 index 00000000000..04584de49fa --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-position.tesc @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams 
[[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + patchOut.v1[0] = float4(2.0); + ((device float*)&patchOut.v1[0])[0u] = 3.0; + patchOut.v1[1] = float4(2.0); + ((device float*)&patchOut.v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(10.0); + gl_out_masked[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc b/reference/opt/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc new file mode 100644 index 00000000000..1618eaa627f --- /dev/null +++ b/reference/opt/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc @@ -0,0 +1,123 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct S +{ + int x; + float4 y; + spvUnsafeArray z; +}; + +struct TheBlock +{ + spvUnsafeArray blockFa; + spvUnsafeArray blockSa; + float blockF; +}; + +struct main0_patchOut +{ + float2 in_te_positionScale; + float2 in_te_positionOffset; + spvUnsafeArray tcBlock; +}; + +struct main0_in +{ + float3 in_tc_attr; + ushort2 m_196; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 5]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 5, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 5, spvIndirectParams[1] - 1); + int _163; + _163 = 0; + float _111; + for (float _170 = 1.2999999523162841796875; _163 < 2; _170 = _111, _163++) + { + float _169; + _169 = _170; + for (int _164 = 0; _164 < 3; ) + { + patchOut.tcBlock[_163].blockFa[_164] = _169; + _169 += 0.4000000059604644775390625; + _164++; + continue; + } + int _165; + float _168; + _168 = _169; + _165 = 0; + float _174; + for (; _165 < 2; _168 = _174, _165++) + { + 
patchOut.tcBlock[_163].blockSa[_165].x = int(_168); + patchOut.tcBlock[_163].blockSa[_165].y = float4(_168 + 0.4000000059604644775390625, _168 + 1.2000000476837158203125, _168 + 2.0, _168 + 2.80000019073486328125); + _174 = _168 + 0.800000011920928955078125; + for (int _171 = 0; _171 < 2; ) + { + patchOut.tcBlock[_163].blockSa[_165].z[_171] = _174; + _174 += 0.4000000059604644775390625; + _171++; + continue; + } + } + patchOut.tcBlock[_163].blockF = _168; + _111 = _168 + 0.4000000059604644775390625; + } + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(gl_in[0].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(gl_in[1].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(gl_in[2].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(gl_in[3].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(gl_in[4].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(gl_in[5].in_tc_attr.x); + patchOut.in_te_positionScale = float2(gl_in[6].in_tc_attr.x, gl_in[7].in_tc_attr.x); + patchOut.in_te_positionOffset = float2(gl_in[8].in_tc_attr.x, gl_in[9].in_tc_attr.x); +} + diff --git a/reference/opt/shaders-msl/tesc/basic.multi-patch.tesc b/reference/opt/shaders-msl/tesc/basic.multi-patch.tesc new file mode 100644 index 00000000000..fe268316737 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/basic.multi-patch.tesc @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_patchOut +{ + float3 vFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 1]; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + 
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625); + patchOut.vFoo = float3(1.0); +} + diff --git a/reference/opt/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc b/reference/opt/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc new file mode 100644 index 00000000000..1bed1c90585 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc @@ -0,0 +1,128 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct Block_1 +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct main0_out +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray B_a; + float B_b; + float2x2 B_m; + Meep B_meep; + spvUnsafeArray B_meeps; + float4 gl_Position; +}; + +struct main0_in +{ + spvUnsafeArray in_a; + float in_b; + float2x2 in_m; + Meep in_meep; + spvUnsafeArray in_meeps; + spvUnsafeArray in_B_a; + float in_B_b; + float2x2 in_B_m; + Meep in_B_meep; + spvUnsafeArray in_B_meeps; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 
1); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].a[0] = gl_in[gl_InvocationID].in_a[0]; + gl_out[gl_InvocationID].a[1] = gl_in[gl_InvocationID].in_a[1]; + gl_out[gl_InvocationID].b = gl_in[gl_InvocationID].in_b; + gl_out[gl_InvocationID].m = gl_in[gl_InvocationID].in_m; + gl_out[gl_InvocationID].meep.a = gl_in[gl_InvocationID].in_meep.a; + gl_out[gl_InvocationID].meep.b = gl_in[gl_InvocationID].in_meep.b; + gl_out[gl_InvocationID].meeps[0].a = gl_in[gl_InvocationID].in_meeps[0].a; + gl_out[gl_InvocationID].meeps[0].b = gl_in[gl_InvocationID].in_meeps[0].b; + gl_out[gl_InvocationID].meeps[1].a = gl_in[gl_InvocationID].in_meeps[1].a; + gl_out[gl_InvocationID].meeps[1].b = gl_in[gl_InvocationID].in_meeps[1].b; + gl_out[gl_InvocationID].B_a[0] = gl_in[gl_InvocationID].in_B_a[0]; + gl_out[gl_InvocationID].B_a[1] = gl_in[gl_InvocationID].in_B_a[1]; + gl_out[gl_InvocationID].B_b = gl_in[gl_InvocationID].in_B_b; + gl_out[gl_InvocationID].B_m = gl_in[gl_InvocationID].in_B_m; + gl_out[gl_InvocationID].B_meep.a = gl_in[gl_InvocationID].in_B_meep.a; + gl_out[gl_InvocationID].B_meep.b = gl_in[gl_InvocationID].in_B_meep.b; + gl_out[gl_InvocationID].B_meeps[0].a = gl_in[gl_InvocationID].in_B_meeps[0].a; + gl_out[gl_InvocationID].B_meeps[0].b = gl_in[gl_InvocationID].in_B_meeps[0].b; + gl_out[gl_InvocationID].B_meeps[1].a = gl_in[gl_InvocationID].in_B_meeps[1].a; + gl_out[gl_InvocationID].B_meeps[1].b = gl_in[gl_InvocationID].in_B_meeps[1].b; +} + diff --git a/reference/opt/shaders-msl/tesc/complex-control-point-inout-types.tesc b/reference/opt/shaders-msl/tesc/complex-control-point-inout-types.tesc new file mode 100644 index 00000000000..e785fdfe22d --- /dev/null +++ b/reference/opt/shaders-msl/tesc/complex-control-point-inout-types.tesc @@ -0,0 +1,132 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray 
+{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct Block_1 +{ + spvUnsafeArray a; + float b; + float2x2 m; +}; + +struct main0_out +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray B_a; + float B_b; + float2x2 B_m; + Meep B_meep; + spvUnsafeArray B_meeps; + float4 gl_Position; +}; + +struct main0_in +{ + float in_a_0 [[attribute(0)]]; + float in_a_1 [[attribute(1)]]; + float in_b [[attribute(2)]]; + float2 in_m_0 [[attribute(3)]]; + float2 in_m_1 [[attribute(4)]]; + float in_meep_a [[attribute(5)]]; + float in_meep_b [[attribute(6)]]; + float in_B_a_0 [[attribute(11)]]; + float in_B_a_1 [[attribute(12)]]; + float in_B_b [[attribute(13)]]; + float2 in_B_m_0 [[attribute(14)]]; + float2 in_B_m_1 [[attribute(15)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if 
(gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].a[0] = gl_in[gl_InvocationID].in_a_0; + gl_out[gl_InvocationID].a[1] = gl_in[gl_InvocationID].in_a_1; + gl_out[gl_InvocationID].b = gl_in[gl_InvocationID].in_b; + float2x2 _178 = float2x2(gl_in[gl_InvocationID].in_m_0, gl_in[gl_InvocationID].in_m_1); + gl_out[gl_InvocationID].m = _178; + gl_out[gl_InvocationID].meep.a = gl_in[gl_InvocationID].in_meep_a; + gl_out[gl_InvocationID].meep.b = gl_in[gl_InvocationID].in_meep_b; + gl_out[gl_InvocationID].meeps[0].a = 1.0; + gl_out[gl_InvocationID].meeps[0].b = 2.0; + gl_out[gl_InvocationID].meeps[1].a = 3.0; + gl_out[gl_InvocationID].meeps[1].b = 4.0; + gl_out[gl_InvocationID].B_a[0] = gl_in[gl_InvocationID].in_B_a_0; + gl_out[gl_InvocationID].B_a[1] = gl_in[gl_InvocationID].in_B_a_1; + gl_out[gl_InvocationID].B_b = gl_in[gl_InvocationID].in_B_b; + float2x2 _216 = float2x2(gl_in[gl_InvocationID].in_B_m_0, gl_in[gl_InvocationID].in_B_m_1); + gl_out[gl_InvocationID].B_m = _216; + gl_out[gl_InvocationID].B_meep.a = 10.0; + gl_out[gl_InvocationID].B_meep.b = 20.0; + gl_out[gl_InvocationID].B_meeps[0].a = 5.0; + gl_out[gl_InvocationID].B_meeps[0].b = 6.0; + gl_out[gl_InvocationID].B_meeps[1].a = 7.0; + gl_out[gl_InvocationID].B_meeps[1].b = 8.0; +} + diff --git a/reference/opt/shaders-msl/tesc/complex-patch-out-types.tesc b/reference/opt/shaders-msl/tesc/complex-patch-out-types.tesc new file mode 100644 index 00000000000..c3f7081552c --- /dev/null +++ b/reference/opt/shaders-msl/tesc/complex-patch-out-types.tesc @@ -0,0 +1,107 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray B_a; + float B_b; + float2x2 B_m; + Meep B_meep; + spvUnsafeArray B_meeps; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + patchOut.a[0] = 1.0; + patchOut.a[1] = 2.0; + patchOut.b = 3.0; + patchOut.m = float2x2(float2(2.0, 0.0), float2(0.0, 2.0)); + patchOut.meep.a = 4.0; + patchOut.meep.b = 5.0; + patchOut.meeps[0].a = 6.0; + patchOut.meeps[0].b = 7.0; + patchOut.meeps[1].a = 8.0; + patchOut.meeps[1].b = 9.0; + patchOut.B_a[0] = 1.0; + patchOut.B_a[1] = 2.0; + patchOut.B_b = 3.0; + patchOut.B_m = float2x2(float2(4.0, 0.0), 
float2(0.0, 4.0)); + patchOut.B_meep.a = 4.0; + patchOut.B_meep.b = 5.0; + patchOut.B_meeps[0].a = 6.0; + patchOut.B_meeps[0].b = 7.0; + patchOut.B_meeps[1].a = 8.0; + patchOut.B_meeps[1].b = 9.0; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc new file mode 100644 index 00000000000..5ea01cad9fa --- /dev/null +++ b/reference/opt/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4x4 vOutputs; +}; + +struct main0_in +{ + float4x4 vInputs; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].vOutputs = gl_in[gl_InvocationID].vInputs; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array-of-matrix.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array-of-matrix.tesc new file mode 100644 index 00000000000..1ed36213b62 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/load-control-point-array-of-matrix.tesc @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4x4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs_0 [[attribute(0)]]; + float4 vInputs_1 [[attribute(1)]]; + float4 vInputs_2 [[attribute(2)]]; + float4 vInputs_3 [[attribute(3)]]; +}; + +kernel void 
main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + float4x4 _28 = float4x4(gl_in[gl_InvocationID].vInputs_0, gl_in[gl_InvocationID].vInputs_1, gl_in[gl_InvocationID].vInputs_2, gl_in[gl_InvocationID].vInputs_3); + gl_out[gl_InvocationID].vOutputs = _28; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc new file mode 100644 index 00000000000..de6ba178075 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct VertexData +{ + float4x4 a; + spvUnsafeArray b; + float4 c; +}; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + VertexData vInputs; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + int _27 = gl_InvocationID ^ 1; + gl_out[gl_InvocationID].vOutputs = ((gl_in[gl_InvocationID].vInputs.a[1] + gl_in[gl_InvocationID].vInputs.b[1]) + gl_in[gl_InvocationID].vInputs.c) + gl_in[_27].vInputs.c; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.tesc new file mode 100644 index 00000000000..9eaaa2e6d50 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.tesc @@ -0,0 +1,81 @@ +#pragma 
clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct VertexData +{ + float4x4 a; + spvUnsafeArray b; + float4 c; +}; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs_a_0 [[attribute(0)]]; + float4 vInputs_a_1 [[attribute(1)]]; + float4 vInputs_a_2 [[attribute(2)]]; + float4 vInputs_a_3 [[attribute(3)]]; + float4 vInputs_b_0 [[attribute(4)]]; + float4 vInputs_b_1 [[attribute(5)]]; + float4 vInputs_c [[attribute(6)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + int _27 = gl_InvocationID ^ 1; + gl_out[gl_InvocationID].vOutputs = ((gl_in[gl_InvocationID].vInputs_a_1 + 
gl_in[gl_InvocationID].vInputs_b_1) + gl_in[gl_InvocationID].vInputs_c) + gl_in[_27].vInputs_c; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array.multi-patch.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array.multi-patch.tesc new file mode 100644 index 00000000000..ddf142ccba7 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/load-control-point-array.multi-patch.tesc @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs; + ushort2 m_44; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].vOutputs = gl_in[gl_InvocationID].vInputs; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array.tesc new file mode 100644 index 00000000000..6f0da387e29 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/load-control-point-array.tesc @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device 
MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].vOutputs = gl_in[gl_InvocationID].vInputs; +} + diff --git a/reference/opt/shaders-msl/tesc/matrix-output.multi-patch.tesc b/reference/opt/shaders-msl/tesc/matrix-output.multi-patch.tesc new file mode 100644 index 00000000000..28fff015558 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/matrix-output.multi-patch.tesc @@ -0,0 +1,41 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float in_te_attr; + float4x3 in_te_data0; + float4x3 in_te_data1; +}; + +struct main0_in +{ + float3 in_tc_attr; + ushort2 m_104; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 3]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 3; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1); + float _15 = float(gl_InvocationID); + float3 _18 = float3(_15, 0.0, 0.0); + float3 _19 = float3(0.0, _15, 0.0); + float3 _20 = float3(0.0, 0.0, _15); + gl_out[gl_InvocationID].in_te_data0 = float4x3(_18, _19, _20, float3(0.0)); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + int _42 = (gl_InvocationID + 1) % 3; + gl_out[gl_InvocationID].in_te_data1 = float4x3(_18 + gl_out[_42].in_te_data0[0], _19 + gl_out[_42].in_te_data0[1], _20 + 
gl_out[_42].in_te_data0[2], gl_out[_42].in_te_data0[3]); + gl_out[gl_InvocationID].in_te_attr = gl_in[gl_InvocationID].in_tc_attr.x; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(1.0); +} + diff --git a/reference/opt/shaders-msl/tesc/reload-tess-level.multi-patch.tesc b/reference/opt/shaders-msl/tesc/reload-tess-level.multi-patch.tesc new file mode 100644 index 00000000000..ae33de517a3 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/reload-tess-level.multi-patch.tesc @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_in +{ + uint3 m_82; + ushort2 m_86; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + if (gl_InvocationID == 0) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(2.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(4.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(5.0); + 
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]), 0.5)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 0.5)); + } + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; +} + diff --git a/reference/opt/shaders-msl/tesc/reload-tess-level.tesc b/reference/opt/shaders-msl/tesc/reload-tess-level.tesc new file mode 100644 index 00000000000..eafc50607d7 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/reload-tess-level.tesc @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_in +{ + float4 gl_Position [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + if (gl_InvocationID == 0) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(2.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(4.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(5.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), 
float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]), 0.5)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 0.5)); + } + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; +} + diff --git a/reference/opt/shaders-msl/tesc/struct-output.multi-patch.tesc b/reference/opt/shaders-msl/tesc/struct-output.multi-patch.tesc new file mode 100644 index 00000000000..6c526546156 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/struct-output.multi-patch.tesc @@ -0,0 +1,48 @@ +#include +#include + +using namespace metal; + +struct te_data +{ + float a; + float b; + uint c; +}; + +struct main0_out +{ + float in_te_attr; + te_data in_te_data0; + te_data in_te_data1; +}; + +struct main0_in +{ + float3 in_tc_attr; + ushort2 m_119; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 3]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 3; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1); + float _15 = float(gl_InvocationID); + int _18 = gl_InvocationID + 1; + float _19 = float(_18); + uint _21 = uint(gl_InvocationID); + gl_out[gl_InvocationID].in_te_data0 = te_data{ _15, _19, _21 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + int _38 = _18 % 3; + gl_out[gl_InvocationID].in_te_data1 = te_data{ _15 + gl_out[_38].in_te_data0.a, _19 + gl_out[_38].in_te_data0.b, _21 + gl_out[_38].in_te_data0.c }; + 
gl_out[gl_InvocationID].in_te_attr = gl_in[gl_InvocationID].in_tc_attr.x; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(1.0); +} + diff --git a/reference/opt/shaders-msl/tesc/water_tess.multi-patch.tesc b/reference/opt/shaders-msl/tesc/water_tess.multi-patch.tesc new file mode 100644 index 00000000000..356a963d689 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/water_tess.multi-patch.tesc @@ -0,0 +1,91 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4 uScale; + float3 uCamPos; + float2 uPatchSize; + float2 uMaxTessLevel; + float uDistanceMod; + float4 uFrustum[6]; +}; + +struct main0_patchOut +{ + float2 vOutPatchPosBase; + float4 vPatchLods; +}; + +struct main0_in +{ + float3 vPatchPosBase; + ushort2 m_996; +}; + +kernel void main0(constant UBO& _41 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 1]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + float2 _431 = (gl_in[0].vPatchPosBase.xy - float2(10.0)) * _41.uScale.xy; + float2 _441 = ((gl_in[0].vPatchPosBase.xy + _41.uPatchSize) + float2(10.0)) * _41.uScale.xy; + float3 _446 = float3(_431.x, -10.0, _431.y); + float3 _451 = float3(_441.x, 10.0, _441.y); + float4 
_467 = float4((_446 + _451) * 0.5, 1.0); + float3 _514 = float3(length(_451 - _446) * (-0.5)); + bool _516 = any(float3(dot(_41.uFrustum[0], _467), dot(_41.uFrustum[1], _467), dot(_41.uFrustum[2], _467)) <= _514); + bool _526; + if (!_516) + { + _526 = any(float3(dot(_41.uFrustum[3], _467), dot(_41.uFrustum[4], _467), dot(_41.uFrustum[5], _467)) <= _514); + } + else + { + _526 = _516; + } + if (!(!_526)) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(-1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(-1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(-1.0); + } + else + { + patchOut.vOutPatchPosBase = gl_in[0].vPatchPosBase.xy; + float2 _681 = fma(float2(-0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float2 _710 = fma(float2(0.5, -0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float _729 = fast::clamp(log2((length(_41.uCamPos - float3(_710.x, 0.0, _710.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _739 = fma(float2(1.5, -0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float2 _768 = fma(float2(-0.5, 0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float _787 = fast::clamp(log2((length(_41.uCamPos - float3(_768.x, 0.0, _768.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _797 = fma(float2(0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float _816 = fast::clamp(log2((length(_41.uCamPos - float3(_797.x, 0.0, _797.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _826 = fma(float2(1.5, 0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * 
_41.uScale.xy; + float _845 = fast::clamp(log2((length(_41.uCamPos - float3(_826.x, 0.0, _826.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _855 = fma(float2(-0.5, 1.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float2 _884 = fma(float2(0.5, 1.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float _903 = fast::clamp(log2((length(_41.uCamPos - float3(_884.x, 0.0, _884.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _913 = fma(float2(1.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float _614 = dot(float4(_787, _816, fast::clamp(log2((length(_41.uCamPos - float3(_855.x, 0.0, _855.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _903), float4(0.25)); + float _620 = dot(float4(fast::clamp(log2((length(_41.uCamPos - float3(_681.x, 0.0, _681.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _729, _787, _816), float4(0.25)); + float _626 = dot(float4(_729, fast::clamp(log2((length(_41.uCamPos - float3(_739.x, 0.0, _739.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _816, _845), float4(0.25)); + float _632 = dot(float4(_816, _845, _903, fast::clamp(log2((length(_41.uCamPos - float3(_913.x, 0.0, _913.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x)), float4(0.25)); + float4 _633 = float4(_614, _620, _626, _632); + patchOut.vPatchLods = _633; + float4 _940 = exp2(-fast::min(_633, _633.yzwx)) * _41.uMaxTessLevel.y; + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_940.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_940.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_940.z); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(_940.w); + float _948 = _41.uMaxTessLevel.y * 
exp2(-fast::min(fast::min(fast::min(_614, _620), fast::min(_626, _632)), _816)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(_948); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(_948); + } +} + diff --git a/reference/opt/shaders-msl/tesc/water_tess.tesc b/reference/opt/shaders-msl/tesc/water_tess.tesc index 9a75f05b43f..d9a6697cd95 100644 --- a/reference/opt/shaders-msl/tesc/water_tess.tesc +++ b/reference/opt/shaders-msl/tesc/water_tess.tesc @@ -32,23 +32,23 @@ kernel void main0(main0_in in [[stage_in]], constant UBO& _41 [[buffer(0)]], uin threadgroup_barrier(mem_flags::mem_threadgroup); if (gl_InvocationID >= 1) return; - float2 _430 = (gl_in[0].vPatchPosBase - float2(10.0)) * _41.uScale.xy; - float2 _440 = ((gl_in[0].vPatchPosBase + _41.uPatchSize) + float2(10.0)) * _41.uScale.xy; - float3 _445 = float3(_430.x, -10.0, _430.y); - float3 _450 = float3(_440.x, 10.0, _440.y); - float4 _466 = float4((_445 + _450) * 0.5, 1.0); - float3 _513 = float3(length(_450 - _445) * (-0.5)); - bool _515 = any(float3(dot(_41.uFrustum[0], _466), dot(_41.uFrustum[1], _466), dot(_41.uFrustum[2], _466)) <= _513); - bool _525; - if (!_515) + float2 _431 = (gl_in[0].vPatchPosBase - float2(10.0)) * _41.uScale.xy; + float2 _441 = ((gl_in[0].vPatchPosBase + _41.uPatchSize) + float2(10.0)) * _41.uScale.xy; + float3 _446 = float3(_431.x, -10.0, _431.y); + float3 _451 = float3(_441.x, 10.0, _441.y); + float4 _467 = float4((_446 + _451) * 0.5, 1.0); + float3 _514 = float3(length(_451 - _446) * (-0.5)); + bool _516 = any(float3(dot(_41.uFrustum[0], _467), dot(_41.uFrustum[1], _467), dot(_41.uFrustum[2], _467)) <= _514); + bool _526; + if (!_516) { - _525 = any(float3(dot(_41.uFrustum[3], _466), dot(_41.uFrustum[4], _466), dot(_41.uFrustum[5], _466)) <= _513); + _526 = any(float3(dot(_41.uFrustum[3], _467), dot(_41.uFrustum[4], _467), dot(_41.uFrustum[5], _467)) <= _514); } else { - _525 = _515; + _526 = _516; } - if (!(!_525)) + if (!(!_526)) { 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(-1.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(-1.0); @@ -60,34 +60,34 @@ kernel void main0(main0_in in [[stage_in]], constant UBO& _41 [[buffer(0)]], uin else { patchOut.vOutPatchPosBase = gl_in[0].vPatchPosBase; - float2 _678 = (gl_in[0].vPatchPosBase + (float2(-0.5) * _41.uPatchSize)) * _41.uScale.xy; - float2 _706 = (gl_in[0].vPatchPosBase + (float2(0.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _725 = fast::clamp(log2((length(_41.uCamPos - float3(_706.x, 0.0, _706.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _734 = (gl_in[0].vPatchPosBase + (float2(1.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; - float2 _762 = (gl_in[0].vPatchPosBase + (float2(-0.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _781 = fast::clamp(log2((length(_41.uCamPos - float3(_762.x, 0.0, _762.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _790 = (gl_in[0].vPatchPosBase + (float2(0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _809 = fast::clamp(log2((length(_41.uCamPos - float3(_790.x, 0.0, _790.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _818 = (gl_in[0].vPatchPosBase + (float2(1.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _837 = fast::clamp(log2((length(_41.uCamPos - float3(_818.x, 0.0, _818.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _846 = (gl_in[0].vPatchPosBase + (float2(-0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; - float2 _874 = (gl_in[0].vPatchPosBase + (float2(0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; - float _893 = fast::clamp(log2((length(_41.uCamPos - float3(_874.x, 0.0, _874.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _902 = (gl_in[0].vPatchPosBase + (float2(1.5) * _41.uPatchSize)) * 
_41.uScale.xy; - float _612 = dot(float4(_781, _809, fast::clamp(log2((length(_41.uCamPos - float3(_846.x, 0.0, _846.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _893), float4(0.25)); - float _618 = dot(float4(fast::clamp(log2((length(_41.uCamPos - float3(_678.x, 0.0, _678.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _725, _781, _809), float4(0.25)); - float _624 = dot(float4(_725, fast::clamp(log2((length(_41.uCamPos - float3(_734.x, 0.0, _734.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _809, _837), float4(0.25)); - float _630 = dot(float4(_809, _837, _893, fast::clamp(log2((length(_41.uCamPos - float3(_902.x, 0.0, _902.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x)), float4(0.25)); - float4 _631 = float4(_612, _618, _624, _630); - patchOut.vPatchLods = _631; - float4 _928 = exp2(-fast::min(_631, _631.yzwx)) * _41.uMaxTessLevel.y; - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_928.x); - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_928.y); - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_928.z); - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(_928.w); - float _935 = _41.uMaxTessLevel.y * exp2(-fast::min(fast::min(fast::min(_612, _618), fast::min(_624, _630)), _809)); - spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(_935); - spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(_935); + float2 _681 = fma(float2(-0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float2 _710 = fma(float2(0.5, -0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float _729 = fast::clamp(log2((length(_41.uCamPos - float3(_710.x, 0.0, _710.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _739 = fma(float2(1.5, -0.5), 
_41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float2 _768 = fma(float2(-0.5, 0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float _787 = fast::clamp(log2((length(_41.uCamPos - float3(_768.x, 0.0, _768.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _797 = fma(float2(0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float _816 = fast::clamp(log2((length(_41.uCamPos - float3(_797.x, 0.0, _797.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _826 = fma(float2(1.5, 0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float _845 = fast::clamp(log2((length(_41.uCamPos - float3(_826.x, 0.0, _826.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _855 = fma(float2(-0.5, 1.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float2 _884 = fma(float2(0.5, 1.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float _903 = fast::clamp(log2((length(_41.uCamPos - float3(_884.x, 0.0, _884.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _913 = fma(float2(1.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float _614 = dot(float4(_787, _816, fast::clamp(log2((length(_41.uCamPos - float3(_855.x, 0.0, _855.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _903), float4(0.25)); + float _620 = dot(float4(fast::clamp(log2((length(_41.uCamPos - float3(_681.x, 0.0, _681.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _729, _787, _816), float4(0.25)); + float _626 = dot(float4(_729, fast::clamp(log2((length(_41.uCamPos - float3(_739.x, 0.0, _739.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _816, _845), float4(0.25)); + float _632 = dot(float4(_816, _845, 
_903, fast::clamp(log2((length(_41.uCamPos - float3(_913.x, 0.0, _913.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x)), float4(0.25)); + float4 _633 = float4(_614, _620, _626, _632); + patchOut.vPatchLods = _633; + float4 _940 = exp2(-fast::min(_633, _633.yzwx)) * _41.uMaxTessLevel.y; + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_940.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_940.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_940.z); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(_940.w); + float _948 = _41.uMaxTessLevel.y * exp2(-fast::min(fast::min(fast::min(_614, _620), fast::min(_626, _632)), _816)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(_948); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(_948); } } diff --git a/reference/opt/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese b/reference/opt/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese new file mode 100644 index 00000000000..e1f1f3c9df9 --- /dev/null +++ b/reference/opt/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese @@ -0,0 +1,72 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _35 +{ + float dummy; + float4 variableInStruct; +}; + +struct main0_out +{ + float outResult [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + spvUnsafeArray<_35, 3> testStructArray; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(float3 gl_TessCoord [[position_in_patch]], uint gl_PrimitiveID [[patch_id]], const device main0_in* spvIn [[buffer(22)]]) +{ + main0_out out = {}; + const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 0]; + out.gl_Position = float4((gl_TessCoord.xy * 2.0) - float2(1.0), 0.0, 1.0); + out.outResult = ((float(abs(gl_in[0].testStructArray[2].variableInStruct.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(gl_in[0].testStructArray[2].variableInStruct.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].testStructArray[2].variableInStruct.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].testStructArray[2].variableInStruct.w - 7.0) < 0.001000000047497451305389404296875); + return out; +} + diff --git a/reference/opt/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese b/reference/opt/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese new file mode 100644 index 00000000000..d81d44b0700 --- /dev/null +++ 
b/reference/opt/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct t35 +{ + float2 m0; + float4 m1; +}; + +struct t36 +{ + float2 m0; + t35 m1; +}; + +struct main0_out +{ + float v80 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float2 v40_m0; + t35 v40_m1; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(float3 gl_TessCoord [[position_in_patch]], uint gl_PrimitiveID [[patch_id]], const device main0_in* spvIn [[buffer(22)]]) +{ + main0_out out = {}; + const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 0]; + out.gl_Position = float4((gl_TessCoord.xy * 2.0) - float2(1.0), 0.0, 1.0); + out.v80 = ((float(abs(gl_in[0].v40_m1.m1.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(gl_in[0].v40_m1.m1.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].v40_m1.m1.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].v40_m1.m1.w - 7.0) < 0.001000000047497451305389404296875); + return out; +} + diff --git a/reference/opt/shaders-msl/tese/in-block-with-nested-struct.tese b/reference/opt/shaders-msl/tese/in-block-with-nested-struct.tese new file mode 100644 index 00000000000..86553955813 --- /dev/null +++ b/reference/opt/shaders-msl/tese/in-block-with-nested-struct.tese @@ -0,0 +1,43 @@ +#include +#include + +using namespace metal; + +struct t35 +{ + float2 m0; + float4 m1; +}; + +struct t36 +{ + float2 m0; + t35 m1; +}; + +struct main0_out +{ + float v80 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float2 v40_m0 [[attribute(0)]]; + float2 v40_m1_m0 [[attribute(1)]]; + float4 v40_m1_m1 [[attribute(2)]]; +}; + +struct main0_patchIn +{ + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + out.gl_Position = float4((gl_TessCoord.xy * 2.0) - 
float2(1.0), 0.0, 1.0); + out.v80 = ((float(abs(patchIn.gl_in[0].v40_m1_m1.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(patchIn.gl_in[0].v40_m1_m1.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(patchIn.gl_in[0].v40_m1_m1.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(patchIn.gl_in[0].v40_m1_m1.w - 7.0) < 0.001000000047497451305389404296875); + return out; +} + diff --git a/reference/opt/shaders-msl/tese/input-array.tese b/reference/opt/shaders-msl/tese/input-array.tese index 97a83b4eedf..8f1002128d0 100644 --- a/reference/opt/shaders-msl/tese/input-array.tese +++ b/reference/opt/shaders-msl/tese/input-array.tese @@ -19,9 +19,10 @@ struct main0_patchIn patch_control_point gl_in; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); out.gl_Position = (patchIn.gl_in[0].Floats * gl_TessCoord.x) + (patchIn.gl_in[1].Floats2 * gl_TessCoord.y); return out; } diff --git a/reference/opt/shaders-msl/tese/input-types.raw-tess-in.tese b/reference/opt/shaders-msl/tese/input-types.raw-tess-in.tese new file mode 100644 index 00000000000..e918f5b6530 --- /dev/null +++ b/reference/opt/shaders-msl/tese/input-types.raw-tess-in.tese @@ -0,0 +1,70 @@ +#include +#include + +using namespace metal; + +struct Block +{ + float4 a; + float4 b; +}; + +struct PatchBlock +{ + float4 a; + float4 b; +}; + +struct Foo +{ + float4 a; + float4 b; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vColor; + float4 blocks_a; + float4 blocks_b; + Foo vFoos; +}; + +struct main0_patchIn +{ + float4 vColors; + float4 patch_block_a; + float4 patch_block_b; + Foo vFoo; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(uint 
gl_PrimitiveID [[patch_id]], const device main0_patchIn* spvPatchIn [[buffer(20)]], const device main0_in* spvIn [[buffer(22)]]) +{ + main0_out out = {}; + PatchBlock patch_block = {}; + const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 0]; + const device main0_patchIn& patchIn = spvPatchIn[gl_PrimitiveID]; + patch_block.a = patchIn.patch_block_a; + patch_block.b = patchIn.patch_block_b; + out.gl_Position = gl_in[0].blocks_a; + out.gl_Position += gl_in[0].blocks_b; + out.gl_Position += gl_in[1].blocks_a; + out.gl_Position += gl_in[1].blocks_b; + out.gl_Position += patch_block.a; + out.gl_Position += patch_block.b; + out.gl_Position += gl_in[0].vColor; + out.gl_Position += gl_in[1].vColor; + out.gl_Position += patchIn.vColors; + out.gl_Position += patchIn.vFoo.a; + out.gl_Position += patchIn.vFoo.b; + out.gl_Position += gl_in[0].vFoos.a; + out.gl_Position += gl_in[0].vFoos.b; + out.gl_Position += gl_in[1].vFoos.a; + out.gl_Position += gl_in[1].vFoos.b; + return out; +} + diff --git a/reference/opt/shaders-msl/tese/input-types.tese b/reference/opt/shaders-msl/tese/input-types.tese index 2a936fce3aa..25b25ff94e1 100644 --- a/reference/opt/shaders-msl/tese/input-types.tese +++ b/reference/opt/shaders-msl/tese/input-types.tese @@ -29,19 +29,19 @@ struct main0_out struct main0_in { float4 vColor [[attribute(0)]]; - float4 Block_a [[attribute(4)]]; - float4 Block_b [[attribute(5)]]; - float4 Foo_a [[attribute(14)]]; - float4 Foo_b [[attribute(15)]]; + float4 blocks_a [[attribute(4)]]; + float4 blocks_b [[attribute(5)]]; + float4 vFoos_a [[attribute(14)]]; + float4 vFoos_b [[attribute(15)]]; }; struct main0_patchIn { float4 vColors [[attribute(1)]]; - float4 PatchBlock_a [[attribute(6)]]; - float4 PatchBlock_b [[attribute(7)]]; - float4 Foo_a [[attribute(8)]]; - float4 Foo_b [[attribute(9)]]; + float4 patch_block_a [[attribute(6)]]; + float4 patch_block_b [[attribute(7)]]; + float4 vFoo_a [[attribute(8)]]; + float4 vFoo_b [[attribute(9)]]; patch_control_point gl_in; 
}; @@ -50,14 +50,14 @@ struct main0_patchIn main0_out out = {}; PatchBlock patch_block = {}; Foo vFoo = {}; - patch_block.a = patchIn.PatchBlock_a; - patch_block.b = patchIn.PatchBlock_b; - vFoo.a = patchIn.Foo_a; - vFoo.b = patchIn.Foo_b; - out.gl_Position = patchIn.gl_in[0].Block_a; - out.gl_Position += patchIn.gl_in[0].Block_b; - out.gl_Position += patchIn.gl_in[1].Block_a; - out.gl_Position += patchIn.gl_in[1].Block_b; + patch_block.a = patchIn.patch_block_a; + patch_block.b = patchIn.patch_block_b; + vFoo.a = patchIn.vFoo_a; + vFoo.b = patchIn.vFoo_b; + out.gl_Position = patchIn.gl_in[0].blocks_a; + out.gl_Position += patchIn.gl_in[0].blocks_b; + out.gl_Position += patchIn.gl_in[1].blocks_a; + out.gl_Position += patchIn.gl_in[1].blocks_b; out.gl_Position += patch_block.a; out.gl_Position += patch_block.b; out.gl_Position += patchIn.gl_in[0].vColor; @@ -65,16 +65,12 @@ struct main0_patchIn out.gl_Position += patchIn.vColors; out.gl_Position += vFoo.a; out.gl_Position += vFoo.b; - Foo vFoos_202; - vFoos_202.a = patchIn.gl_in[0].Foo_a; - vFoos_202.b = patchIn.gl_in[0].Foo_b; - out.gl_Position += vFoos_202.a; - out.gl_Position += vFoos_202.b; - Foo vFoos_216; - vFoos_216.a = patchIn.gl_in[1].Foo_a; - vFoos_216.b = patchIn.gl_in[1].Foo_b; - out.gl_Position += vFoos_216.a; - out.gl_Position += vFoos_216.b; + Foo _204 = Foo{ patchIn.gl_in[0].vFoos_a, patchIn.gl_in[0].vFoos_b }; + out.gl_Position += _204.a; + out.gl_Position += _204.b; + Foo _218 = Foo{ patchIn.gl_in[1].vFoos_a, patchIn.gl_in[1].vFoos_b }; + out.gl_Position += _218.a; + out.gl_Position += _218.b; return out; } diff --git a/reference/opt/shaders-msl/tese/load-control-point-array-of-matrix.tese b/reference/opt/shaders-msl/tese/load-control-point-array-of-matrix.tese new file mode 100644 index 00000000000..e4bbeb7ede9 --- /dev/null +++ b/reference/opt/shaders-msl/tese/load-control-point-array-of-matrix.tese @@ -0,0 +1,85 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang 
diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vInputs_0 [[attribute(0)]]; + float4 vInputs_1 [[attribute(1)]]; + float4 vInputs_2 [[attribute(2)]]; + float4 vInputs_3 [[attribute(3)]]; +}; + +struct main0_patchIn +{ + float4 vBoo_0 [[attribute(4)]]; + float4 vBoo_1 [[attribute(5)]]; + float4 vBoo_2 [[attribute(6)]]; + float4 vBoo_3 [[attribute(7)]]; + int vIndex [[attribute(8)]]; + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray vBoo = {}; + vBoo[0] = patchIn.vBoo_0; + vBoo[1] = patchIn.vBoo_1; + vBoo[2] = patchIn.vBoo_2; + vBoo[3] = patchIn.vBoo_3; + float4x4 _57 = float4x4(patchIn.gl_in[0u].vInputs_0, patchIn.gl_in[0u].vInputs_1, patchIn.gl_in[0u].vInputs_2, patchIn.gl_in[0u].vInputs_3); + float4x4 _59 = float4x4(patchIn.gl_in[1u].vInputs_0, patchIn.gl_in[1u].vInputs_1, patchIn.gl_in[1u].vInputs_2, patchIn.gl_in[1u].vInputs_3); + float4x4 _47 = _57; + float4x4 _48 = _59; + out.gl_Position = (_47[patchIn.vIndex] + _48[patchIn.vIndex]) + vBoo[patchIn.vIndex]; + return out; +} + diff --git 
a/reference/opt/shaders-msl/tese/load-control-point-array.tese b/reference/opt/shaders-msl/tese/load-control-point-array.tese new file mode 100644 index 00000000000..54d7419f5a3 --- /dev/null +++ b/reference/opt/shaders-msl/tese/load-control-point-array.tese @@ -0,0 +1,78 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vInputs [[attribute(0)]]; +}; + +struct main0_patchIn +{ + float4 vBoo_0 [[attribute(1)]]; + float4 vBoo_1 [[attribute(2)]]; + float4 vBoo_2 [[attribute(3)]]; + float4 vBoo_3 [[attribute(4)]]; + int vIndex [[attribute(5)]]; + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray vBoo = {}; + vBoo[0] = patchIn.vBoo_0; + vBoo[1] = patchIn.vBoo_1; + vBoo[2] = patchIn.vBoo_2; + vBoo[3] = patchIn.vBoo_3; + out.gl_Position = (patchIn.gl_in[0u].vInputs + patchIn.gl_in[1u].vInputs) + vBoo[patchIn.vIndex]; + return out; +} + diff --git a/reference/opt/shaders-msl/tese/quad.domain.tese b/reference/opt/shaders-msl/tese/quad.domain.tese index 
78b58ab9975..81c4aa49a45 100644 --- a/reference/opt/shaders-msl/tese/quad.domain.tese +++ b/reference/opt/shaders-msl/tese/quad.domain.tese @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -10,15 +51,24 @@ struct main0_out struct main0_patchIn { - float2 gl_TessLevelInner [[attribute(0)]]; - float4 gl_TessLevelOuter [[attribute(1)]]; + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = 
patchIn.gl_TessLevelOuter[3]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); gl_TessCoord.y = 1.0 - gl_TessCoord.y; - out.gl_Position = float4(((gl_TessCoord.x * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.x) + (((1.0 - gl_TessCoord.x) * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.z), ((gl_TessCoord.y * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.w) + (((1.0 - gl_TessCoord.y) * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.y), 0.0, 1.0); + out.gl_Position = float4(fma(gl_TessCoord.x * gl_TessLevelInner[0], gl_TessLevelOuter[0], ((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), fma(gl_TessCoord.y * gl_TessLevelInner[1], gl_TessLevelOuter[3], ((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[1]), 0.0, 1.0); return out; } diff --git a/reference/opt/shaders-msl/tese/quad.tese b/reference/opt/shaders-msl/tese/quad.tese index 83ef729321e..bfa96f9cfbd 100644 --- a/reference/opt/shaders-msl/tese/quad.tese +++ b/reference/opt/shaders-msl/tese/quad.tese @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -10,14 +51,23 @@ struct main0_out struct main0_patchIn { - float2 gl_TessLevelInner [[attribute(0)]]; - float4 gl_TessLevelOuter [[attribute(1)]]; + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; - out.gl_Position = float4(((gl_TessCoord.x * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.x) + (((1.0 - gl_TessCoord.x) * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.z), ((gl_TessCoord.y * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.y) + (((1.0 - gl_TessCoord.y) * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.w), 0.0, 1.0); + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = 
patchIn.gl_TessLevelOuter[3]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + out.gl_Position = float4(fma(gl_TessCoord.x * gl_TessLevelInner[0], gl_TessLevelOuter[0], ((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), fma(gl_TessCoord.y * gl_TessLevelInner[1], gl_TessLevelOuter[1], ((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]), 0.0, 1.0); return out; } diff --git a/reference/opt/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese b/reference/opt/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese new file mode 100644 index 00000000000..44bdd5ffe9d --- /dev/null +++ b/reference/opt/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese @@ -0,0 +1,72 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; 
+ gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter[3]; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + out.gl_Position = float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; + return out; +} + diff --git a/reference/opt/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese b/reference/opt/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese new file mode 100644 index 00000000000..65d2fd94f72 --- /dev/null +++ b/reference/opt/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese @@ -0,0 +1,66 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(uint gl_PrimitiveID [[patch_id]], const device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]; + gl_TessLevelOuter[1] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]; + gl_TessLevelOuter[2] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]; + gl_TessLevelOuter[3] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]; + gl_TessLevelInner[0] = spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0]; + gl_TessLevelInner[1] = spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1]; + out.gl_Position = float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; + return out; +} + diff --git a/reference/opt/shaders-msl/tese/read-tess-level-in-func.msl2.tese b/reference/opt/shaders-msl/tese/read-tess-level-in-func.msl2.tese new file mode 100644 index 00000000000..decaca3d5a7 --- /dev/null +++ b/reference/opt/shaders-msl/tese/read-tess-level-in-func.msl2.tese @@ -0,0 +1,69 @@ +#pragma clang diagnostic 
ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float4 gl_TessLevel [[attribute(0)]]; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = patchIn.gl_TessLevel[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevel[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevel[2]; + gl_TessLevelInner[0] = patchIn.gl_TessLevel[3]; + out.gl_Position = float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; + return out; +} + diff --git a/reference/opt/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese b/reference/opt/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese new file mode 100644 index 00000000000..0b555ed0fee --- /dev/null +++ b/reference/opt/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese @@ -0,0 +1,64 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic 
ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(uint gl_PrimitiveID [[patch_id]], const device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]; + gl_TessLevelOuter[1] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]; + gl_TessLevelOuter[2] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]; + gl_TessLevelInner[0] = spvTessLevel[gl_PrimitiveID].insideTessellationFactor; + out.gl_Position = float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; + return out; +} + diff --git a/reference/opt/shaders-msl/tese/set-from-function.tese b/reference/opt/shaders-msl/tese/set-from-function.tese index 6dcdbe19387..ad4f61b9562 100644 --- a/reference/opt/shaders-msl/tese/set-from-function.tese +++ b/reference/opt/shaders-msl/tese/set-from-function.tese @@ -23,15 +23,15 @@ struct main0_out struct main0_in { float4 vColor 
[[attribute(0)]]; - float4 Block_a [[attribute(2)]]; - float4 Block_b [[attribute(3)]]; + float4 blocks_a [[attribute(2)]]; + float4 blocks_b [[attribute(3)]]; }; struct main0_patchIn { float4 vColors [[attribute(1)]]; - float4 Foo_a [[attribute(4)]]; - float4 Foo_b [[attribute(5)]]; + float4 vFoo_a [[attribute(4)]]; + float4 vFoo_b [[attribute(5)]]; patch_control_point gl_in; }; @@ -39,12 +39,12 @@ struct main0_patchIn { main0_out out = {}; Foo vFoo = {}; - vFoo.a = patchIn.Foo_a; - vFoo.b = patchIn.Foo_b; - out.gl_Position = patchIn.gl_in[0].Block_a; - out.gl_Position += patchIn.gl_in[0].Block_b; - out.gl_Position += patchIn.gl_in[1].Block_a; - out.gl_Position += patchIn.gl_in[1].Block_b; + vFoo.a = patchIn.vFoo_a; + vFoo.b = patchIn.vFoo_b; + out.gl_Position = patchIn.gl_in[0].blocks_a; + out.gl_Position += patchIn.gl_in[0].blocks_b; + out.gl_Position += patchIn.gl_in[1].blocks_a; + out.gl_Position += patchIn.gl_in[1].blocks_b; out.gl_Position += patchIn.gl_in[0].vColor; out.gl_Position += patchIn.gl_in[1].vColor; out.gl_Position += patchIn.vColors; diff --git a/reference/opt/shaders-msl/tese/triangle-tess-level.tese b/reference/opt/shaders-msl/tese/triangle-tess-level.tese index 975e6298518..86ccc4f023e 100644 --- a/reference/opt/shaders-msl/tese/triangle-tess-level.tese +++ b/reference/opt/shaders-msl/tese/triangle-tess-level.tese @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -16,12 +57,12 @@ struct main0_patchIn [[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float3 gl_TessCoord [[position_in_patch]]) { main0_out out = {}; - float gl_TessLevelInner[2] = {}; - float gl_TessLevelOuter[4] = {}; - gl_TessLevelInner[0] = patchIn.gl_TessLevel.w; - gl_TessLevelOuter[0] = patchIn.gl_TessLevel.x; - gl_TessLevelOuter[1] = patchIn.gl_TessLevel.y; - gl_TessLevelOuter[2] = patchIn.gl_TessLevel.z; + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevel[3]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevel[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevel[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevel[2]; out.gl_Position = float4((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0], (gl_TessCoord.y * gl_TessLevelInner[0]) * gl_TessLevelOuter[1], (gl_TessCoord.z * gl_TessLevelInner[0]) * gl_TessLevelOuter[2], 1.0); return out; } diff --git a/reference/opt/shaders-msl/tese/water_tess.raw-tess-in.tese b/reference/opt/shaders-msl/tese/water_tess.raw-tess-in.tese new file mode 100644 index 00000000000..d4441c2eeeb --- /dev/null +++ b/reference/opt/shaders-msl/tese/water_tess.raw-tess-in.tese @@ -0,0 +1,46 @@ +#include +#include + 
+using namespace metal; + +struct UBO +{ + float4x4 uMVP; + float4 uScale; + float2 uInvScale; + float3 uCamPos; + float2 uPatchSize; + float2 uInvHeightmapSize; +}; + +struct main0_out +{ + float3 vWorld [[user(locn0)]]; + float4 vGradNormalTex [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float2 vOutPatchPosBase; + float4 vPatchLods; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(constant UBO& _31 [[buffer(0)]], texture2d uHeightmapDisplacement [[texture(0)]], sampler uHeightmapDisplacementSmplr [[sampler(0)]], float2 gl_TessCoordIn [[position_in_patch]], uint gl_PrimitiveID [[patch_id]], const device main0_patchIn* spvPatchIn [[buffer(20)]]) +{ + main0_out out = {}; + const device main0_patchIn& patchIn = spvPatchIn[gl_PrimitiveID]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + float2 _202 = fma(gl_TessCoord.xy, _31.uPatchSize, patchIn.vOutPatchPosBase); + float2 _216 = mix(patchIn.vPatchLods.yx, patchIn.vPatchLods.zw, float2(gl_TessCoord.x)); + float _223 = mix(_216.x, _216.y, gl_TessCoord.y); + float _225 = floor(_223); + float2 _141 = _31.uInvHeightmapSize * exp2(_225); + out.vGradNormalTex = float4(fma(_202, _31.uInvHeightmapSize, _31.uInvHeightmapSize * 0.5), (_202 * _31.uInvHeightmapSize) * _31.uScale.zw); + float3 _256 = mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, fma(_202, _31.uInvHeightmapSize, _141 * 0.5), level(_225)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, fma(_202, _31.uInvHeightmapSize, _141 * 1.0), level(_225 + 1.0)).xyz, float3(_223 - _225)); + float2 _171 = fma(_202, _31.uScale.xy, _256.yz); + out.vWorld = float3(_171.x, _256.x, _171.y); + out.gl_Position = _31.uMVP * float4(out.vWorld, 1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/tese/water_tess.tese b/reference/opt/shaders-msl/tese/water_tess.tese index dd93e4134d5..e91063f2d4e 100644 --- a/reference/opt/shaders-msl/tese/water_tess.tese +++ 
b/reference/opt/shaders-msl/tese/water_tess.tese @@ -26,19 +26,19 @@ struct main0_patchIn float4 vPatchLods [[attribute(1)]]; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant UBO& _31 [[buffer(0)]], texture2d uHeightmapDisplacement [[texture(0)]], sampler uHeightmapDisplacementSmplr [[sampler(0)]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant UBO& _31 [[buffer(0)]], texture2d uHeightmapDisplacement [[texture(0)]], sampler uHeightmapDisplacementSmplr [[sampler(0)]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; - float2 _201 = patchIn.vOutPatchPosBase + (float3(gl_TessCoord, 0).xy * _31.uPatchSize); - float2 _214 = mix(patchIn.vPatchLods.yx, patchIn.vPatchLods.zw, float2(float3(gl_TessCoord, 0).x)); - float _221 = mix(_214.x, _214.y, float3(gl_TessCoord, 0).y); - float _223 = floor(_221); - float2 _125 = _201 * _31.uInvHeightmapSize; - float2 _141 = _31.uInvHeightmapSize * exp2(_223); - out.vGradNormalTex = float4(_125 + (_31.uInvHeightmapSize * 0.5), _125 * _31.uScale.zw); - float3 _253 = mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (_125 + (_141 * 0.5)), level(_223)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (_125 + (_141 * 1.0)), level(_223 + 1.0)).xyz, float3(_221 - _223)); - float2 _171 = (_201 * _31.uScale.xy) + _253.yz; - out.vWorld = float3(_171.x, _253.x, _171.y); + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + float2 _202 = fma(gl_TessCoord.xy, _31.uPatchSize, patchIn.vOutPatchPosBase); + float2 _216 = mix(patchIn.vPatchLods.yx, patchIn.vPatchLods.zw, float2(gl_TessCoord.x)); + float _223 = mix(_216.x, _216.y, gl_TessCoord.y); + float _225 = floor(_223); + float2 _141 = _31.uInvHeightmapSize * exp2(_225); + out.vGradNormalTex = float4(fma(_202, _31.uInvHeightmapSize, _31.uInvHeightmapSize * 0.5), (_202 * _31.uInvHeightmapSize) * 
_31.uScale.zw); + float3 _256 = mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, fma(_202, _31.uInvHeightmapSize, _141 * 0.5), level(_225)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, fma(_202, _31.uInvHeightmapSize, _141 * 1.0), level(_225 + 1.0)).xyz, float3(_223 - _225)); + float2 _171 = fma(_202, _31.uScale.xy, _256.yz); + out.vWorld = float3(_171.x, _256.x, _171.y); out.gl_Position = _31.uMVP * float4(out.vWorld, 1.0); return out; } diff --git a/reference/opt/shaders-msl/vert/array-component-io.for-tess.vert b/reference/opt/shaders-msl/vert/array-component-io.for-tess.vert new file mode 100644 index 00000000000..24958eb50db --- /dev/null +++ b/reference/opt/shaders-msl/vert/array-component-io.for-tess.vert @@ -0,0 +1,98 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 m_location_0; + float4 m_location_1; + float4 m_location_2; + float4 gl_Position; +}; + +struct main0_in +{ + float4 m_location_0 [[attribute(0)]]; + float4 m_location_1 [[attribute(1)]]; + float4 m_location_2 [[attribute(2)]]; + float4 Pos [[attribute(4)]]; +}; + +kernel void main0(main0_in in [[stage_in]], 
uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + spvUnsafeArray A = {}; + spvUnsafeArray B = {}; + spvUnsafeArray C = {}; + float D = {}; + spvUnsafeArray InA = {}; + spvUnsafeArray InB = {}; + spvUnsafeArray InC = {}; + float InD = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + InA[0] = in.m_location_1.x; + InA[1] = in.m_location_2.x; + InB[0] = in.m_location_1.zw; + InB[1] = in.m_location_2.zw; + InC[0] = in.m_location_0.y; + InC[1] = in.m_location_1.y; + InC[2] = in.m_location_2.y; + InD = in.m_location_0.w; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = in.Pos; + A = InA; + B = InB; + C = InC; + D = InD; + out.m_location_1.x = A[0]; + out.m_location_2.x = A[1]; + out.m_location_1.zw = B[0]; + out.m_location_2.zw = B[1]; + out.m_location_0.y = C[0]; + out.m_location_1.y = C[1]; + out.m_location_2.y = C[2]; + out.m_location_0.w = D; +} + diff --git a/reference/opt/shaders-msl/vert/array-component-io.vert b/reference/opt/shaders-msl/vert/array-component-io.vert new file mode 100644 index 00000000000..352c9d2ef0f --- /dev/null +++ b/reference/opt/shaders-msl/vert/array-component-io.vert @@ -0,0 +1,100 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float C_0 [[user(locn0_1)]]; + float D [[user(locn0_3)]]; + float A_0 [[user(locn1)]]; + float C_1 [[user(locn1_1)]]; + float2 B_0 [[user(locn1_2)]]; + float A_1 [[user(locn2)]]; + float C_2 [[user(locn2_1)]]; + float2 B_1 [[user(locn2_2)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 m_location_0 [[attribute(0)]]; + float4 m_location_1 [[attribute(1)]]; + float4 m_location_2 [[attribute(2)]]; + float4 Pos [[attribute(4)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray A = {}; + spvUnsafeArray B = {}; + spvUnsafeArray C = {}; + spvUnsafeArray InA = {}; + spvUnsafeArray InB = {}; + spvUnsafeArray InC = {}; + float InD = {}; + InA[0] = in.m_location_1.x; + InA[1] = in.m_location_2.x; + InB[0] = in.m_location_1.zw; + InB[1] = in.m_location_2.zw; + InC[0] = in.m_location_0.y; + InC[1] = in.m_location_1.y; + InC[2] = in.m_location_2.y; + InD = in.m_location_0.w; + out.gl_Position = in.Pos; + A = InA; + B = InB; + C = InC; + out.D = InD; + out.A_0 = A[0]; + out.A_1 = A[1]; + out.B_0 = B[0]; + out.B_1 = B[1]; + out.C_0 = C[0]; + out.C_1 = C[1]; + out.C_2 = C[2]; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/basic.for-tess.vert b/reference/opt/shaders-msl/vert/basic.for-tess.vert new file mode 100644 
index 00000000000..c99a95ac898 --- /dev/null +++ b/reference/opt/shaders-msl/vert/basic.for-tess.vert @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4x4 uMVP; +}; + +struct main0_out +{ + float3 vNormal; + float4 gl_Position; +}; + +struct main0_in +{ + float4 aVertex [[attribute(0)]]; + float3 aNormal [[attribute(1)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = _16.uMVP * in.aVertex; + out.vNormal = in.aNormal; +} + diff --git a/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert b/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert new file mode 100644 index 00000000000..38442986959 --- /dev/null +++ b/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert @@ -0,0 +1,62 @@ +#include +#include + +using namespace metal; + +struct Position; +struct PositionReferences; + +struct Position +{ + float2 positions[1]; +}; + +struct Registers +{ + float4x4 view_projection; + device PositionReferences* references; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +struct main0_out +{ + float4 out_color [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant Registers& registers [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + int slice = int(gl_InstanceIndex); + const device Position* __restrict positions = registers.references->buffers[int(gl_InstanceIndex)]; + float2 _45 = registers.references->buffers[int(gl_InstanceIndex)]->positions[int(gl_VertexIndex)] * 2.5; + float2 pos = _45; + float2 _60 = _45 + 
((float2(float(int(gl_InstanceIndex) % 8), float(int(gl_InstanceIndex) / 8)) - float2(3.5)) * 3.0); + pos = _60; + out.gl_Position = registers.view_projection * float4(_60, 0.0, 1.0); + int _82 = int(gl_VertexIndex) % 16; + int index_x = _82; + int _85 = int(gl_VertexIndex) / 16; + int index_y = _85; + float _92 = sin(float(_82)); + float _94 = fma(0.300000011920928955078125, _92, 0.5); + float r = _94; + float _98 = sin(float(_85)); + float _100 = fma(0.300000011920928955078125, _98, 0.5); + float g = _100; + int _105 = (_82 ^ _85) & 1; + int checkerboard = _105; + float _107 = float(_105); + float _111 = fma(_107, 0.800000011920928955078125, 0.20000000298023223876953125); + float _113 = _94 * _111; + r = _113; + float _119 = _100 * _111; + g = _119; + out.out_color = float4(_113, _119, 0.1500000059604644775390625, 1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/vert/clip-distance-block.no-user-varying.vert b/reference/opt/shaders-msl/vert/clip-distance-block.no-user-varying.vert new file mode 100644 index 00000000000..c78105e0ce6 --- /dev/null +++ b/reference/opt/shaders-msl/vert/clip-distance-block.no-user-varying.vert @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = in.Position; + out.gl_ClipDistance[0] = in.Position.x; + out.gl_ClipDistance[1] = in.Position.y; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/clip-distance-block.vert b/reference/opt/shaders-msl/vert/clip-distance-block.vert new file mode 100644 index 00000000000..af58f35ff5f --- /dev/null +++ b/reference/opt/shaders-msl/vert/clip-distance-block.vert @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float 
gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = in.Position; + out.gl_ClipDistance[0] = in.Position.x; + out.gl_ClipDistance[1] = in.Position.y; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/copy.flatten.vert b/reference/opt/shaders-msl/vert/copy.flatten.vert index d73ee3282fb..32fde3a48b8 100644 --- a/reference/opt/shaders-msl/vert/copy.flatten.vert +++ b/reference/opt/shaders-msl/vert/copy.flatten.vert @@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] for (int _96 = 0; _96 < 4; ) { float3 _68 = in.aVertex.xyz - float3(_21.lights[_96].Position); - out.vColor += ((_21.lights[_96].Color * fast::clamp(1.0 - (length(_68) / _21.lights[_96].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_68))); + out.vColor += ((_21.lights[_96].Color * fast::clamp(1.0 - (length(_68) / _21.lights[_96].Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(_68))); _96++; continue; } diff --git a/reference/opt/shaders-msl/vert/dynamic.flatten.vert b/reference/opt/shaders-msl/vert/dynamic.flatten.vert index 92911a4eebd..26264ddf95e 100644 --- a/reference/opt/shaders-msl/vert/dynamic.flatten.vert +++ b/reference/opt/shaders-msl/vert/dynamic.flatten.vert @@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] for (int _82 = 0; _82 < 4; ) { float3 _54 = in.aVertex.xyz - float3(_21.lights[_82].Position); - out.vColor += ((_21.lights[_82].Color * fast::clamp(1.0 - (length(_54) / _21.lights[_82].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_54))); + out.vColor += ((_21.lights[_82].Color * fast::clamp(1.0 - (length(_54) / _21.lights[_82].Radius), 0.0, 
1.0)) * dot(in.aNormal, fast::normalize(_54))); _82++; continue; } diff --git a/reference/opt/shaders-msl/vert/float-math.invariant-float-math.vert b/reference/opt/shaders-msl/vert/float-math.invariant-float-math.vert new file mode 100644 index 00000000000..0fddcdf4d33 --- /dev/null +++ b/reference/opt/shaders-msl/vert/float-math.invariant-float-math.vert @@ -0,0 +1,137 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +[[clang::optnone]] T spvFMul(T l, T r) +{ + return fma(l, r, T(0)); +} + +template +[[clang::optnone]] vec spvFMulVectorMatrix(vec v, matrix m) +{ + vec res = vec(0); + for (uint i = Rows; i > 0; --i) + { + vec tmp(0); + for (uint j = 0; j < Cols; ++j) + { + tmp[j] = m[j][i - 1]; + } + res = fma(tmp, vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] vec spvFMulMatrixVector(matrix m, vec v) +{ + vec res = vec(0); + for (uint i = Cols; i > 0; --i) + { + res = fma(m[i - 1], vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] matrix spvFMulMatrixMatrix(matrix l, matrix r) +{ + matrix res; + for (uint i = 0; i < RCols; i++) + { + vec tmp(0); + for (uint j = 0; j < LCols; j++) + { 
+ tmp = fma(vec(r[i][j]), l[j], tmp); + } + res[i] = tmp; + } + return res; +} + +struct Matrices +{ + float4x4 vpMatrix; + float4x4 wMatrix; + float4x3 wMatrix4x3; + float3x4 wMatrix3x4; +}; + +struct main0_out +{ + float3 OutNormal [[user(locn0)]]; + float4 OutWorldPos_0 [[user(locn1)]]; + float4 OutWorldPos_1 [[user(locn2)]]; + float4 OutWorldPos_2 [[user(locn3)]]; + float4 OutWorldPos_3 [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float3 InPos [[attribute(0)]]; + float3 InNormal [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant Matrices& _22 [[buffer(0)]]) +{ + main0_out out = {}; + spvUnsafeArray OutWorldPos = {}; + float4 _37 = float4(in.InPos, 1.0); + out.gl_Position = spvFMulMatrixVector(spvFMulMatrixMatrix(_22.vpMatrix, _22.wMatrix), _37); + OutWorldPos[0] = spvFMulMatrixVector(_22.wMatrix, _37); + OutWorldPos[1] = spvFMulVectorMatrix(_37, _22.wMatrix); + OutWorldPos[2] = spvFMulMatrixVector(_22.wMatrix3x4, in.InPos); + OutWorldPos[3] = spvFMulVectorMatrix(in.InPos, _22.wMatrix4x3); + out.OutNormal = spvFMulMatrixVector(_22.wMatrix, float4(in.InNormal, 0.0)).xyz; + out.OutWorldPos_0 = OutWorldPos[0]; + out.OutWorldPos_1 = OutWorldPos[1]; + out.OutWorldPos_2 = OutWorldPos[2]; + out.OutWorldPos_3 = OutWorldPos[3]; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/float-math.vert b/reference/opt/shaders-msl/vert/float-math.vert new file mode 100644 index 00000000000..da468c24c9b --- /dev/null +++ b/reference/opt/shaders-msl/vert/float-math.vert @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Matrices +{ + float4x4 vpMatrix; + float4x4 wMatrix; + float4x3 wMatrix4x3; + float3x4 wMatrix3x4; +}; + +struct main0_out +{ + float3 OutNormal [[user(locn0)]]; + float4 OutWorldPos_0 [[user(locn1)]]; + float4 OutWorldPos_1 [[user(locn2)]]; + float4 OutWorldPos_2 [[user(locn3)]]; + float4 OutWorldPos_3 [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float3 InPos [[attribute(0)]]; + float3 InNormal [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant Matrices& _22 [[buffer(0)]]) +{ + main0_out out = {}; + spvUnsafeArray OutWorldPos = {}; + float4 _37 = float4(in.InPos, 1.0); + out.gl_Position = (_22.vpMatrix * _22.wMatrix) * _37; + OutWorldPos[0] = _22.wMatrix * _37; + OutWorldPos[1] = _37 * _22.wMatrix; + OutWorldPos[2] = _22.wMatrix3x4 * in.InPos; + OutWorldPos[3] = in.InPos * _22.wMatrix4x3; + out.OutNormal = (_22.wMatrix * float4(in.InNormal, 0.0)).xyz; + out.OutWorldPos_0 = OutWorldPos[0]; + out.OutWorldPos_1 = OutWorldPos[1]; + out.OutWorldPos_2 = OutWorldPos[2]; + out.OutWorldPos_3 = OutWorldPos[3]; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/functions.vert b/reference/opt/shaders-msl/vert/functions.vert index f710225261d..4300aa1350a 100644 --- a/reference/opt/shaders-msl/vert/functions.vert +++ 
b/reference/opt/shaders-msl/vert/functions.vert @@ -5,73 +5,52 @@ using namespace metal; -struct UBO -{ - float4x4 uMVP; - float3 rotDeg; - float3 rotRad; - int2 bits; -}; - -struct main0_out -{ - float3 vNormal [[user(locn0)]]; - float3 vRotDeg [[user(locn1)]]; - float3 vRotRad [[user(locn2)]]; - int2 vLSB [[user(locn3)]]; - int2 vMSB [[user(locn4)]]; - float4 gl_Position [[position]]; -}; - -struct main0_in -{ - float4 aVertex [[attribute(0)]]; - float3 aNormal [[attribute(1)]]; -}; - // Implementation of the GLSL radians() function template -T radians(T d) +inline T radians(T d) { return d * T(0.01745329251); } // Implementation of the GLSL degrees() function template -T degrees(T r) +inline T degrees(T r) { return r * T(57.2957795131); } // Implementation of the GLSL findLSB() function template -T findLSB(T x) +inline T spvFindLSB(T x) { return select(ctz(x), T(-1), x == T(0)); } // Implementation of the signed GLSL findMSB() function template -T findSMSB(T x) +inline T spvFindSMSB(T x) { T v = select(x, T(-1) - x, x < T(0)); return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); } // Returns the determinant of a 2x2 matrix. -inline float spvDet2x2(float a1, float a2, float b1, float b2) +static inline __attribute__((always_inline)) +float spvDet2x2(float a1, float a2, float b1, float b2) { return a1 * b2 - b1 * a2; } // Returns the determinant of a 3x3 matrix. -inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +static inline __attribute__((always_inline)) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) { return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. 
+static inline __attribute__((always_inline)) float4x4 spvInverse4x4(float4x4 m) { float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -105,6 +84,30 @@ float4x4 spvInverse4x4(float4x4 m) return (det != 0.0f) ? (adj * (1.0f / det)) : m; } +struct UBO +{ + float4x4 uMVP; + float3 rotDeg; + float3 rotRad; + int2 bits; +}; + +struct main0_out +{ + float3 vNormal [[user(locn0)]]; + float3 vRotDeg [[user(locn1)]]; + float3 vRotRad [[user(locn2)]]; + int2 vLSB [[user(locn3)]]; + int2 vMSB [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 aVertex [[attribute(0)]]; + float3 aNormal [[attribute(1)]]; +}; + vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]]) { main0_out out = {}; @@ -112,8 +115,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] out.vNormal = in.aNormal; out.vRotDeg = degrees(_18.rotRad); out.vRotRad = radians(_18.rotDeg); - out.vLSB = findLSB(_18.bits); - out.vMSB = findSMSB(_18.bits); + out.vLSB = spvFindLSB(_18.bits); + out.vMSB = spvFindSMSB(_18.bits); return out; } diff --git a/reference/opt/shaders-msl/vert/implicit-position-1.vert b/reference/opt/shaders-msl/vert/implicit-position-1.vert new file mode 100644 index 00000000000..5cea4ee2c20 --- /dev/null +++ b/reference/opt/shaders-msl/vert/implicit-position-1.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 V [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.V = float4(1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/vert/implicit-position-2.vert b/reference/opt/shaders-msl/vert/implicit-position-2.vert new file mode 100644 index 00000000000..9e024c2095b --- /dev/null +++ b/reference/opt/shaders-msl/vert/implicit-position-2.vert @@ -0,0 +1,9 @@ +#include +#include + +using namespace metal; + +vertex void main0() +{ +} + diff --git 
a/reference/opt/shaders-msl/vert/in_out_array_mat.vert b/reference/opt/shaders-msl/vert/in_out_array_mat.vert index 0d6976e2606..7a74f49a49e 100644 --- a/reference/opt/shaders-msl/vert/in_out_array_mat.vert +++ b/reference/opt/shaders-msl/vert/in_out_array_mat.vert @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct UBO { float4x4 projection; @@ -40,7 +81,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& ubo [[buffer(0)]] { main0_out out = {}; float4x4 outTransModel = {}; - float4 colors[3] = {}; + spvUnsafeArray colors = {}; float4x4 inViewMat = {}; colors[0] = in.colors_0; colors[1] = in.colors_1; diff --git a/reference/opt/shaders-msl/vert/interface-block-block-composites.frag b/reference/opt/shaders-msl/vert/interface-block-block-composites.frag index 90d732cc52b..ac0d424d883 100644 --- a/reference/opt/shaders-msl/vert/interface-block-block-composites.frag +++ b/reference/opt/shaders-msl/vert/interface-block-block-composites.frag @@ -1,13 +1,54 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template 
+struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Vert { float3x3 wMatrix; float4 wTmp; - float arr[4]; + spvUnsafeArray arr; }; struct main0_out @@ -20,14 +61,14 @@ struct main0_in float3 vMatrix_0 [[user(locn0)]]; float3 vMatrix_1 [[user(locn1)]]; float3 vMatrix_2 [[user(locn2)]]; - float3 Vert_wMatrix_0 [[user(locn4)]]; - float3 Vert_wMatrix_1 [[user(locn5)]]; - float3 Vert_wMatrix_2 [[user(locn6)]]; - float4 Vert_wTmp [[user(locn7)]]; - float Vert_arr_0 [[user(locn8)]]; - float Vert_arr_1 [[user(locn9)]]; - float Vert_arr_2 [[user(locn10)]]; - float Vert_arr_3 [[user(locn11)]]; + float3 m_17_wMatrix_0 [[user(locn4)]]; + float3 m_17_wMatrix_1 [[user(locn5)]]; + float3 m_17_wMatrix_2 [[user(locn6)]]; + float4 m_17_wTmp [[user(locn7)]]; + float m_17_arr_0 [[user(locn8)]]; + float m_17_arr_1 [[user(locn9)]]; + float m_17_arr_2 [[user(locn10)]]; + float m_17_arr_3 [[user(locn11)]]; }; fragment main0_out main0(main0_in in [[stage_in]]) @@ -35,14 +76,14 @@ fragment main0_out main0(main0_in in [[stage_in]]) main0_out out = {}; Vert _17 = {}; float3x3 vMatrix = {}; - _17.wMatrix[0] = in.Vert_wMatrix_0; - _17.wMatrix[1] = in.Vert_wMatrix_1; - _17.wMatrix[2] = in.Vert_wMatrix_2; - _17.wTmp = in.Vert_wTmp; - _17.arr[0] = in.Vert_arr_0; - _17.arr[1] = in.Vert_arr_1; - _17.arr[2] = in.Vert_arr_2; - _17.arr[3] = 
in.Vert_arr_3; + _17.wMatrix[0] = in.m_17_wMatrix_0; + _17.wMatrix[1] = in.m_17_wMatrix_1; + _17.wMatrix[2] = in.m_17_wMatrix_2; + _17.wTmp = in.m_17_wTmp; + _17.arr[0] = in.m_17_arr_0; + _17.arr[1] = in.m_17_arr_1; + _17.arr[2] = in.m_17_arr_2; + _17.arr[3] = in.m_17_arr_3; vMatrix[0] = in.vMatrix_0; vMatrix[1] = in.vMatrix_1; vMatrix[2] = in.vMatrix_2; diff --git a/reference/opt/shaders-msl/vert/interface-block-block-composites.vert b/reference/opt/shaders-msl/vert/interface-block-block-composites.vert index 3d97ae6dcff..a05c9331586 100644 --- a/reference/opt/shaders-msl/vert/interface-block-block-composites.vert +++ b/reference/opt/shaders-msl/vert/interface-block-block-composites.vert @@ -1,11 +1,52 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Vert { - float arr[3]; + spvUnsafeArray arr; float3x3 wMatrix; float4 wTmp; }; @@ -15,13 +56,13 @@ struct main0_out float3 vMatrix_0 [[user(locn0)]]; float3 vMatrix_1 [[user(locn1)]]; float3 vMatrix_2 [[user(locn2)]]; - float Vert_arr_0 [[user(locn4)]]; - float Vert_arr_1 [[user(locn5)]]; - float Vert_arr_2 [[user(locn6)]]; - float3 Vert_wMatrix_0 [[user(locn7)]]; - float3 Vert_wMatrix_1 
[[user(locn8)]]; - float3 Vert_wMatrix_2 [[user(locn9)]]; - float4 Vert_wTmp [[user(locn10)]]; + float m_20_arr_0 [[user(locn4)]]; + float m_20_arr_1 [[user(locn5)]]; + float m_20_arr_2 [[user(locn6)]]; + float3 m_20_wMatrix_0 [[user(locn7)]]; + float3 m_20_wMatrix_1 [[user(locn8)]]; + float3 m_20_wMatrix_2 [[user(locn9)]]; + float4 m_20_wTmp [[user(locn10)]]; float4 gl_Position [[position]]; }; @@ -52,13 +93,13 @@ vertex main0_out main0(main0_in in [[stage_in]]) out.vMatrix_0 = vMatrix[0]; out.vMatrix_1 = vMatrix[1]; out.vMatrix_2 = vMatrix[2]; - out.Vert_arr_0 = _20.arr[0]; - out.Vert_arr_1 = _20.arr[1]; - out.Vert_arr_2 = _20.arr[2]; - out.Vert_wMatrix_0 = _20.wMatrix[0]; - out.Vert_wMatrix_1 = _20.wMatrix[1]; - out.Vert_wMatrix_2 = _20.wMatrix[2]; - out.Vert_wTmp = _20.wTmp; + out.m_20_arr_0 = _20.arr[0]; + out.m_20_arr_1 = _20.arr[1]; + out.m_20_arr_2 = _20.arr[2]; + out.m_20_wMatrix_0 = _20.wMatrix[0]; + out.m_20_wMatrix_1 = _20.wMatrix[1]; + out.m_20_wMatrix_2 = _20.wMatrix[2]; + out.m_20_wTmp = _20.wTmp; return out; } diff --git a/reference/opt/shaders-msl/vert/interface-block-single-element-array.vert b/reference/opt/shaders-msl/vert/interface-block-single-element-array.vert new file mode 100644 index 00000000000..6858db730e3 --- /dev/null +++ b/reference/opt/shaders-msl/vert/interface-block-single-element-array.vert @@ -0,0 +1,79 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct TDPickVertex +{ + float4 c; + spvUnsafeArray uv; +}; + +struct main0_out +{ + float4 oTDVert_c [[user(locn0)]]; + float3 oTDVert_uv_0 [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float3 P [[attribute(0)]]; + float3 uv_0 [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + TDPickVertex oTDVert = {}; + spvUnsafeArray uv = {}; + uv[0] = in.uv_0; + out.gl_Position = float4(in.P, 1.0); + oTDVert.uv[0] = uv[0]; + oTDVert.c = float4(1.0); + out.oTDVert_c = oTDVert.c; + out.oTDVert_uv_0 = oTDVert.uv[0]; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/interpolation-qualifiers-block.vert b/reference/opt/shaders-msl/vert/interpolation-qualifiers-block.vert index 4206623b4f6..1ae24c7e5b5 100644 --- a/reference/opt/shaders-msl/vert/interpolation-qualifiers-block.vert +++ b/reference/opt/shaders-msl/vert/interpolation-qualifiers-block.vert @@ -16,13 +16,13 @@ struct Output struct main0_out { - float2 Output_v0 [[user(locn0)]]; - float2 Output_v1 [[user(locn1)]]; - float3 Output_v2 [[user(locn2)]]; - float4 Output_v3 [[user(locn3)]]; - float Output_v4 [[user(locn4)]]; - float Output_v5 [[user(locn5)]]; - float Output_v6 [[user(locn6)]]; + float2 outp_v0 [[user(locn0)]]; + float2 outp_v1 [[user(locn1)]]; + float3 outp_v2 
[[user(locn2)]]; + float4 outp_v3 [[user(locn3)]]; + float outp_v4 [[user(locn4)]]; + float outp_v5 [[user(locn5)]]; + float outp_v6 [[user(locn6)]]; float4 gl_Position [[position]]; }; @@ -43,13 +43,13 @@ vertex main0_out main0(main0_in in [[stage_in]]) outp.v5 = in.Position.y; outp.v6 = in.Position.x * in.Position.w; out.gl_Position = in.Position; - out.Output_v0 = outp.v0; - out.Output_v1 = outp.v1; - out.Output_v2 = outp.v2; - out.Output_v3 = outp.v3; - out.Output_v4 = outp.v4; - out.Output_v5 = outp.v5; - out.Output_v6 = outp.v6; + out.outp_v0 = outp.v0; + out.outp_v1 = outp.v1; + out.outp_v2 = outp.v2; + out.outp_v3 = outp.v3; + out.outp_v4 = outp.v4; + out.outp_v5 = outp.v5; + out.outp_v6 = outp.v6; return out; } diff --git a/reference/opt/shaders-msl/vert/invariant.msl21.vert b/reference/opt/shaders-msl/vert/invariant.msl21.vert index 73b0ec7449d..b7f703b2e66 100644 --- a/reference/opt/shaders-msl/vert/invariant.msl21.vert +++ b/reference/opt/shaders-msl/vert/invariant.msl21.vert @@ -18,8 +18,7 @@ struct main0_in vertex main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 _20 = in.vInput1 * in.vInput2; - float4 _21 = in.vInput0 + _20; + float4 _21 = fma(in.vInput1, in.vInput2, in.vInput0); out.gl_Position = _21; return out; } diff --git a/reference/opt/shaders-msl/vert/leaf-function.for-tess.vert b/reference/opt/shaders-msl/vert/leaf-function.for-tess.vert new file mode 100644 index 00000000000..e3d2d1fac02 --- /dev/null +++ b/reference/opt/shaders-msl/vert/leaf-function.for-tess.vert @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4x4 uMVP; +}; + +struct main0_out +{ + float3 vNormal; + float4 gl_Position; +}; + +struct main0_in +{ + float4 aVertex [[attribute(0)]]; + float3 aNormal [[attribute(1)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* 
spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = _18.uMVP * in.aVertex; + out.vNormal = in.aNormal; +} + diff --git a/reference/opt/shaders-msl/vert/no-contraction.vert b/reference/opt/shaders-msl/vert/no-contraction.vert new file mode 100644 index 00000000000..f4df5506ae4 --- /dev/null +++ b/reference/opt/shaders-msl/vert/no-contraction.vert @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +[[clang::optnone]] T spvFMul(T l, T r) +{ + return fma(l, r, T(0)); +} + +template +[[clang::optnone]] vec spvFMulVectorMatrix(vec v, matrix m) +{ + vec res = vec(0); + for (uint i = Rows; i > 0; --i) + { + vec tmp(0); + for (uint j = 0; j < Cols; ++j) + { + tmp[j] = m[j][i - 1]; + } + res = fma(tmp, vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] vec spvFMulMatrixVector(matrix m, vec v) +{ + vec res = vec(0); + for (uint i = Cols; i > 0; --i) + { + res = fma(m[i - 1], vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] matrix spvFMulMatrixMatrix(matrix l, matrix r) +{ + matrix res; + for (uint i = 0; i < RCols; i++) + { + vec tmp(0); + for (uint j = 0; j < LCols; j++) + { + tmp = fma(vec(r[i][j]), l[j], tmp); + } + res[i] = tmp; + } + return res; +} + +template +[[clang::optnone]] T spvFAdd(T l, T r) +{ + return fma(T(1), l, r); +} + +template +[[clang::optnone]] T spvFSub(T l, T r) +{ + return fma(T(-1), r, l); +} + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vA [[attribute(0)]]; + float4 vB [[attribute(1)]]; + float4 vC [[attribute(2)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 _15 = spvFMul(in.vA, in.vB); + out.gl_Position = spvFAdd(spvFAdd(spvFAdd(_15, spvFAdd(in.vA, in.vB)), 
spvFSub(in.vA, in.vB)), spvFAdd(_15, in.vC)); + return out; +} + diff --git a/reference/opt/shaders-msl/vert/no-disable-vertex-out.frag-output.vert b/reference/opt/shaders-msl/vert/no-disable-vertex-out.frag-output.vert new file mode 100644 index 00000000000..14cc94937c0 --- /dev/null +++ b/reference/opt/shaders-msl/vert/no-disable-vertex-out.frag-output.vert @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct buf +{ + float4x4 MVP; + float4 position[36]; + float4 attr[36]; +}; + +struct main0_out +{ + float4 texcoord [[user(locn0)]]; + float3 frag_pos [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant buf& ubuf [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.texcoord = ubuf.attr[int(gl_VertexIndex)]; + out.gl_Position = ubuf.MVP * ubuf.position[int(gl_VertexIndex)]; + out.frag_pos = out.gl_Position.xyz; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/no_stage_out.for-tess.vert b/reference/opt/shaders-msl/vert/no_stage_out.for-tess.vert new file mode 100644 index 00000000000..984e83260aa --- /dev/null +++ b/reference/opt/shaders-msl/vert/no_stage_out.for-tess.vert @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct _RESERVED_IDENTIFIER_FIXUP_10_12 +{ + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; +}; + +struct main0_in +{ + uint4 _RESERVED_IDENTIFIER_FIXUP_19 [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], device _RESERVED_IDENTIFIER_FIXUP_10_12& _RESERVED_IDENTIFIER_FIXUP_12 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], uint3 spvDispatchBase [[grid_origin]]) +{ + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + uint gl_VertexIndex = gl_GlobalInvocationID.x + spvDispatchBase.x; + _RESERVED_IDENTIFIER_FIXUP_12._RESERVED_IDENTIFIER_FIXUP_m0[int(gl_VertexIndex)] = in._RESERVED_IDENTIFIER_FIXUP_19; +} + diff --git 
a/reference/opt/shaders-msl/vert/no_stage_out.vert b/reference/opt/shaders-msl/vert/no_stage_out.vert index 28098ee88e6..e804da67535 100644 --- a/reference/opt/shaders-msl/vert/no_stage_out.vert +++ b/reference/opt/shaders-msl/vert/no_stage_out.vert @@ -3,18 +3,18 @@ using namespace metal; -struct _10 +struct _RESERVED_IDENTIFIER_FIXUP_10_12 { - uint4 _m0[1024]; + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; }; struct main0_in { - uint4 m_19 [[attribute(0)]]; + uint4 _RESERVED_IDENTIFIER_FIXUP_19 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], device _10& _12 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +vertex void main0(main0_in in [[stage_in]], device _RESERVED_IDENTIFIER_FIXUP_10_12& _RESERVED_IDENTIFIER_FIXUP_12 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) { - _12._m0[gl_VertexIndex] = in.m_19; + _RESERVED_IDENTIFIER_FIXUP_12._RESERVED_IDENTIFIER_FIXUP_m0[int(gl_VertexIndex)] = in._RESERVED_IDENTIFIER_FIXUP_19; } diff --git a/reference/opt/shaders-msl/vert/no_stage_out.write_buff.vert b/reference/opt/shaders-msl/vert/no_stage_out.write_buff.vert index d5d31f44308..296293aaea1 100644 --- a/reference/opt/shaders-msl/vert/no_stage_out.write_buff.vert +++ b/reference/opt/shaders-msl/vert/no_stage_out.write_buff.vert @@ -3,14 +3,14 @@ using namespace metal; -struct _35 +struct _RESERVED_IDENTIFIER_FIXUP_33_35 { - uint4 _m0[1024]; + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; }; -struct _40 +struct _RESERVED_IDENTIFIER_FIXUP_38_40 { - uint4 _m0[1024]; + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; }; struct main0_out @@ -20,16 +20,16 @@ struct main0_out struct main0_in { - float4 m_17 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_14 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], device _35& _37 [[buffer(0)]], constant _40& _42 [[buffer(1)]]) +vertex void main0(main0_in in [[stage_in]], device _RESERVED_IDENTIFIER_FIXUP_33_35& _RESERVED_IDENTIFIER_FIXUP_35 [[buffer(0)]], constant _RESERVED_IDENTIFIER_FIXUP_38_40& 
_RESERVED_IDENTIFIER_FIXUP_40 [[buffer(1)]]) { main0_out out = {}; - out.gl_Position = in.m_17; + out.gl_Position = in._RESERVED_IDENTIFIER_FIXUP_14; for (int _52 = 0; _52 < 1024; ) { - _37._m0[_52] = _42._m0[_52]; + _RESERVED_IDENTIFIER_FIXUP_35._RESERVED_IDENTIFIER_FIXUP_m0[_52] = _RESERVED_IDENTIFIER_FIXUP_40._RESERVED_IDENTIFIER_FIXUP_m0[_52]; _52++; continue; } diff --git a/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert b/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert index ca4d6a5b92f..92fbf555d32 100644 --- a/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert +++ b/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert @@ -6,9 +6,9 @@ using namespace metal; -struct _23 +struct _RESERVED_IDENTIFIER_FIXUP_19_21 { - uint _m0; + uint _RESERVED_IDENTIFIER_FIXUP_m0; }; struct main0_out @@ -18,13 +18,13 @@ struct main0_out struct main0_in { - float4 m_17 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_14 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], device _23& _25 [[buffer(0)]]) +vertex void main0(main0_in in [[stage_in]], volatile device _RESERVED_IDENTIFIER_FIXUP_19_21& _RESERVED_IDENTIFIER_FIXUP_21 [[buffer(0)]]) { main0_out out = {}; - out.gl_Position = in.m_17; - uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_25._m0, 1u, memory_order_relaxed); + out.gl_Position = in._RESERVED_IDENTIFIER_FIXUP_14; + uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_RESERVED_IDENTIFIER_FIXUP_21._RESERVED_IDENTIFIER_FIXUP_m0, 1u, memory_order_relaxed); } diff --git a/reference/opt/shaders-msl/vert/no_stage_out.write_tex.vert b/reference/opt/shaders-msl/vert/no_stage_out.write_tex.vert index ddad934ee00..9d87efe1420 100644 --- a/reference/opt/shaders-msl/vert/no_stage_out.write_tex.vert +++ b/reference/opt/shaders-msl/vert/no_stage_out.write_tex.vert @@ -10,16 +10,16 @@ struct main0_out struct main0_in { - float4 m_17 [[attribute(0)]]; + float4 
_RESERVED_IDENTIFIER_FIXUP_14 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], texture1d _34 [[texture(0)]], texture1d _37 [[texture(1)]]) +vertex void main0(main0_in in [[stage_in]], texture1d _RESERVED_IDENTIFIER_FIXUP_32 [[texture(0)]], texture1d _RESERVED_IDENTIFIER_FIXUP_35 [[texture(1)]]) { main0_out out = {}; - out.gl_Position = in.m_17; + out.gl_Position = in._RESERVED_IDENTIFIER_FIXUP_14; for (int _45 = 0; _45 < 128; ) { - _34.write(_37.read(uint(_45)), uint(_45)); + _RESERVED_IDENTIFIER_FIXUP_32.write(_RESERVED_IDENTIFIER_FIXUP_35.read(uint(_45)), uint(_45)); _45++; continue; } diff --git a/reference/opt/shaders-msl/vert/out-block-with-nested-struct-array.vert b/reference/opt/shaders-msl/vert/out-block-with-nested-struct-array.vert new file mode 100644 index 00000000000..cabcfcb521d --- /dev/null +++ b/reference/opt/shaders-msl/vert/out-block-with-nested-struct-array.vert @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct t21 +{ + float4 m0; + float4 m1; +}; + +struct t24 +{ + spvUnsafeArray m0; +}; + +struct main0_out +{ + float4 v26_m0_0_m0 [[user(locn0)]]; + float4 v26_m0_0_m1 [[user(locn1)]]; + float4 v26_m0_1_m0 [[user(locn2)]]; + float4 v26_m0_1_m1 [[user(locn3)]]; + float4 v26_m0_2_m0 [[user(locn4)]]; + float4 v26_m0_2_m1 [[user(locn5)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 v17 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + t24 v26 = {}; + out.gl_Position = in.v17; + v26.m0[1].m1 = float4(-4.0, -9.0, 3.0, 7.0); + out.v26_m0_0_m0 = v26.m0[0].m0; + out.v26_m0_0_m1 = v26.m0[0].m1; + out.v26_m0_1_m0 = v26.m0[1].m0; + out.v26_m0_1_m1 = v26.m0[1].m1; + out.v26_m0_2_m0 = v26.m0[2].m0; + out.v26_m0_2_m1 = v26.m0[2].m1; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/out-block-with-struct-array.vert b/reference/opt/shaders-msl/vert/out-block-with-struct-array.vert new file mode 100644 index 00000000000..61c7c18b54c --- /dev/null +++ b/reference/opt/shaders-msl/vert/out-block-with-struct-array.vert @@ -0,0 +1,83 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T 
elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct t21 +{ + float m0; + float4 m1; +}; + +struct main0_out +{ + float v25_0_m0 [[user(locn0)]]; + float4 v25_0_m1 [[user(locn1)]]; + float v25_1_m0 [[user(locn2)]]; + float4 v25_1_m1 [[user(locn3)]]; + float v25_2_m0 [[user(locn4)]]; + float4 v25_2_m1 [[user(locn5)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 v17 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray v25 = {}; + out.gl_Position = in.v17; + v25[2].m1 = float4(-4.0, -9.0, 3.0, 7.0); + out.v25_0_m0 = v25[0].m0; + out.v25_0_m1 = v25[0].m1; + out.v25_1_m0 = v25[1].m0; + out.v25_1_m1 = v25[1].m1; + out.v25_2_m0 = v25[2].m0; + out.v25_2_m1 = v25[2].m1; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/out_block.vert b/reference/opt/shaders-msl/vert/out_block.vert index 45b897013b1..909a059bd2c 100644 --- a/reference/opt/shaders-msl/vert/out_block.vert +++ b/reference/opt/shaders-msl/vert/out_block.vert @@ -16,8 +16,8 @@ struct VertexOut struct main0_out { - float4 VertexOut_color [[user(locn2)]]; - float4 VertexOut_color2 [[user(locn3)]]; + float4 outputs_color [[user(locn2)]]; + float4 outputs_color2 [[user(locn3)]]; float4 gl_Position [[position]]; }; @@ -34,8 +34,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant 
Transform& block [[buf out.gl_Position = block.transform * float4(in.position, 1.0); outputs.color = in.color; outputs.color2 = in.color + float4(1.0); - out.VertexOut_color = outputs.color; - out.VertexOut_color2 = outputs.color2; + out.outputs_color = outputs.color; + out.outputs_color2 = outputs.color2; return out; } diff --git a/reference/opt/shaders-msl/vert/packed-bool-to-uint.vert b/reference/opt/shaders-msl/vert/packed-bool-to-uint.vert new file mode 100644 index 00000000000..6cc55204848 --- /dev/null +++ b/reference/opt/shaders-msl/vert/packed-bool-to-uint.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _24 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _24.umatrix * float4(_24.uquad[int(gl_VertexIndex)].x, _24.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_24.flags.flags[0] != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/opt/shaders-msl/vert/packed-bool2-to-packed_uint2.vert b/reference/opt/shaders-msl/vert/packed-bool2-to-packed_uint2.vert new file mode 100644 index 00000000000..4c46aaeb4ea --- /dev/null +++ b/reference/opt/shaders-msl/vert/packed-bool2-to-packed_uint2.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint2 flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _25 [[buffer(0)]], uint 
gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _25.umatrix * float4(_25.uquad[int(gl_VertexIndex)].x, _25.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_25.flags.flags[0].x != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/opt/shaders-msl/vert/packed_matrix.vert b/reference/opt/shaders-msl/vert/packed_matrix.vert index 44db8203c7e..2e6f9680dbe 100644 --- a/reference/opt/shaders-msl/vert/packed_matrix.vert +++ b/reference/opt/shaders-msl/vert/packed_matrix.vert @@ -3,48 +3,45 @@ using namespace metal; -typedef packed_float4 packed_rm_float4x3[3]; - -struct _15 +struct _RESERVED_IDENTIFIER_FIXUP_1365_18812 { - packed_rm_float4x3 _m0; - packed_rm_float4x3 _m1; + float3x4 _RESERVED_IDENTIFIER_FIXUP_m0; + float3x4 _RESERVED_IDENTIFIER_FIXUP_m1; }; -struct _42 +struct _RESERVED_IDENTIFIER_FIXUP_1126_22044 { - float4x4 _m0; - float4x4 _m1; - float _m2; + float4x4 _RESERVED_IDENTIFIER_FIXUP_m0; + float4x4 _RESERVED_IDENTIFIER_FIXUP_m1; + float _RESERVED_IDENTIFIER_FIXUP_m9; char _m3_pad[12]; - packed_float3 _m3; - float _m4; - packed_float3 _m5; - float _m6; - float _m7; - float _m8; - float2 _m9; + packed_float3 _RESERVED_IDENTIFIER_FIXUP_m10; + float _RESERVED_IDENTIFIER_FIXUP_m11; + packed_float3 _RESERVED_IDENTIFIER_FIXUP_m12; + float _RESERVED_IDENTIFIER_FIXUP_m17; + float _RESERVED_IDENTIFIER_FIXUP_m18; + float _RESERVED_IDENTIFIER_FIXUP_m19; + float2 _RESERVED_IDENTIFIER_FIXUP_m20; }; struct main0_out { - float3 m_72 [[user(locn0)]]; + float3 _RESERVED_IDENTIFIER_FIXUP_3976 [[user(locn0)]]; float4 gl_Position [[position]]; }; struct main0_in { - float4 m_25 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_5275 [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant _15& _17 [[buffer(0)]], constant _42& _44 [[buffer(1)]]) +vertex main0_out main0(main0_in in [[stage_in]], constant _RESERVED_IDENTIFIER_FIXUP_1365_18812& _RESERVED_IDENTIFIER_FIXUP_18812 
[[buffer(0)]], constant _RESERVED_IDENTIFIER_FIXUP_1126_22044& _RESERVED_IDENTIFIER_FIXUP_22044 [[buffer(1)]]) { main0_out out = {}; - float4 _70 = _44._m0 * float4(float3(_44._m3) + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0); - out.m_72 = normalize(float4(in.m_25.xyz, 0.0) * float3x4(float4(_17._m1[0]), float4(_17._m1[1]), float4(_17._m1[2]))); - float4 _94 = _70; - _94.y = -_70.y; - out.gl_Position = _94; + float4 _70 = _RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m0 * float4(float3(_RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m10) + (in._RESERVED_IDENTIFIER_FIXUP_5275.xyz * (_RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m17 + _RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m18)), 1.0); + out._RESERVED_IDENTIFIER_FIXUP_3976 = fast::normalize(float4(in._RESERVED_IDENTIFIER_FIXUP_5275.xyz, 0.0) * _RESERVED_IDENTIFIER_FIXUP_18812._RESERVED_IDENTIFIER_FIXUP_m1); + _70.y = -_70.y; + out.gl_Position = _70; return out; } diff --git a/reference/opt/shaders-msl/vert/read-from-row-major-array.vert b/reference/opt/shaders-msl/vert/read-from-row-major-array.vert index 9b85a25956a..ec3e7b72481 100644 --- a/reference/opt/shaders-msl/vert/read-from-row-major-array.vert +++ b/reference/opt/shaders-msl/vert/read-from-row-major-array.vert @@ -1,5 +1,3 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - #include #include @@ -7,7 +5,7 @@ using namespace metal; struct Block { - float2x3 var[3][4]; + float3x4 var[3][4]; }; struct main0_out @@ -21,17 +19,11 @@ struct main0_in float4 a_position [[attribute(0)]]; }; -// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization. 
-float2x3 spvConvertFromRowMajor2x3(float2x3 m) -{ - return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2])); -} - vertex main0_out main0(main0_in in [[stage_in]], constant Block& _104 [[buffer(0)]]) { main0_out out = {}; out.gl_Position = in.a_position; - out.v_vtxResult = ((float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[0].x - 2.0) < 0.0500000007450580596923828125) * float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[0].y - 6.0) < 0.0500000007450580596923828125)) * float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[0].z - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[1].x) < 0.0500000007450580596923828125) * float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[1].y - 5.0) < 0.0500000007450580596923828125)) * float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[1].z - 5.0) < 0.0500000007450580596923828125)); + out.v_vtxResult = ((float(abs(_104.var[0][0][0][0] - 2.0) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][1][0] - 6.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][2][0] - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(_104.var[0][0][0][1]) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][1][1] - 5.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][2][1] - 5.0) < 0.0500000007450580596923828125)); return out; } diff --git a/reference/shaders-msl/vert/layer.msl11.invalid.vert b/reference/opt/shaders-msl/vert/return-array.force-native-array.vert similarity index 61% rename from reference/shaders-msl/vert/layer.msl11.invalid.vert rename to reference/opt/shaders-msl/vert/return-array.force-native-array.vert index b6f39dca3e9..ce13349a0ff 100644 --- a/reference/shaders-msl/vert/layer.msl11.invalid.vert +++ b/reference/opt/shaders-msl/vert/return-array.force-native-array.vert @@ -6,19 +6,17 @@ using namespace metal; struct main0_out { float4 gl_Position [[position]]; - uint gl_Layer [[render_target_array_index]]; 
}; struct main0_in { - float4 coord [[attribute(0)]]; + float4 vInput1 [[attribute(1)]]; }; vertex main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - out.gl_Position = in.coord; - out.gl_Layer = uint(int(in.coord.z)); + out.gl_Position = float4(10.0) + in.vInput1; return out; } diff --git a/reference/opt/shaders-msl/vert/sign-int-types.vert b/reference/opt/shaders-msl/vert/sign-int-types.vert index 2f518b12911..f5f647d4589 100644 --- a/reference/opt/shaders-msl/vert/sign-int-types.vert +++ b/reference/opt/shaders-msl/vert/sign-int-types.vert @@ -5,6 +5,13 @@ using namespace metal; +// Implementation of the GLSL sign() function for integer types +template::value>::type> +inline T sign(T x) +{ + return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0)); +} + struct UBO { float4x4 uMVP; @@ -36,13 +43,6 @@ struct main0_in float4 aVertex [[attribute(0)]]; }; -// Implementation of the GLSL sign() function for integer types -template::value>::type> -T sign(T x) -{ - return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0)); -} - vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]) { main0_out out = {}; diff --git a/reference/opt/shaders-msl/vert/signedness-mismatch.shader-inputs.vert b/reference/opt/shaders-msl/vert/signedness-mismatch.shader-inputs.vert new file mode 100644 index 00000000000..56e00199cb1 --- /dev/null +++ b/reference/opt/shaders-msl/vert/signedness-mismatch.shader-inputs.vert @@ -0,0 +1,74 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + ushort2 a [[attribute(0)]]; + uint3 b [[attribute(1)]]; + ushort c_0 [[attribute(2)]]; + ushort c_1 [[attribute(3)]]; + uint4 d_0 [[attribute(4)]]; + uint4 d_1 [[attribute(5)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray c = {}; + spvUnsafeArray d = {}; + c[0] = in.c_0; + c[1] = in.c_1; + d[0] = in.d_0; + d[1] = in.d_1; + out.gl_Position = float4(float(int(short(in.a.x))), float(int(in.b.x)), float(uint(c[1])), float(d[0].w)); + return out; +} + diff --git a/reference/opt/shaders-msl/vert/texture_buffer.vert b/reference/opt/shaders-msl/vert/texture_buffer.vert index ee3956fad84..9d8b5c49f02 100644 --- a/reference/opt/shaders-msl/vert/texture_buffer.vert +++ b/reference/opt/shaders-msl/vert/texture_buffer.vert @@ -5,17 +5,18 @@ using namespace metal; -struct main0_out -{ - float4 gl_Position [[position]]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct main0_out +{ + float4 gl_Position [[position]]; +}; + vertex main0_out main0(texture2d uSamp [[texture(0)]], texture2d uSampo [[texture(1)]]) { main0_out out = {}; diff 
--git a/reference/opt/shaders-msl/vert/uniform-struct-out-of-order-offests.vert b/reference/opt/shaders-msl/vert/uniform-struct-out-of-order-offests.vert new file mode 100644 index 00000000000..c69775e3262 --- /dev/null +++ b/reference/opt/shaders-msl/vert/uniform-struct-out-of-order-offests.vert @@ -0,0 +1,32 @@ +#include +#include + +using namespace metal; + +struct data_u_t +{ + int4 m1[3]; + uint m3; + uint3 m2; + int4 m0[8]; +}; + +struct main0_out +{ + float foo [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vtx_posn [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant data_u_t& data_u [[buffer(0)]]) +{ + main0_out out = {}; + out.gl_Position = in.vtx_posn; + out.foo = float((uint3(data_u.m1[1].xyz) + data_u.m2).y * uint(data_u.m0[4].x)); + return out; +} + diff --git a/reference/opt/shaders-msl/vert/uniform-struct-packing-nested.vert b/reference/opt/shaders-msl/vert/uniform-struct-packing-nested.vert new file mode 100644 index 00000000000..c305623256a --- /dev/null +++ b/reference/opt/shaders-msl/vert/uniform-struct-packing-nested.vert @@ -0,0 +1,52 @@ +#include +#include + +using namespace metal; + +typedef packed_float4 packed_rm_float4x4[4]; + +struct s0 +{ + float3x4 m0; + packed_int4 m1; + packed_rm_float4x4 m2; + packed_uint2 m3; +}; + +struct s1 +{ + float4x4 m0; + int m1; + char _m2_pad[12]; + packed_uint3 m2; + s0 m3; +}; + +struct data_u_t +{ + float4 m1[5]; + float2x4 m3; + int4 m4; + s1 m2; + float3x4 m0; +}; + +struct main0_out +{ + float foo [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vtx_posn [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant data_u_t& data_u [[buffer(0)]]) +{ + main0_out out = {}; + out.gl_Position = in.vtx_posn; + out.foo = (((data_u.m1[3].y + float(data_u.m4.z)) * data_u.m0[2][1]) * data_u.m2.m0[3][2]) * data_u.m2.m3.m2[3][3]; + return out; +} + diff --git 
a/reference/opt/shaders-msl/vert/unused-position.vert b/reference/opt/shaders-msl/vert/unused-position.vert new file mode 100644 index 00000000000..7dc4672139c --- /dev/null +++ b/reference/opt/shaders-msl/vert/unused-position.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.gl_PointSize = 1.0; + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/reference/opt/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp deleted file mode 100644 index 278a8bb2ee8..00000000000 --- a/reference/opt/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp +++ /dev/null @@ -1,171 +0,0 @@ -#include -#include - -using namespace metal; - -typedef packed_float2 packed_float2x2[2]; -typedef packed_float2 packed_rm_float2x3[3]; -typedef packed_float3 packed_float2x3[2]; -typedef packed_float3 packed_rm_float3x2[2]; - -struct S0 -{ - packed_float2 a[1]; - float b; -}; - -struct S1 -{ - packed_float3 a; - float b; -}; - -struct S2 -{ - packed_float3 a[1]; - float b; -}; - -struct S3 -{ - packed_float2 a; - float b; -}; - -struct S4 -{ - float2 c; -}; - -struct Content -{ - S0 m0s[1]; - S1 m1s[1]; - S2 m2s[1]; - S0 m0; - S1 m1; - S2 m2; - S3 m3; - float m4; - S4 m3s[8]; -}; - -struct SSBO1 -{ - Content content; - Content content1[2]; - Content content2; - float2x2 m0; - float2x2 m1; - packed_float2x3 m2[4]; - float3x2 m3; - float2x2 m4; - float2x2 m5[9]; - packed_rm_float2x3 m6[4][2]; - float3x2 m7; - float array[1]; -}; - -struct S0_1 -{ - float4 a[1]; - float b; -}; - -struct S1_1 -{ - packed_float3 a; - float b; -}; - -struct S2_1 -{ - float3 a[1]; - float b; -}; - -struct S3_1 -{ - float2 a; - float b; -}; - -struct S4_1 -{ - float2 c; -}; - -struct Content_1 -{ - S0_1 m0s[1]; - S1_1 m1s[1]; - S2_1 m2s[1]; - S0_1 
m0; - S1_1 m1; - S2_1 m2; - S3_1 m3; - float m4; - char _m8_pad[12]; - /* FIXME: A padded struct is needed here. If you see this message, file a bug! */ S4_1 m3s[8]; -}; - -struct SSBO0 -{ - Content_1 content; - Content_1 content1[2]; - Content_1 content2; - float2x2 m0; - char _m4_pad[16]; - float2x2 m1; - char _m5_pad[16]; - float2x3 m2[4]; - float3x2 m3; - char _m7_pad[24]; - float2x2 m4; - char _m8_pad[16]; - float2x2 m5[9]; - float2x3 m6[4][2]; - float3x2 m7; - float4 array[1]; -}; - -struct SSBO2 -{ - float m0; - packed_float2x2 m1; - packed_rm_float3x2 m2; -}; - -kernel void main0(device SSBO1& ssbo_scalar [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]], device SSBO2& ssbo_scalar2 [[buffer(2)]]) -{ - ssbo_scalar.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0].xy; - ssbo_scalar.content.m0s[0].b = ssbo_140.content.m0s[0].b; - ssbo_scalar.content.m1s[0].a = float3(ssbo_140.content.m1s[0].a); - ssbo_scalar.content.m1s[0].b = ssbo_140.content.m1s[0].b; - ssbo_scalar.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0]; - ssbo_scalar.content.m2s[0].b = ssbo_140.content.m2s[0].b; - ssbo_scalar.content.m0.a[0] = ssbo_140.content.m0.a[0].xy; - ssbo_scalar.content.m0.b = ssbo_140.content.m0.b; - ssbo_scalar.content.m1.a = float3(ssbo_140.content.m1.a); - ssbo_scalar.content.m1.b = ssbo_140.content.m1.b; - ssbo_scalar.content.m2.a[0] = ssbo_140.content.m2.a[0]; - ssbo_scalar.content.m2.b = ssbo_140.content.m2.b; - ssbo_scalar.content.m3.a = ssbo_140.content.m3.a; - ssbo_scalar.content.m3.b = ssbo_140.content.m3.b; - ssbo_scalar.content.m4 = ssbo_140.content.m4; - ssbo_scalar.content.m3s[0].c = ssbo_140.content.m3s[0].c; - ssbo_scalar.content.m3s[1].c = ssbo_140.content.m3s[1].c; - ssbo_scalar.content.m3s[2].c = ssbo_140.content.m3s[2].c; - ssbo_scalar.content.m3s[3].c = ssbo_140.content.m3s[3].c; - ssbo_scalar.content.m3s[4].c = ssbo_140.content.m3s[4].c; - ssbo_scalar.content.m3s[5].c = ssbo_140.content.m3s[5].c; - ssbo_scalar.content.m3s[6].c = 
ssbo_140.content.m3s[6].c; - ssbo_scalar.content.m3s[7].c = ssbo_140.content.m3s[7].c; - ssbo_scalar.content.m1.a = float2x3(float3(ssbo_scalar.m2[1][0]), float3(ssbo_scalar.m2[1][1])) * float2(ssbo_scalar.content.m0.a[0]); - ssbo_scalar.m0 = float2x2(float2(ssbo_scalar2.m1[0]), float2(ssbo_scalar2.m1[1])); - ssbo_scalar2.m1[0] = transpose(ssbo_scalar.m4)[0]; - ssbo_scalar2.m1[1] = transpose(ssbo_scalar.m4)[1]; - ssbo_scalar2.m2[0] = spvConvertFromRowMajor3x2(ssbo_scalar.m3)[0]; - ssbo_scalar2.m2[1] = spvConvertFromRowMajor3x2(ssbo_scalar.m3)[1]; -} - diff --git a/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp b/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp deleted file mode 100644 index 948806db4ef..00000000000 --- a/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp +++ /dev/null @@ -1,92 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct SSBO -{ - float FragColor; -}; - -inline uint4 spvSubgroupBallot(bool value) -{ - simd_vote vote = simd_ballot(value); - // simd_ballot() returns a 64-bit integer-like object, but - // SPIR-V callers expect a uint4. We must convert. - // FIXME: This won't include higher bits if Apple ever supports - // 128 lanes in an SIMD-group. 
- return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0); -} - -inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) -{ - return !!extract_bits(ballot[bit / 32], bit % 32, 1); -} - -inline uint spvSubgroupBallotFindLSB(uint4 ballot) -{ - return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); -} - -inline uint spvSubgroupBallotFindMSB(uint4 ballot) -{ - return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); -} - -inline uint spvSubgroupBallotBitCount(uint4 ballot) -{ - return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); -} - -inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -template -inline bool spvSubgroupAllEqual(T value) -{ - return simd_all(value == simd_broadcast_first(value)); -} - -template<> -inline bool spvSubgroupAllEqual(bool value) -{ - return simd_all(value) || !simd_any(value); -} - -kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize 
[[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) -{ - uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); - uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); - uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); - uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - _9.FragColor = float(gl_NumSubgroups); - _9.FragColor = float(gl_SubgroupID); - _9.FragColor = float(gl_SubgroupSize); - _9.FragColor = float(gl_SubgroupInvocationID); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device); - simdgroup_barrier(mem_flags::mem_threadgroup); - simdgroup_barrier(mem_flags::mem_texture); - _9.FragColor = float4(gl_SubgroupEqMask).x; - _9.FragColor = float4(gl_SubgroupGeMask).x; - _9.FragColor = float4(gl_SubgroupGtMask).x; - _9.FragColor = 
float4(gl_SubgroupLeMask).x; - _9.FragColor = float4(gl_SubgroupLtMask).x; - uint4 _83 = spvSubgroupBallot(true); - float4 _165 = simd_prefix_inclusive_product(simd_product(float4(20.0))); - int4 _167 = simd_prefix_inclusive_product(simd_product(int4(20))); -} - diff --git a/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp b/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp deleted file mode 100644 index 6d32de695ac..00000000000 --- a/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include - -using namespace metal; - -struct SSBO -{ - float FragColor; -}; - -kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]]) -{ - _9.FragColor = float(gl_NumSubgroups); - _9.FragColor = float(gl_SubgroupID); - _9.FragColor = float(gl_SubgroupSize); - _9.FragColor = float(gl_SubgroupInvocationID); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device); - simdgroup_barrier(mem_flags::mem_threadgroup); - simdgroup_barrier(mem_flags::mem_texture); -} - diff --git a/reference/opt/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag new file mode 100644 index 00000000000..f0935f6dcf4 --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + 
+template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; + float2 vTex_0 [[user(locn1)]]; + float2 vTex_1 [[user(locn2)]]; + float2 vTex_2 [[user(locn3)]]; + float2 vTex_3 [[user(locn4)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]]) +{ + main0_out out = {}; + spvUnsafeArray vTex = {}; + vTex[0] = in.vTex_0; + vTex[1] = in.vTex_1; + vTex[2] = in.vTex_2; + vTex[3] = in.vTex_3; + const uint gl_ViewIndex = spvViewMask[0]; + out.FragColor = in.vColor * uTex.sample(uTexSmplr, vTex[int(gl_ViewIndex)]); + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag index 23c554940b8..67895e3e92c 100644 --- a/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag +++ b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 FragColor [[color(0)]]; @@ -20,7 +61,7 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]]) { main0_out out = {}; - float2 vTex[4] = {}; + spvUnsafeArray vTex = {}; vTex[0] = in.vTex_0; vTex[1] = in.vTex_1; vTex[2] = in.vTex_2; diff --git a/reference/opt/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag new file mode 100644 index 00000000000..274cea2de15 --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + bool _15 = gl_HelperInvocation; + gl_HelperInvocation = true, discard_fragment(); + if (!_15) + { + out.FragColor = float4(1.0, 0.0, 0.0, 1.0); + } + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag 
b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag new file mode 100644 index 00000000000..e2b2a85712a --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag @@ -0,0 +1,13 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + gl_HelperInvocation = true, discard_fragment(); + bool _19 = gl_HelperInvocation; +} + diff --git a/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag new file mode 100644 index 00000000000..82eb282f17c --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag @@ -0,0 +1,13 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + gl_HelperInvocation = true, discard_fragment(); + bool _9 = gl_HelperInvocation; +} + diff --git a/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag b/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag deleted file mode 100644 index fc9c4fcdd19..00000000000 --- a/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag +++ /dev/null @@ -1,89 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct main0_out -{ - float FragColor [[color(0)]]; -}; - -inline uint4 spvSubgroupBallot(bool value) -{ - simd_vote vote = simd_ballot(value); - // simd_ballot() returns a 64-bit integer-like object, but - // SPIR-V callers expect a uint4. We must convert. - // FIXME: This won't include higher bits if Apple ever supports - // 128 lanes in an SIMD-group. 
- return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0); -} - -inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) -{ - return !!extract_bits(ballot[bit / 32], bit % 32, 1); -} - -inline uint spvSubgroupBallotFindLSB(uint4 ballot) -{ - return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); -} - -inline uint spvSubgroupBallotFindMSB(uint4 ballot) -{ - return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); -} - -inline uint spvSubgroupBallotBitCount(uint4 ballot) -{ - return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); -} - -inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -template -inline bool spvSubgroupAllEqual(T value) -{ - return simd_all(value == simd_broadcast_first(value)); -} - -template<> -inline bool spvSubgroupAllEqual(bool value) -{ - return simd_all(value) || !simd_any(value); -} - -fragment main0_out main0() -{ - main0_out out = {}; - uint gl_SubgroupSize = simd_sum(1); - uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1); - uint4 gl_SubgroupEqMask = 
gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); - uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); - uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); - uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - out.FragColor = float(gl_SubgroupSize); - out.FragColor = float(gl_SubgroupInvocationID); - out.FragColor = float4(gl_SubgroupEqMask).x; - out.FragColor = float4(gl_SubgroupGeMask).x; - out.FragColor = float4(gl_SubgroupGtMask).x; - out.FragColor = float4(gl_SubgroupLeMask).x; - out.FragColor = float4(gl_SubgroupLtMask).x; - uint4 _63 = spvSubgroupBallot(true); - float4 _147 = simd_prefix_inclusive_product(simd_product(float4(20.0))); - int4 _149 = simd_prefix_inclusive_product(simd_product(int4(20))); - return out; -} - diff --git a/reference/opt/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert new file mode 100644 index 00000000000..e36576b86f5 --- /dev/null +++ 
b/reference/opt/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + const int gl_DeviceIndex = 0; + const uint gl_ViewIndex = 0; + out.gl_Position = float4(float(gl_DeviceIndex), float(int(gl_ViewIndex)), 0.0, 1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert new file mode 100644 index 00000000000..cc4bcc42027 --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + const int gl_DeviceIndex = 0; + out.gl_Position = float4(float(gl_DeviceIndex)); + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert new file mode 100644 index 00000000000..8959afe821e --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct MVPs +{ + float4x4 MVP[2]; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]]) +{ + main0_out out = {}; + const uint gl_ViewIndex = spvViewMask[0]; + out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position; + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert 
b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert index c42e67211e7..20eff0a124f 100644 --- a/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert +++ b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert @@ -19,11 +19,11 @@ struct main0_in float4 Position [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) +vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_BaseInstance [[base_instance]]) { main0_out out = {}; - uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1]; - gl_InstanceIndex /= spvViewMask[1]; + uint gl_ViewIndex = spvViewMask[0] + (gl_InstanceIndex - gl_BaseInstance) % spvViewMask[1]; + gl_InstanceIndex = (gl_InstanceIndex - gl_BaseInstance) / spvViewMask[1] + gl_BaseInstance; out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position; out.gl_Layer = gl_ViewIndex - spvViewMask[0]; return out; diff --git a/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert index f87d2a11adc..5152b6222ee 100644 --- a/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert +++ b/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert @@ -19,7 +19,7 @@ struct main0_in float4 Position [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) +vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_BaseInstance [[base_instance]]) { main0_out out = {}; const uint gl_ViewIndex = 0; diff --git a/reference/opt/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert 
b/reference/opt/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert index 53e26e4a8eb..86a0cea5bb0 100644 --- a/reference/opt/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert +++ b/reference/opt/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert @@ -11,7 +11,7 @@ struct main0_out vertex main0_out main0(uint gl_VertexIndex [[vertex_id]], uint gl_InstanceIndex [[instance_id]]) { main0_out out = {}; - out.gl_Position = float4(1.0, 2.0, 3.0, 4.0) * float(gl_VertexIndex + gl_InstanceIndex); + out.gl_Position = float4(1.0, 2.0, 3.0, 4.0) * float(int(gl_VertexIndex) + int(gl_InstanceIndex)); return out; } diff --git a/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag b/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag new file mode 100644 index 00000000000..a7b390a8cf7 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag @@ -0,0 +1,321 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 
View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint 
View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; 
+ float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Globals +{ + float3 SoftTransitionScale; + float4x4 ShadowViewProjectionMatrices[6]; + float InvShadowmapResolution; + float ShadowFadeFraction; + float ShadowSharpen; + float4 LightPositionAndInvRadius; + float2 ProjectionDepthBiasParameters; + float4 PointLightDepthBiasAndProjParameters; +}; + +constant float4 _471 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d SceneTexturesStruct_SceneDepthTexture [[texture(0)]], texture2d SceneTexturesStruct_GBufferATexture [[texture(1)]], texture2d 
SceneTexturesStruct_GBufferBTexture [[texture(2)]], texture2d SceneTexturesStruct_GBufferDTexture [[texture(3)]], depthcube ShadowDepthCubeTexture [[texture(4)]], texture2d SSProfilesTexture [[texture(5)]], sampler SceneTexturesStruct_SceneDepthTextureSampler [[sampler(0)]], sampler SceneTexturesStruct_GBufferATextureSampler [[sampler(1)]], sampler SceneTexturesStruct_GBufferBTextureSampler [[sampler(2)]], sampler SceneTexturesStruct_GBufferDTextureSampler [[sampler(3)]], sampler ShadowDepthTextureSampler [[sampler(4)]], sampler ShadowDepthCubeTextureSampler [[sampler(5)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float2 _114 = gl_FragCoord.xy * View.View_BufferSizeAndInvSize.zw; + float4 _118 = SceneTexturesStruct_SceneDepthTexture.sample(SceneTexturesStruct_SceneDepthTextureSampler, _114, level(0.0)); + float _119 = _118.x; + float _133 = fma(_119, View.View_InvDeviceZToWorldZTransform.x, View.View_InvDeviceZToWorldZTransform.y) + (1.0 / fma(_119, View.View_InvDeviceZToWorldZTransform.z, -View.View_InvDeviceZToWorldZTransform.w)); + float4 _147 = View.View_ScreenToWorld * float4((fma(gl_FragCoord.xy, View.View_BufferSizeAndInvSize.zw, -View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_133), _133, 1.0); + float3 _148 = _147.xyz; + float3 _152 = _Globals.LightPositionAndInvRadius.xyz - _148; + float _158 = length(_152); + bool _160 = (_158 * _Globals.LightPositionAndInvRadius.w) < 1.0; + float _207; + if (_160) + { + float3 _165 = abs(_152); + float _166 = _165.x; + float _167 = _165.y; + float _168 = _165.z; + float _170 = fast::max(_166, fast::max(_167, _168)); + int _189; + if (_170 == _166) + { + _189 = (_166 == _152.x) ? 0 : 1; + } + else + { + int _185; + if (_170 == _167) + { + _185 = (_167 == _152.y) ? 2 : 3; + } + else + { + _185 = (_168 == _152.z) ? 
4 : 5; + } + _189 = _185; + } + float4 _196 = _Globals.ShadowViewProjectionMatrices[_189] * float4(_147.xyz, 1.0); + float _198 = _196.w; + _207 = ShadowDepthCubeTexture.sample_compare(ShadowDepthCubeTextureSampler, (_152 / float3(_158)), (_196.z / _198) + ((-_Globals.PointLightDepthBiasAndProjParameters.x) / _198), level(0.0)); + } + else + { + _207 = 1.0; + } + float _213 = fast::clamp(fma(_207 - 0.5, _Globals.ShadowSharpen, 0.5), 0.0, 1.0); + float _218 = sqrt(mix(1.0, _213 * _213, _Globals.ShadowFadeFraction)); + float4 _219; + _219.z = _218; + float4 _220 = float4(float3(1.0).x, float3(1.0).y, _219.z, float3(1.0).z); + float3 _236 = fast::normalize(fma(SceneTexturesStruct_GBufferATexture.sample(SceneTexturesStruct_GBufferATextureSampler, _114, level(0.0)).xyz, float3(2.0), float3(-1.0))); + uint _240 = uint(round(SceneTexturesStruct_GBufferBTexture.sample(SceneTexturesStruct_GBufferBTextureSampler, _114, level(0.0)).w * 255.0)); + bool _248 = (_240 & 15u) == 5u; + float _448; + if (_248) + { + float4 _260 = SSProfilesTexture.read(uint2(int3(1, int(uint(fma(select(float4(0.0), SceneTexturesStruct_GBufferDTexture.sample(SceneTexturesStruct_GBufferDTextureSampler, _114, level(0.0)), bool4(!(((_240 & 4294967280u) & 16u) != 0u))).x, 255.0, 0.5))), 0).xy), 0); + float _263 = _260.y * 0.5; + float3 _266 = fma(-_236, float3(_263), _148); + float _274 = pow(fast::clamp(dot(-(_152 * float3(rsqrt(dot(_152, _152)))), _236), 0.0, 1.0), 1.0); + float _445; + if (_160) + { + float3 _278 = _152 / float3(_158); + float3 _280 = fast::normalize(cross(_278, float3(0.0, 0.0, 1.0))); + float3 _284 = float3(_Globals.InvShadowmapResolution); + float3 _285 = _280 * _284; + float3 _286 = cross(_280, _278) * _284; + float3 _287 = abs(_278); + float _288 = _287.x; + float _289 = _287.y; + float _290 = _287.z; + float _292 = fast::max(_288, fast::max(_289, _290)); + int _311; + if (_292 == _288) + { + _311 = (_288 == _278.x) ? 
0 : 1; + } + else + { + int _307; + if (_292 == _289) + { + _307 = (_289 == _278.y) ? 2 : 3; + } + else + { + _307 = (_290 == _278.z) ? 4 : 5; + } + _311 = _307; + } + float4 _318 = _Globals.ShadowViewProjectionMatrices[_311] * float4(_266, 1.0); + float _323 = _260.x * (10.0 / _Globals.LightPositionAndInvRadius.w); + float _457 = -_Globals.PointLightDepthBiasAndProjParameters.w; + float _328 = 1.0 / fma(_318.z / _318.w, _Globals.PointLightDepthBiasAndProjParameters.z, _457); + float _341 = fma(_328, _Globals.LightPositionAndInvRadius.w, -((1.0 / fma(float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(2.5), _278), level(0.0))).x, _Globals.PointLightDepthBiasAndProjParameters.z, _457)) * _Globals.LightPositionAndInvRadius.w)); + float _342 = _341 * _323; + float _363 = fma(_328, _Globals.LightPositionAndInvRadius.w, -((1.0 / fma(float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(0.77254199981689453125), fma(_285, float3(2.3776409626007080078125), _278)), level(0.0))).x, _Globals.PointLightDepthBiasAndProjParameters.z, _457)) * _Globals.LightPositionAndInvRadius.w)); + float _364 = _363 * _323; + float _386 = fma(_328, _Globals.LightPositionAndInvRadius.w, -((1.0 / fma(float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(-2.0225429534912109375), fma(_285, float3(1.46946299076080322265625), _278)), level(0.0))).x, _Globals.PointLightDepthBiasAndProjParameters.z, _457)) * _Globals.LightPositionAndInvRadius.w)); + float _387 = _386 * _323; + float _409 = fma(_328, _Globals.LightPositionAndInvRadius.w, -((1.0 / fma(float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(-2.02254199981689453125), fma(_285, float3(-1.46946299076080322265625), _278)), level(0.0))).x, _Globals.PointLightDepthBiasAndProjParameters.z, _457)) * _Globals.LightPositionAndInvRadius.w)); + float _410 = _409 * _323; + float _432 = fma(_328, _Globals.LightPositionAndInvRadius.w, -((1.0 / 
fma(float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(0.772543013095855712890625), fma(_285, float3(-2.3776409626007080078125), _278)), level(0.0))).x, _Globals.PointLightDepthBiasAndProjParameters.z, _457)) * _Globals.LightPositionAndInvRadius.w)); + float _433 = _432 * _323; + _445 = (((((fast::clamp(abs((_342 > 0.0) ? fma(_341, _323, _263) : fast::max(0.0, fma(_342, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25) + (fast::clamp(abs((_364 > 0.0) ? fma(_363, _323, _263) : fast::max(0.0, fma(_364, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_387 > 0.0) ? fma(_386, _323, _263) : fast::max(0.0, fma(_387, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_410 > 0.0) ? fma(_409, _323, _263) : fast::max(0.0, fma(_410, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_433 > 0.0) ? fma(_432, _323, _263) : fast::max(0.0, fma(_433, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25)) * 0.20000000298023223876953125; + } + else + { + _445 = 1.0; + } + _448 = fma(-_445, 0.20000000298023223876953125, 1.0); + } + else + { + _448 = 1.0; + } + _220.w = _248 ? sqrt(_448) : _218; + out.out_var_SV_Target0 = _220; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag b/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag new file mode 100644 index 00000000000..192c0b411bf --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag @@ -0,0 +1,1073 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_Globals +{ + float4 MappingPolynomial; + float3 InverseGamma; + float4 ColorMatrixR_ColorCurveCd1; + float4 ColorMatrixG_ColorCurveCd3Cm3; + float4 ColorMatrixB_ColorCurveCm2; + float4 ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3; + float4 ColorCurve_Ch1_Ch2; + float4 ColorShadow_Luma; + float4 ColorShadow_Tint1; + float4 ColorShadow_Tint2; + float FilmSlope; + float FilmToe; + float FilmShoulder; + float FilmBlackClip; + float FilmWhiteClip; + packed_float3 ColorScale; + float4 OverlayColor; + float WhiteTemp; + float WhiteTint; + float4 ColorSaturation; + float4 ColorContrast; + float4 ColorGamma; + float4 ColorGain; + float4 ColorOffset; + float4 ColorSaturationShadows; + float4 ColorContrastShadows; + float4 ColorGammaShadows; + float4 ColorGainShadows; + float4 ColorOffsetShadows; + float4 ColorSaturationMidtones; + float4 ColorContrastMidtones; + float4 ColorGammaMidtones; + float4 ColorGainMidtones; + float4 ColorOffsetMidtones; + float4 ColorSaturationHighlights; + float4 ColorContrastHighlights; + float4 ColorGammaHighlights; + float4 ColorGainHighlights; + float4 ColorOffsetHighlights; + float ColorCorrectionShadowsMax; + float ColorCorrectionHighlightsMin; + uint OutputDevice; + uint OutputGamut; + float BlueCorrection; + float ExpandGamut; +}; + +constant spvUnsafeArray _475 = spvUnsafeArray({ 
-4.0, -4.0, -3.1573765277862548828125, -0.485249996185302734375, 1.84773242473602294921875, 1.84773242473602294921875 }); +constant spvUnsafeArray _476 = spvUnsafeArray({ -0.718548238277435302734375, 2.0810306072235107421875, 3.66812419891357421875, 4.0, 4.0, 4.0 }); +constant spvUnsafeArray _479 = spvUnsafeArray({ -4.97062206268310546875, -3.0293781757354736328125, -2.1261999607086181640625, -1.5104999542236328125, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _480 = spvUnsafeArray({ 0.80891323089599609375, 1.19108676910400390625, 1.5683000087738037109375, 1.94830000400543212890625, 2.308300018310546875, 2.63840007781982421875, 2.85949993133544921875, 2.9872608184814453125, 3.0127391815185546875, 3.0127391815185546875 }); +constant spvUnsafeArray _482 = spvUnsafeArray({ -2.3010299205780029296875, -2.3010299205780029296875, -1.9312000274658203125, -1.5204999446868896484375, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _483 = spvUnsafeArray({ 0.801995217800140380859375, 1.19800484180450439453125, 1.5943000316619873046875, 1.99730002880096435546875, 2.3782999515533447265625, 2.7683999538421630859375, 3.0515000820159912109375, 3.2746293544769287109375, 3.32743072509765625, 3.32743072509765625 }); + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0), center_no_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globals [[buffer(0)]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + float3x3 _546 = float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 
0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * float3x3(float3(1.01303005218505859375, 0.0061053098179399967193603515625, -0.014971000142395496368408203125), float3(0.0076982299797236919403076171875, 0.99816501140594482421875, -0.005032029934227466583251953125), float3(-0.0028413101099431514739990234375, 0.0046851597726345062255859375, 0.92450702190399169921875)); + float3x3 _547 = _546 * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _548 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(0.98722398281097412109375, -0.0061132698319852352142333984375, 0.01595330052077770233154296875), float3(-0.007598360069096088409423828125, 1.00186002254486083984375, 0.0053300200961530208587646484375), float3(0.003072570078074932098388671875, -0.0050959498621523380279541015625, 1.0816800594329833984375)); + float3x3 _549 = _548 * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)); + float3x3 _550 = float3x3(float3(0.952552378177642822265625, 0.0, 9.25), float3(0.3439664542675018310546875, 0.728166103363037109375, -0.07213254272937774658203125), float3(0.0, 0.0, 1.00882518291473388671875)) * 
float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _551 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625)); + float3x3 _576; + for (;;) + { + if (_Globals.OutputGamut == 1u) + { + _576 = _548 * float3x3(float3(2.493396282196044921875, -0.931345880031585693359375, -0.4026944935321807861328125), float3(-0.829486787319183349609375, 1.76265966892242431640625, 0.02362460084259510040283203125), float3(0.0358506999909877777099609375, -0.076182700693607330322265625, 0.957014024257659912109375)); + break; + } + else + { + if (_Globals.OutputGamut == 2u) + { + _576 = _548 * float3x3(float3(1.71660840511322021484375, -0.3556621074676513671875, -0.253360092639923095703125), float3(-0.666682898998260498046875, 1.61647760868072509765625, 0.01576850004494190216064453125), float3(0.017642199993133544921875, -0.04277630150318145751953125, 0.94222867488861083984375)); + break; + } + else + { + if (_Globals.OutputGamut == 3u) + { + _576 = float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625)); + break; + } + else + { + if 
(_Globals.OutputGamut == 4u) + { + _576 = float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)); + break; + } + else + { + _576 = _549; + break; + } + } + } + } + } + float3 _577 = float4((in.in_var_TEXCOORD0 - float2(0.015625)) * float2(1.03225803375244140625), float(gl_Layer) * 0.0322580635547637939453125, 0.0).xyz; + float3 _599; + if (_Globals.OutputDevice >= 3u) + { + float3 _591 = pow(_577, float3(0.0126833133399486541748046875)); + _599 = pow(fast::max(float3(0.0), _591 - float3(0.8359375)) / fma(float3(-18.6875), _591, float3(18.8515625)), float3(6.277394771575927734375)) * float3(10000.0); + } + else + { + _599 = fma(exp2((_577 - float3(0.434017598628997802734375)) * float3(14.0)), float3(0.180000007152557373046875), float3(-0.00266771926544606685638427734375)); + } + float _602 = _Globals.WhiteTemp * 1.00055634975433349609375; + float _616 = (_602 <= 7000.0) ? (0.24406300485134124755859375 + ((99.1100006103515625 + ((2967800.0 - (4604438528.0 / _Globals.WhiteTemp)) / _602)) / _602)) : (0.23703999817371368408203125 + ((247.4799957275390625 + ((1901800.0 - (2005284352.0 / _Globals.WhiteTemp)) / _602)) / _602)); + float _633 = fma(1.2864121856637211749330163002014e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(0.00015411825734190642833709716796875, _Globals.WhiteTemp, 0.860117733478546142578125)) / fma(7.0814513719597016461193561553955e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(0.0008424202096648514270782470703125, _Globals.WhiteTemp, 1.0)); + float _644 = fma(4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(4.25, _Globals.WhiteTemp, 0.317398726940155029296875)) / fma(1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(-2.8974181986995972692966461181641e-05, _Globals.WhiteTemp, 1.0)); + float _649 = fma(2.0, _633, _644 * (-8.0)) + 4.0; + float2 _653 = float2((3.0 * _633) / _649, (2.0 * _644) / _649); + float2 _660 = fast::normalize(float2(_633, _644)); + 
float _665 = fma((-_660.y) * _Globals.WhiteTint, 0.0500000007450580596923828125, _633); + float _669 = fma(_660.x * _Globals.WhiteTint, 0.0500000007450580596923828125, _644); + float _674 = fma(2.0, _665, _669 * (-8.0)) + 4.0; + float2 _680 = select(float2(_616, fma(_616, fma(-3.0, _616, 2.86999988555908203125), -0.2750000059604644775390625)), _653, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _665) / _674, (2.0 * _669) / _674) - _653); + float _683 = fast::max(_680.y, 1.0000000133514319600180897396058e-10); + float3 _697 = float3(_680.x / _683, 1.0, ((1.0 - _680.x) - _680.y) / _683) * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _717 = (_599 * ((float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * ((float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)) * float3x3(float3(0.941379249095916748046875 / _697.x, 0.0, 0.0), float3(0.0, 1.04043638706207275390625 / _697.y, 0.0), float3(0.0, 0.0, 1.08976650238037109375 / _697.z))) * float3x3(float3(0.986992895603179931640625, -0.14705429971218109130859375, 0.15996269881725311279296875), float3(0.4323053061962127685546875, 0.518360316753387451171875, 0.049291200935840606689453125), float3(-0.00852870009839534759521484375, 0.0400427989661693572998046875, 0.968486726284027099609375)))) 
* float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)))) * _547; + float3 _745; + if (_Globals.ColorShadow_Tint2.w != 0.0) + { + float _724 = dot(_717, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float3 _727 = (_717 / float3(_724)) - float3(1.0); + _745 = mix(_717, _717 * (_549 * (float3x3(float3(0.544169127941131591796875, 0.23959259688854217529296875, 0.16669429838657379150390625), float3(0.23946559429168701171875, 0.702153027057647705078125, 0.058381401002407073974609375), float3(-0.0023439000360667705535888671875, 0.0361833982169628143310546875, 1.05521833896636962890625)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)))), float3((1.0 - exp2((-4.0) * dot(_727, _727))) * (1.0 - exp2((((-4.0) * _Globals.ExpandGamut) * _724) * _724)))); + } + else + { + _745 = _717; + } + float _746 = dot(_745, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float4 _751 = _Globals.ColorSaturationShadows * _Globals.ColorSaturation; + float4 _756 = _Globals.ColorContrastShadows * _Globals.ColorContrast; + float4 _761 = _Globals.ColorGammaShadows * _Globals.ColorGamma; + float4 _766 = _Globals.ColorGainShadows * _Globals.ColorGain; + float4 _771 = _Globals.ColorOffsetShadows + _Globals.ColorOffset; + float3 _772 = float3(_746); + float _804 = smoothstep(0.0, _Globals.ColorCorrectionShadowsMax, _746); + float4 _808 = _Globals.ColorSaturationHighlights * _Globals.ColorSaturation; + float4 _811 = 
_Globals.ColorContrastHighlights * _Globals.ColorContrast; + float4 _814 = _Globals.ColorGammaHighlights * _Globals.ColorGamma; + float4 _817 = _Globals.ColorGainHighlights * _Globals.ColorGain; + float4 _820 = _Globals.ColorOffsetHighlights + _Globals.ColorOffset; + float _852 = smoothstep(_Globals.ColorCorrectionHighlightsMin, 1.0, _746); + float4 _855 = _Globals.ColorSaturationMidtones * _Globals.ColorSaturation; + float4 _858 = _Globals.ColorContrastMidtones * _Globals.ColorContrast; + float4 _861 = _Globals.ColorGammaMidtones * _Globals.ColorGamma; + float4 _864 = _Globals.ColorGainMidtones * _Globals.ColorGain; + float4 _867 = _Globals.ColorOffsetMidtones + _Globals.ColorOffset; + float3 _905 = fma(fma(pow(pow(fast::max(float3(0.0), mix(_772, _745, _808.xyz * float3(_808.w))) * float3(5.5555553436279296875), _811.xyz * float3(_811.w)) * float3(0.180000007152557373046875), float3(1.0) / (_814.xyz * float3(_814.w))), _817.xyz * float3(_817.w), _820.xyz + float3(_820.w)), float3(_852), fma(fma(pow(pow(fast::max(float3(0.0), mix(_772, _745, _751.xyz * float3(_751.w))) * float3(5.5555553436279296875), _756.xyz * float3(_756.w)) * float3(0.180000007152557373046875), float3(1.0) / (_761.xyz * float3(_761.w))), _766.xyz * float3(_766.w), _771.xyz + float3(_771.w)), float3(1.0 - _804), fma(pow(pow(fast::max(float3(0.0), mix(_772, _745, _855.xyz * float3(_855.w))) * float3(5.5555553436279296875), _858.xyz * float3(_858.w)) * float3(0.180000007152557373046875), float3(1.0) / (_861.xyz * float3(_861.w))), _864.xyz * float3(_864.w), _867.xyz + float3(_867.w)) * float3(_804 - _852))); + float3 _906 = _905 * _549; + float3 _914 = float3(_Globals.BlueCorrection); + float3 _916 = mix(_905, _905 * ((_551 * float3x3(float3(0.940437257289886474609375, -0.01830687932670116424560546875, 0.07786960899829864501953125), float3(0.008378696627914905548095703125, 0.82866001129150390625, 0.162961304187774658203125), float3(0.0005471261101774871349334716796875, 
-0.00088337459601461887359619140625, 1.00033628940582275390625))) * _550), _914) * _551; + float _917 = _916.x; + float _918 = _916.y; + float _920 = _916.z; + float _923 = fast::max(fast::max(_917, _918), _920); + float _928 = (fast::max(_923, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_917, _918), _920), 1.0000000133514319600180897396058e-10)) / fast::max(_923, 0.00999999977648258209228515625); + float _941 = fma(1.75, sqrt(fma(_917, _917 - _920, fma(_920, _920 - _918, _918 * (_918 - _917)))), (_920 + _918) + _917); + float _942 = _941 * 0.3333333432674407958984375; + float _943 = _928 - 0.4000000059604644775390625; + float _948 = fast::max(1.0 - abs(_943 * 2.5), 0.0); + float _956 = fma(float(int(sign(_943 * 5.0))), fma(-_948, _948, 1.0), 1.0) * 0.02500000037252902984619140625; + float _969; + if (_942 <= 0.053333334624767303466796875) + { + _969 = _956; + } + else + { + float _968; + if (_942 >= 0.1599999964237213134765625) + { + _968 = 0.0; + } + else + { + _968 = _956 * ((0.23999999463558197021484375 / _941) - 0.5); + } + _969 = _968; + } + float3 _972 = _916 * float3(1.0 + _969); + float _973 = _972.x; + float _974 = _972.y; + float _976 = _972.z; + float _990; + if ((_973 == _974) && (_974 == _976)) + { + _990 = 0.0; + } + else + { + _990 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_974 - _976), fma(2.0, _973, -_974) - _976); + } + float _995; + if (_990 < 0.0) + { + _995 = _990 + 360.0; + } + else + { + _995 = _990; + } + float _996 = fast::clamp(_995, 0.0, 360.0); + float _1001; + if (_996 > 180.0) + { + _1001 = _996 - 360.0; + } + else + { + _1001 = _996; + } + float _1005 = smoothstep(0.0, 1.0, 1.0 - abs(_1001 * 0.01481481455266475677490234375)); + _972.x = fma(((_1005 * _1005) * _928) * (0.02999999932944774627685546875 - _973), 0.180000007152557373046875, _973); + float3 _1014 = fast::max(float3(0.0), _972 * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, 
-0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375))); + float _1023 = (1.0 + _Globals.FilmBlackClip) - _Globals.FilmToe; + float _1026 = 1.0 + _Globals.FilmWhiteClip; + float _1029 = _1026 - _Globals.FilmShoulder; + float _1056; + if (_Globals.FilmToe > 0.800000011920928955078125) + { + _1056 = ((0.819999992847442626953125 - _Globals.FilmToe) / _Globals.FilmSlope) + (-0.744727432727813720703125); + } + else + { + float _1035 = (0.180000007152557373046875 + _Globals.FilmBlackClip) / _1023; + _1056 = fma(log(_1035 / (2.0 - _1035)) * (-0.5), _1023 / _Globals.FilmSlope, -0.744727432727813720703125); + } + float _1061 = ((1.0 - _Globals.FilmToe) / _Globals.FilmSlope) - _1056; + float _1063 = (_Globals.FilmShoulder / _Globals.FilmSlope) - _1061; + float3 _1064 = log(mix(float3(dot(_1014, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1014, float3(0.959999978542327880859375))); + float3 _1067 = _1064 * float3(0.4342944622039794921875); + float3 _1071 = float3(_Globals.FilmSlope) * fma(_1064, float3(0.4342944622039794921875), float3(_1061)); + float3 _1079 = float3(_1056); + float3 _1080 = fma(_1064, float3(0.4342944622039794921875), -_1079); + float3 _1092 = float3(_1063); + float3 _1106 = fast::clamp(_1080 / float3(_1063 - _1056), float3(0.0), float3(1.0)); + float3 _1110 = select(_1106, float3(1.0) - _1106, bool3(_1063 < _1056)); + float3 _1115 = mix(select(_1071, float3(-_Globals.FilmBlackClip) + (float3(2.0 * _1023) / (float3(1.0) + exp(float3(((-2.0) * _Globals.FilmSlope) / _1023) * _1080))), _1067 < _1079), select(_1071, float3(_1026) - (float3(2.0 * _1029) / (float3(1.0) + exp(float3((2.0 * _Globals.FilmSlope) / _1029) * fma(_1064, float3(0.4342944622039794921875), -_1092)))), _1067 > _1092), (fma(float3(-2.0), _1110, float3(3.0)) * _1110) * 
_1110); + float3 _1119 = fast::max(float3(0.0), mix(float3(dot(_1115, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1115, float3(0.930000007152557373046875))); + float3 _1189; + if (_Globals.ColorShadow_Tint2.w == 0.0) + { + float3 _1157 = fast::max(float3(0.0), float3(dot(_906, _Globals.ColorMatrixR_ColorCurveCd1.xyz), dot(_906, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz), dot(_906, _Globals.ColorMatrixB_ColorCurveCm2.xyz)) * fma(_Globals.ColorShadow_Tint2.xyz, float3(1.0 / (dot(_906, _Globals.ColorShadow_Luma.xyz) + 1.0)), _Globals.ColorShadow_Tint1.xyz)); + float3 _1162 = fast::max(float3(0.0), _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx - _1157); + float3 _1164 = fast::max(_1157, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz); + _1189 = fma(fma(_1164, _Globals.ColorCurve_Ch1_Ch2.xxx, _Globals.ColorCurve_Ch1_Ch2.yyy), float3(1.0) / (_1164 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.www), fma(fast::clamp(_1157, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz), _Globals.ColorMatrixB_ColorCurveCm2.www, fma(_1162 * _Globals.ColorMatrixR_ColorCurveCd1.www, float3(1.0) / (_1162 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.yyy), _Globals.ColorMatrixG_ColorCurveCd3Cm3.www))) - float3(0.00200000009499490261077880859375); + } + else + { + _1189 = fast::max(float3(0.0), mix(_1119, _1119 * ((_551 * float3x3(float3(1.06317996978759765625, 0.02339559979736804962158203125, -0.08657260239124298095703125), float3(-0.010633699595928192138671875, 1.2063200473785400390625, -0.1956900060176849365234375), float3(-0.0005908869788981974124908447265625, 0.00105247995816171169281005859375, 0.999538004398345947265625))) * _550), _914) * _549); + } + float3 _1218 = pow(fast::max(float3(0.0), mix((fma(float3(_Globals.MappingPolynomial.x), _1189 * _1189, float3(_Globals.MappingPolynomial.y) * _1189) + float3(_Globals.MappingPolynomial.z)) * float3(_Globals.ColorScale), _Globals.OverlayColor.xyz, 
float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y)); + float3 _3001; + if (_Globals.OutputDevice == 0u) + { + float _2961 = _1218.x; + float _2973; + for (;;) + { + if (_2961 < 0.00313066993840038776397705078125) + { + _2973 = _2961 * 12.9200000762939453125; + break; + } + _2973 = fma(pow(_2961, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float _2974 = _1218.y; + float _2986; + for (;;) + { + if (_2974 < 0.00313066993840038776397705078125) + { + _2986 = _2974 * 12.9200000762939453125; + break; + } + _2986 = fma(pow(_2974, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float _2987 = _1218.z; + float _2999; + for (;;) + { + if (_2987 < 0.00313066993840038776397705078125) + { + _2999 = _2987 * 12.9200000762939453125; + break; + } + _2999 = fma(pow(_2987, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + _3001 = float3(_2973, _2986, _2999); + } + else + { + float3 _2960; + if (_Globals.OutputDevice == 1u) + { + float3 _2953 = fast::max(float3(6.1035199905745685100555419921875e-05), (_1218 * _547) * _576); + _2960 = fast::min(_2953 * float3(4.5), fma(pow(fast::max(_2953, float3(0.017999999225139617919921875)), float3(0.449999988079071044921875)), float3(1.09899997711181640625), float3(-0.098999999463558197021484375))); + } + else + { + float3 _2950; + if ((_Globals.OutputDevice == 3u) || (_Globals.OutputDevice == 5u)) + { + float3 _2100 = (_906 * float3(1.5)) * (_546 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _2101 = _2100.x; + float _2102 = _2100.y; + float _2104 = _2100.z; + float _2107 = fast::max(fast::max(_2101, _2102), _2104); + float _2112 = (fast::max(_2107, 
1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_2101, _2102), _2104), 1.0000000133514319600180897396058e-10)) / fast::max(_2107, 0.00999999977648258209228515625); + float _2125 = fma(1.75, sqrt(fma(_2101, _2101 - _2104, fma(_2104, _2104 - _2102, _2102 * (_2102 - _2101)))), (_2104 + _2102) + _2101); + float _2126 = _2125 * 0.3333333432674407958984375; + float _2127 = _2112 - 0.4000000059604644775390625; + float _2132 = fast::max(1.0 - abs(_2127 * 2.5), 0.0); + float _2140 = fma(float(int(sign(_2127 * 5.0))), fma(-_2132, _2132, 1.0), 1.0) * 0.02500000037252902984619140625; + float _2153; + if (_2126 <= 0.053333334624767303466796875) + { + _2153 = _2140; + } + else + { + float _2152; + if (_2126 >= 0.1599999964237213134765625) + { + _2152 = 0.0; + } + else + { + _2152 = _2140 * ((0.23999999463558197021484375 / _2125) - 0.5); + } + _2153 = _2152; + } + float3 _2156 = _2100 * float3(1.0 + _2153); + float _2157 = _2156.x; + float _2158 = _2156.y; + float _2160 = _2156.z; + float _2174; + if ((_2157 == _2158) && (_2158 == _2160)) + { + _2174 = 0.0; + } + else + { + _2174 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_2158 - _2160), fma(2.0, _2157, -_2158) - _2160); + } + float _2179; + if (_2174 < 0.0) + { + _2179 = _2174 + 360.0; + } + else + { + _2179 = _2174; + } + float _2180 = fast::clamp(_2179, 0.0, 360.0); + float _2185; + if (_2180 > 180.0) + { + _2185 = _2180 - 360.0; + } + else + { + _2185 = _2180; + } + float _2235; + if ((_2185 > (-67.5)) && (_2185 < 67.5)) + { + float _2191 = _2185 - (-67.5); + int _2193 = int(_2191 * 0.0296296291053295135498046875); + float _2195 = fma(_2191, 0.0296296291053295135498046875, -float(_2193)); + float _2196 = _2195 * _2195; + float _2197 = _2196 * _2195; + float _2234; + if (_2193 == 3) + { + _2234 = fma(_2195, -0.5, fma(_2197, -0.16666667163372039794921875, _2196 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _2227; + if (_2193 == 2) + { + _2227 = fma(_2197, 0.5, _2196 
* (-1.0)) + 0.666666686534881591796875; + } + else + { + float _2222; + if (_2193 == 1) + { + _2222 = fma(_2195, 0.5, fma(_2197, -0.5, _2196 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _2215; + if (_2193 == 0) + { + _2215 = _2197 * 0.16666667163372039794921875; + } + else + { + _2215 = 0.0; + } + _2222 = _2215; + } + _2227 = _2222; + } + _2234 = _2227; + } + _2235 = _2234; + } + else + { + _2235 = 0.0; + } + _2156.x = fma(((_2235 * 1.5) * _2112) * (0.02999999932944774627685546875 - _2157), 0.180000007152557373046875, _2157); + float3 _2245 = fast::clamp(fast::clamp(_2156, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _2248 = mix(float3(dot(_2245, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _2245, float3(0.959999978542327880859375)); + float _2249 = _2248.x; + float _2257 = log((_2249 <= 0.0) ? 
6.103515625e-05 : _2249); + float _2258 = _2257 * 0.4342944622039794921875; + float _2327; + if (_2258 <= (-5.2601776123046875)) + { + _2327 = -4.0; + } + else + { + float _2324; + if ((_2258 > (-5.2601776123046875)) && (_2258 < (-0.744727432727813720703125))) + { + float _2304 = fma(_2257, 0.4342944622039794921875, 5.2601776123046875); + int _2308 = int(_2304 * 0.6643855571746826171875); + float _2310 = fma(_2304, 0.6643855571746826171875, -float(_2308)); + _2324 = dot(float3(_2310 * _2310, _2310, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2308], _475[_2308 + 1], _475[_2308 + 2])); + } + else + { + float _2303; + if ((_2258 >= (-0.744727432727813720703125)) && (_2258 < 4.673812389373779296875)) + { + float _2283 = fma(_2257, 0.4342944622039794921875, 0.744727432727813720703125); + int _2287 = int(_2283 * 0.55365467071533203125); + float _2289 = fma(_2283, 0.55365467071533203125, -float(_2287)); + _2303 = dot(float3(_2289 * _2289, _2289, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2287], _476[_2287 + 1], _476[_2287 + 2])); + } + else + { + _2303 = 4.0; + } + _2324 = _2303; + } + _2327 = _2324; + } + float _2330 = _2248.y; + float _2333 = log((_2330 <= 0.0) ? 
6.103515625e-05 : _2330); + float _2334 = _2333 * 0.4342944622039794921875; + float _2401; + if (_2334 <= (-5.2601776123046875)) + { + _2401 = -4.0; + } + else + { + float _2398; + if ((_2334 > (-5.2601776123046875)) && (_2334 < (-0.744727432727813720703125))) + { + float _2378 = fma(_2333, 0.4342944622039794921875, 5.2601776123046875); + int _2382 = int(_2378 * 0.6643855571746826171875); + float _2384 = fma(_2378, 0.6643855571746826171875, -float(_2382)); + _2398 = dot(float3(_2384 * _2384, _2384, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2382], _475[_2382 + 1], _475[_2382 + 2])); + } + else + { + float _2377; + if ((_2334 >= (-0.744727432727813720703125)) && (_2334 < 4.673812389373779296875)) + { + float _2357 = fma(_2333, 0.4342944622039794921875, 0.744727432727813720703125); + int _2361 = int(_2357 * 0.55365467071533203125); + float _2363 = fma(_2357, 0.55365467071533203125, -float(_2361)); + _2377 = dot(float3(_2363 * _2363, _2363, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2361], _476[_2361 + 1], _476[_2361 + 2])); + } + else + { + _2377 = 4.0; + } + _2398 = _2377; + } + _2401 = _2398; + } + float _2404 = _2248.z; + float _2407 = log((_2404 <= 0.0) ? 
6.103515625e-05 : _2404); + float _2408 = _2407 * 0.4342944622039794921875; + float _2475; + if (_2408 <= (-5.2601776123046875)) + { + _2475 = -4.0; + } + else + { + float _2472; + if ((_2408 > (-5.2601776123046875)) && (_2408 < (-0.744727432727813720703125))) + { + float _2452 = fma(_2407, 0.4342944622039794921875, 5.2601776123046875); + int _2456 = int(_2452 * 0.6643855571746826171875); + float _2458 = fma(_2452, 0.6643855571746826171875, -float(_2456)); + _2472 = dot(float3(_2458 * _2458, _2458, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2456], _475[_2456 + 1], _475[_2456 + 2])); + } + else + { + float _2451; + if ((_2408 >= (-0.744727432727813720703125)) && (_2408 < 4.673812389373779296875)) + { + float _2431 = fma(_2407, 0.4342944622039794921875, 0.744727432727813720703125); + int _2435 = int(_2431 * 0.55365467071533203125); + float _2437 = fma(_2431, 0.55365467071533203125, -float(_2435)); + _2451 = dot(float3(_2437 * _2437, _2437, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2435], _476[_2435 + 1], _476[_2435 + 2])); + } + else + { + _2451 = 4.0; + } + _2472 = _2451; + } + _2475 = _2472; + } + float3 _2479 = (float3(pow(10.0, _2327), pow(10.0, _2401), pow(10.0, _2475)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _2685 = _2479.x; + float _2688 = log((_2685 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2685); + float _2689 = _2688 * 0.4342944622039794921875; + float _2768; + if (_2689 <= (-3.84832763671875)) + { + _2768 = fma(_2688, 1.3028833866119384765625, fma(8.86110210418701171875, 1.3028833866119384765625, -4.0)); + } + else + { + float _2760; + if ((_2689 > (-3.84832763671875)) && (_2689 < 0.68124115467071533203125)) + { + float _2743 = (7.0 * fma(_2688, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2744 = int(_2743); + float _2746 = _2743 - float(_2744); + _2760 = dot(float3(_2746 * _2746, _2746, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_479[_2744], _479[_2744 + 1], _479[_2744 + 2])); + } + else + { + float _2739; + if ((_2689 >= 0.68124115467071533203125) && (_2689 < 3.65370273590087890625)) + { + float _2722 = (7.0 * fma(_2688, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.4129619598388671875, 0.4342944622039794921875, -0.68124115467071533203125); + int _2723 = int(_2722); + float _2725 = _2722 - float(_2723); + _2739 = dot(float3(_2725 * _2725, _2725, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_480[_2723], _480[_2723 + 1], _480[_2723 + 2])); + } + else + { + _2739 = fma(_2688, 0.026057668030261993408203125, fma(-8.4129619598388671875, 0.026057668030261993408203125, 3.0)); + } + _2760 = _2739; + } + _2768 = _2760; + } + float _2771 = _2479.y; + float _2774 = log((_2771 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2771); + float _2775 = _2774 * 0.4342944622039794921875; + float _2852; + if (_2775 <= (-3.84832763671875)) + { + _2852 = fma(_2774, 1.3028833866119384765625, fma(8.86110210418701171875, 1.3028833866119384765625, -4.0)); + } + else + { + float _2844; + if ((_2775 > (-3.84832763671875)) && (_2775 < 0.68124115467071533203125)) + { + float _2827 = (7.0 * fma(_2774, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2828 = int(_2827); + float _2830 = _2827 - float(_2828); + _2844 = dot(float3(_2830 * _2830, _2830, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_479[_2828], _479[_2828 + 1], _479[_2828 + 2])); + } + else + { + float _2823; + if ((_2775 >= 0.68124115467071533203125) && (_2775 < 3.65370273590087890625)) + { + float _2806 = (7.0 * fma(_2774, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.4129619598388671875, 0.4342944622039794921875, -0.68124115467071533203125); + int _2807 = int(_2806); + float _2809 = _2806 - float(_2807); + _2823 = dot(float3(_2809 * _2809, _2809, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_480[_2807], _480[_2807 + 1], _480[_2807 + 2])); + } + else + { + _2823 = fma(_2774, 0.026057668030261993408203125, fma(-8.4129619598388671875, 0.026057668030261993408203125, 3.0)); + } + _2844 = _2823; + } + _2852 = _2844; + } + float _2855 = _2479.z; + float _2858 = log((_2855 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2855); + float _2859 = _2858 * 0.4342944622039794921875; + float _2936; + if (_2859 <= (-3.84832763671875)) + { + _2936 = fma(_2858, 1.3028833866119384765625, fma(8.86110210418701171875, 1.3028833866119384765625, -4.0)); + } + else + { + float _2928; + if ((_2859 > (-3.84832763671875)) && (_2859 < 0.68124115467071533203125)) + { + float _2911 = (7.0 * fma(_2858, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2912 = int(_2911); + float _2914 = _2911 - float(_2912); + _2928 = dot(float3(_2914 * _2914, _2914, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_479[_2912], _479[_2912 + 1], _479[_2912 + 2])); + } + else + { + float _2907; + if ((_2859 >= 0.68124115467071533203125) && (_2859 < 3.65370273590087890625)) + { + float _2890 = (7.0 * fma(_2858, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.4129619598388671875, 0.4342944622039794921875, -0.68124115467071533203125); + int _2891 = int(_2890); + float _2893 = _2890 - float(_2891); + _2907 = dot(float3(_2893 * _2893, _2893, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_480[_2891], _480[_2891 + 1], _480[_2891 + 2])); + } + else + { + _2907 = fma(_2858, 0.026057668030261993408203125, fma(-8.4129619598388671875, 0.026057668030261993408203125, 3.0)); + } + _2928 = _2907; + } + _2936 = _2928; + } + float3 _2942 = pow(((float3(pow(10.0, _2768), pow(10.0, _2852), pow(10.0, _2936)) - float3(3.5073844628641381859779357910156e-05)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2950 = pow(fma(float3(18.8515625), _2942, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2942, float3(1.0))), float3(78.84375)); + } + else + { + float3 _2097; + if ((_Globals.OutputDevice == 4u) || (_Globals.OutputDevice == 6u)) + { + float3 _1263 = (_906 * float3(1.5)) 
* (_546 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _1264 = _1263.x; + float _1265 = _1263.y; + float _1267 = _1263.z; + float _1270 = fast::max(fast::max(_1264, _1265), _1267); + float _1275 = (fast::max(_1270, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_1264, _1265), _1267), 1.0000000133514319600180897396058e-10)) / fast::max(_1270, 0.00999999977648258209228515625); + float _1288 = fma(1.75, sqrt(fma(_1264, _1264 - _1267, fma(_1267, _1267 - _1265, _1265 * (_1265 - _1264)))), (_1267 + _1265) + _1264); + float _1289 = _1288 * 0.3333333432674407958984375; + float _1290 = _1275 - 0.4000000059604644775390625; + float _1295 = fast::max(1.0 - abs(_1290 * 2.5), 0.0); + float _1303 = fma(float(int(sign(_1290 * 5.0))), fma(-_1295, _1295, 1.0), 1.0) * 0.02500000037252902984619140625; + float _1316; + if (_1289 <= 0.053333334624767303466796875) + { + _1316 = _1303; + } + else + { + float _1315; + if (_1289 >= 0.1599999964237213134765625) + { + _1315 = 0.0; + } + else + { + _1315 = _1303 * ((0.23999999463558197021484375 / _1288) - 0.5); + } + _1316 = _1315; + } + float3 _1319 = _1263 * float3(1.0 + _1316); + float _1320 = _1319.x; + float _1321 = _1319.y; + float _1323 = _1319.z; + float _1337; + if ((_1320 == _1321) && (_1321 == _1323)) + { + _1337 = 0.0; + } + else + { + _1337 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_1321 - _1323), fma(2.0, _1320, -_1321) - _1323); + } + float _1342; + if (_1337 < 0.0) + { + _1342 = _1337 + 360.0; + } + else + { + _1342 = _1337; + } + float _1343 = fast::clamp(_1342, 0.0, 360.0); + float _1348; + if (_1343 > 180.0) + { + _1348 = _1343 - 360.0; + } + else + { + _1348 = _1343; + } + float _1398; + if ((_1348 > (-67.5)) && (_1348 < 67.5)) + { + float _1354 = _1348 - (-67.5); + int _1356 = int(_1354 * 
0.0296296291053295135498046875); + float _1358 = fma(_1354, 0.0296296291053295135498046875, -float(_1356)); + float _1359 = _1358 * _1358; + float _1360 = _1359 * _1358; + float _1397; + if (_1356 == 3) + { + _1397 = fma(_1358, -0.5, fma(_1360, -0.16666667163372039794921875, _1359 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _1390; + if (_1356 == 2) + { + _1390 = fma(_1360, 0.5, _1359 * (-1.0)) + 0.666666686534881591796875; + } + else + { + float _1385; + if (_1356 == 1) + { + _1385 = fma(_1358, 0.5, fma(_1360, -0.5, _1359 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _1378; + if (_1356 == 0) + { + _1378 = _1360 * 0.16666667163372039794921875; + } + else + { + _1378 = 0.0; + } + _1385 = _1378; + } + _1390 = _1385; + } + _1397 = _1390; + } + _1398 = _1397; + } + else + { + _1398 = 0.0; + } + _1319.x = fma(((_1398 * 1.5) * _1275) * (0.02999999932944774627685546875 - _1320), 0.180000007152557373046875, _1320); + float3 _1408 = fast::clamp(fast::clamp(_1319, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _1411 = mix(float3(dot(_1408, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1408, float3(0.959999978542327880859375)); + float _1412 = _1411.x; + float _1420 = log((_1412 <= 0.0) ? 
6.103515625e-05 : _1412); + float _1421 = _1420 * 0.4342944622039794921875; + float _1490; + if (_1421 <= (-5.2601776123046875)) + { + _1490 = -4.0; + } + else + { + float _1487; + if ((_1421 > (-5.2601776123046875)) && (_1421 < (-0.744727432727813720703125))) + { + float _1467 = fma(_1420, 0.4342944622039794921875, 5.2601776123046875); + int _1471 = int(_1467 * 0.6643855571746826171875); + float _1473 = fma(_1467, 0.6643855571746826171875, -float(_1471)); + _1487 = dot(float3(_1473 * _1473, _1473, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1471], _475[_1471 + 1], _475[_1471 + 2])); + } + else + { + float _1466; + if ((_1421 >= (-0.744727432727813720703125)) && (_1421 < 4.673812389373779296875)) + { + float _1446 = fma(_1420, 0.4342944622039794921875, 0.744727432727813720703125); + int _1450 = int(_1446 * 0.55365467071533203125); + float _1452 = fma(_1446, 0.55365467071533203125, -float(_1450)); + _1466 = dot(float3(_1452 * _1452, _1452, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1450], _476[_1450 + 1], _476[_1450 + 2])); + } + else + { + _1466 = 4.0; + } + _1487 = _1466; + } + _1490 = _1487; + } + float _1493 = _1411.y; + float _1496 = log((_1493 <= 0.0) ? 
6.103515625e-05 : _1493); + float _1497 = _1496 * 0.4342944622039794921875; + float _1564; + if (_1497 <= (-5.2601776123046875)) + { + _1564 = -4.0; + } + else + { + float _1561; + if ((_1497 > (-5.2601776123046875)) && (_1497 < (-0.744727432727813720703125))) + { + float _1541 = fma(_1496, 0.4342944622039794921875, 5.2601776123046875); + int _1545 = int(_1541 * 0.6643855571746826171875); + float _1547 = fma(_1541, 0.6643855571746826171875, -float(_1545)); + _1561 = dot(float3(_1547 * _1547, _1547, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1545], _475[_1545 + 1], _475[_1545 + 2])); + } + else + { + float _1540; + if ((_1497 >= (-0.744727432727813720703125)) && (_1497 < 4.673812389373779296875)) + { + float _1520 = fma(_1496, 0.4342944622039794921875, 0.744727432727813720703125); + int _1524 = int(_1520 * 0.55365467071533203125); + float _1526 = fma(_1520, 0.55365467071533203125, -float(_1524)); + _1540 = dot(float3(_1526 * _1526, _1526, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1524], _476[_1524 + 1], _476[_1524 + 2])); + } + else + { + _1540 = 4.0; + } + _1561 = _1540; + } + _1564 = _1561; + } + float _1567 = _1411.z; + float _1570 = log((_1567 <= 0.0) ? 
6.103515625e-05 : _1567); + float _1571 = _1570 * 0.4342944622039794921875; + float _1638; + if (_1571 <= (-5.2601776123046875)) + { + _1638 = -4.0; + } + else + { + float _1635; + if ((_1571 > (-5.2601776123046875)) && (_1571 < (-0.744727432727813720703125))) + { + float _1615 = fma(_1570, 0.4342944622039794921875, 5.2601776123046875); + int _1619 = int(_1615 * 0.6643855571746826171875); + float _1621 = fma(_1615, 0.6643855571746826171875, -float(_1619)); + _1635 = dot(float3(_1621 * _1621, _1621, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1619], _475[_1619 + 1], _475[_1619 + 2])); + } + else + { + float _1614; + if ((_1571 >= (-0.744727432727813720703125)) && (_1571 < 4.673812389373779296875)) + { + float _1594 = fma(_1570, 0.4342944622039794921875, 0.744727432727813720703125); + int _1598 = int(_1594 * 0.55365467071533203125); + float _1600 = fma(_1594, 0.55365467071533203125, -float(_1598)); + _1614 = dot(float3(_1600 * _1600, _1600, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1598], _476[_1598 + 1], _476[_1598 + 2])); + } + else + { + _1614 = 4.0; + } + _1635 = _1614; + } + _1638 = _1635; + } + float3 _1642 = (float3(pow(10.0, _1490), pow(10.0, _1564), pow(10.0, _1638)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _1848 = _1642.x; + float _1851 = log((_1848 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _1848); + float _1852 = _1851 * 0.4342944622039794921875; + float _1926; + if (_1852 <= (-3.84832763671875)) + { + _1926 = -2.3010299205780029296875; + } + else + { + float _1923; + if ((_1852 > (-3.84832763671875)) && (_1852 < 0.68124115467071533203125)) + { + float _1906 = (7.0 * fma(_1851, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _1907 = int(_1906); + float _1909 = _1906 - float(_1907); + _1923 = dot(float3(_1909 * _1909, _1909, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_482[_1907], _482[_1907 + 1], _482[_1907 + 2])); + } + else + { + float _1902; + if ((_1852 >= 0.68124115467071533203125) && (_1852 < 3.761315822601318359375)) + { + float _1885 = (7.0 * fma(_1851, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.66075038909912109375, 0.4342944622039794921875, -0.68124115467071533203125); + int _1886 = int(_1885); + float _1888 = _1885 - float(_1886); + _1902 = dot(float3(_1888 * _1888, _1888, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_483[_1886], _483[_1886 + 1], _483[_1886 + 2])); + } + else + { + _1902 = fma(_1851, 0.05211533606052398681640625, fma(-8.66075038909912109375, 0.05211533606052398681640625, 3.3010299205780029296875)); + } + _1923 = _1902; + } + _1926 = _1923; + } + float _1929 = _1642.y; + float _1932 = log((_1929 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _1929); + float _1933 = _1932 * 0.4342944622039794921875; + float _2005; + if (_1933 <= (-3.84832763671875)) + { + _2005 = -2.3010299205780029296875; + } + else + { + float _2002; + if ((_1933 > (-3.84832763671875)) && (_1933 < 0.68124115467071533203125)) + { + float _1985 = (7.0 * fma(_1932, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _1986 = int(_1985); + float _1988 = _1985 - float(_1986); + _2002 = dot(float3(_1988 * _1988, _1988, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_482[_1986], _482[_1986 + 1], _482[_1986 + 2])); + } + else + { + float _1981; + if ((_1933 >= 0.68124115467071533203125) && (_1933 < 3.761315822601318359375)) + { + float _1964 = (7.0 * fma(_1932, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.66075038909912109375, 0.4342944622039794921875, -0.68124115467071533203125); + int _1965 = int(_1964); + float _1967 = _1964 - float(_1965); + _1981 = dot(float3(_1967 * _1967, _1967, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_483[_1965], _483[_1965 + 1], _483[_1965 + 2])); + } + else + { + _1981 = fma(_1932, 0.05211533606052398681640625, fma(-8.66075038909912109375, 0.05211533606052398681640625, 3.3010299205780029296875)); + } + _2002 = _1981; + } + _2005 = _2002; + } + float _2008 = _1642.z; + float _2011 = log((_2008 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2008); + float _2012 = _2011 * 0.4342944622039794921875; + float _2084; + if (_2012 <= (-3.84832763671875)) + { + _2084 = -2.3010299205780029296875; + } + else + { + float _2081; + if ((_2012 > (-3.84832763671875)) && (_2012 < 0.68124115467071533203125)) + { + float _2064 = (7.0 * fma(_2011, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2065 = int(_2064); + float _2067 = _2064 - float(_2065); + _2081 = dot(float3(_2067 * _2067, _2067, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_482[_2065], _482[_2065 + 1], _482[_2065 + 2])); + } + else + { + float _2060; + if ((_2012 >= 0.68124115467071533203125) && (_2012 < 3.761315822601318359375)) + { + float _2043 = (7.0 * fma(_2011, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.66075038909912109375, 0.4342944622039794921875, -0.68124115467071533203125); + int _2044 = int(_2043); + float _2046 = _2043 - float(_2044); + _2060 = dot(float3(_2046 * _2046, _2046, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_483[_2044], _483[_2044 + 1], _483[_2044 + 2])); + } + else + { + _2060 = fma(_2011, 0.05211533606052398681640625, fma(-8.66075038909912109375, 0.05211533606052398681640625, 3.3010299205780029296875)); + } + _2081 = _2060; + } + _2084 = _2081; + } + float3 _2089 = pow((float3(pow(10.0, _1926), pow(10.0, _2005), pow(10.0, _2084)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2097 = pow(fma(float3(18.8515625), _2089, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2089, float3(1.0))), float3(78.84375)); + } + else + { + float3 _1260; + if (_Globals.OutputDevice == 7u) + { + float3 _1252 = pow(((_906 * _547) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _1260 = pow(fma(float3(18.8515625), _1252, 
float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _1252, float3(1.0))), float3(78.84375)); + } + else + { + _1260 = pow((_1218 * _547) * _576, float3(_Globals.InverseGamma.z)); + } + _2097 = _1260; + } + _2950 = _2097; + } + _2960 = _2950; + } + _3001 = _2960; + } + float3 _3002 = _3001 * float3(0.95238101482391357421875); + float4 _3003 = float4(_3002.x, _3002.y, _3002.z, float4(0.0).w); + _3003.w = 0.0; + out.out_var_SV_Target0 = _3003; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag b/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag new file mode 100644 index 00000000000..8b53cca3ad9 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag @@ -0,0 +1,1121 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_Globals +{ + float4 MappingPolynomial; + float3 InverseGamma; + float4 ColorMatrixR_ColorCurveCd1; + float4 ColorMatrixG_ColorCurveCd3Cm3; + float4 ColorMatrixB_ColorCurveCm2; + float4 ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3; + float4 ColorCurve_Ch1_Ch2; + float4 ColorShadow_Luma; + float4 ColorShadow_Tint1; + float4 ColorShadow_Tint2; + float FilmSlope; + float FilmToe; + float FilmShoulder; + float FilmBlackClip; + float FilmWhiteClip; + float4 LUTWeights[5]; + float3 ColorScale; + float4 OverlayColor; + float WhiteTemp; + float WhiteTint; + float4 ColorSaturation; + float4 ColorContrast; + float4 ColorGamma; + float4 ColorGain; + float4 ColorOffset; + float4 ColorSaturationShadows; + float4 ColorContrastShadows; + float4 ColorGammaShadows; + float4 ColorGainShadows; + float4 ColorOffsetShadows; + float4 ColorSaturationMidtones; + float4 ColorContrastMidtones; + float4 ColorGammaMidtones; + float4 ColorGainMidtones; + float4 ColorOffsetMidtones; + float4 ColorSaturationHighlights; + float4 ColorContrastHighlights; + float4 ColorGammaHighlights; + float4 ColorGainHighlights; + float4 ColorOffsetHighlights; + float ColorCorrectionShadowsMax; + float ColorCorrectionHighlightsMin; + uint OutputDevice; + uint OutputGamut; + float BlueCorrection; + float ExpandGamut; +}; + +constant spvUnsafeArray _499 = 
spvUnsafeArray({ -4.0, -4.0, -3.1573765277862548828125, -0.485249996185302734375, 1.84773242473602294921875, 1.84773242473602294921875 }); +constant spvUnsafeArray _500 = spvUnsafeArray({ -0.718548238277435302734375, 2.0810306072235107421875, 3.66812419891357421875, 4.0, 4.0, 4.0 }); +constant spvUnsafeArray _503 = spvUnsafeArray({ -4.97062206268310546875, -3.0293781757354736328125, -2.1261999607086181640625, -1.5104999542236328125, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _504 = spvUnsafeArray({ 0.80891323089599609375, 1.19108676910400390625, 1.5683000087738037109375, 1.94830000400543212890625, 2.308300018310546875, 2.63840007781982421875, 2.85949993133544921875, 2.9872608184814453125, 3.0127391815185546875, 3.0127391815185546875 }); +constant spvUnsafeArray _506 = spvUnsafeArray({ -2.3010299205780029296875, -2.3010299205780029296875, -1.9312000274658203125, -1.5204999446868896484375, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _507 = spvUnsafeArray({ 0.801995217800140380859375, 1.19800484180450439453125, 1.5943000316619873046875, 1.99730002880096435546875, 2.3782999515533447265625, 2.7683999538421630859375, 3.0515000820159912109375, 3.2746293544769287109375, 3.32743072509765625, 3.32743072509765625 }); + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0), center_no_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globals [[buffer(0)]], texture2d Texture1 [[texture(0)]], sampler Texture1Sampler [[sampler(0)]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + float3x3 _572 = float3x3(float3(0.41245639324188232421875, 
0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * float3x3(float3(1.01303005218505859375, 0.0061053098179399967193603515625, -0.014971000142395496368408203125), float3(0.0076982299797236919403076171875, 0.99816501140594482421875, -0.005032029934227466583251953125), float3(-0.0028413101099431514739990234375, 0.0046851597726345062255859375, 0.92450702190399169921875)); + float3x3 _573 = _572 * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _574 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(0.98722398281097412109375, -0.0061132698319852352142333984375, 0.01595330052077770233154296875), float3(-0.007598360069096088409423828125, 1.00186002254486083984375, 0.0053300200961530208587646484375), float3(0.003072570078074932098388671875, -0.0050959498621523380279541015625, 1.0816800594329833984375)); + float3x3 _575 = _574 * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)); + float3x3 _576 = float3x3(float3(0.952552378177642822265625, 0.0, 9.25), float3(0.3439664542675018310546875, 0.728166103363037109375, 
-0.07213254272937774658203125), float3(0.0, 0.0, 1.00882518291473388671875)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _577 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625)); + float3x3 _602; + for (;;) + { + if (_Globals.OutputGamut == 1u) + { + _602 = _574 * float3x3(float3(2.493396282196044921875, -0.931345880031585693359375, -0.4026944935321807861328125), float3(-0.829486787319183349609375, 1.76265966892242431640625, 0.02362460084259510040283203125), float3(0.0358506999909877777099609375, -0.076182700693607330322265625, 0.957014024257659912109375)); + break; + } + else + { + if (_Globals.OutputGamut == 2u) + { + _602 = _574 * float3x3(float3(1.71660840511322021484375, -0.3556621074676513671875, -0.253360092639923095703125), float3(-0.666682898998260498046875, 1.61647760868072509765625, 0.01576850004494190216064453125), float3(0.017642199993133544921875, -0.04277630150318145751953125, 0.94222867488861083984375)); + break; + } + else + { + if (_Globals.OutputGamut == 3u) + { + _602 = float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 
0.0040252101607620716094970703125, 1.00150072574615478515625)); + break; + } + else + { + if (_Globals.OutputGamut == 4u) + { + _602 = float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)); + break; + } + else + { + _602 = _575; + break; + } + } + } + } + } + float3 _603 = float4((in.in_var_TEXCOORD0 - float2(0.015625)) * float2(1.03225803375244140625), float(gl_Layer) * 0.0322580635547637939453125, 0.0).xyz; + float3 _625; + if (_Globals.OutputDevice >= 3u) + { + float3 _617 = pow(_603, float3(0.0126833133399486541748046875)); + _625 = pow(fast::max(float3(0.0), _617 - float3(0.8359375)) / fma(float3(-18.6875), _617, float3(18.8515625)), float3(6.277394771575927734375)) * float3(10000.0); + } + else + { + _625 = fma(exp2((_603 - float3(0.434017598628997802734375)) * float3(14.0)), float3(0.180000007152557373046875), float3(-0.00266771926544606685638427734375)); + } + float _628 = _Globals.WhiteTemp * 1.00055634975433349609375; + float _642 = (_628 <= 7000.0) ? (0.24406300485134124755859375 + ((99.1100006103515625 + ((2967800.0 - (4604438528.0 / _Globals.WhiteTemp)) / _628)) / _628)) : (0.23703999817371368408203125 + ((247.4799957275390625 + ((1901800.0 - (2005284352.0 / _Globals.WhiteTemp)) / _628)) / _628)); + float _659 = fma(1.2864121856637211749330163002014e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(0.00015411825734190642833709716796875, _Globals.WhiteTemp, 0.860117733478546142578125)) / fma(7.0814513719597016461193561553955e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(0.0008424202096648514270782470703125, _Globals.WhiteTemp, 1.0)); + float _670 = fma(4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(4.25, _Globals.WhiteTemp, 0.317398726940155029296875)) / fma(1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(-2.8974181986995972692966461181641e-05, _Globals.WhiteTemp, 1.0)); + float _675 = fma(2.0, _659, _670 * (-8.0)) + 4.0; + float2 _679 = float2((3.0 * 
_659) / _675, (2.0 * _670) / _675); + float2 _686 = fast::normalize(float2(_659, _670)); + float _691 = fma((-_686.y) * _Globals.WhiteTint, 0.0500000007450580596923828125, _659); + float _695 = fma(_686.x * _Globals.WhiteTint, 0.0500000007450580596923828125, _670); + float _700 = fma(2.0, _691, _695 * (-8.0)) + 4.0; + float2 _706 = select(float2(_642, fma(_642, fma(-3.0, _642, 2.86999988555908203125), -0.2750000059604644775390625)), _679, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _691) / _700, (2.0 * _695) / _700) - _679); + float _709 = fast::max(_706.y, 1.0000000133514319600180897396058e-10); + float3 _723 = float3(_706.x / _709, 1.0, ((1.0 - _706.x) - _706.y) / _709) * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _743 = (_625 * ((float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * ((float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)) * float3x3(float3(0.941379249095916748046875 / _723.x, 0.0, 0.0), float3(0.0, 1.04043638706207275390625 / _723.y, 0.0), float3(0.0, 0.0, 1.08976650238037109375 / _723.z))) * float3x3(float3(0.986992895603179931640625, -0.14705429971218109130859375, 0.15996269881725311279296875), float3(0.4323053061962127685546875, 0.518360316753387451171875, 0.049291200935840606689453125), 
float3(-0.00852870009839534759521484375, 0.0400427989661693572998046875, 0.968486726284027099609375)))) * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)))) * _573; + float3 _771; + if (_Globals.ColorShadow_Tint2.w != 0.0) + { + float _750 = dot(_743, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float3 _753 = (_743 / float3(_750)) - float3(1.0); + _771 = mix(_743, _743 * (_575 * (float3x3(float3(0.544169127941131591796875, 0.23959259688854217529296875, 0.16669429838657379150390625), float3(0.23946559429168701171875, 0.702153027057647705078125, 0.058381401002407073974609375), float3(-0.0023439000360667705535888671875, 0.0361833982169628143310546875, 1.05521833896636962890625)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)))), float3((1.0 - exp2((-4.0) * dot(_753, _753))) * (1.0 - exp2((((-4.0) * _Globals.ExpandGamut) * _750) * _750)))); + } + else + { + _771 = _743; + } + float _772 = dot(_771, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float4 _777 = _Globals.ColorSaturationShadows * _Globals.ColorSaturation; + float4 _782 = _Globals.ColorContrastShadows * _Globals.ColorContrast; + float4 _787 = _Globals.ColorGammaShadows * _Globals.ColorGamma; + float4 _792 = _Globals.ColorGainShadows * _Globals.ColorGain; + float4 _797 = _Globals.ColorOffsetShadows + _Globals.ColorOffset; + float3 _798 = float3(_772); + float _830 = smoothstep(0.0, _Globals.ColorCorrectionShadowsMax, _772); + float4 _834 = 
_Globals.ColorSaturationHighlights * _Globals.ColorSaturation; + float4 _837 = _Globals.ColorContrastHighlights * _Globals.ColorContrast; + float4 _840 = _Globals.ColorGammaHighlights * _Globals.ColorGamma; + float4 _843 = _Globals.ColorGainHighlights * _Globals.ColorGain; + float4 _846 = _Globals.ColorOffsetHighlights + _Globals.ColorOffset; + float _878 = smoothstep(_Globals.ColorCorrectionHighlightsMin, 1.0, _772); + float4 _881 = _Globals.ColorSaturationMidtones * _Globals.ColorSaturation; + float4 _884 = _Globals.ColorContrastMidtones * _Globals.ColorContrast; + float4 _887 = _Globals.ColorGammaMidtones * _Globals.ColorGamma; + float4 _890 = _Globals.ColorGainMidtones * _Globals.ColorGain; + float4 _893 = _Globals.ColorOffsetMidtones + _Globals.ColorOffset; + float3 _931 = fma(fma(pow(pow(fast::max(float3(0.0), mix(_798, _771, _834.xyz * float3(_834.w))) * float3(5.5555553436279296875), _837.xyz * float3(_837.w)) * float3(0.180000007152557373046875), float3(1.0) / (_840.xyz * float3(_840.w))), _843.xyz * float3(_843.w), _846.xyz + float3(_846.w)), float3(_878), fma(fma(pow(pow(fast::max(float3(0.0), mix(_798, _771, _777.xyz * float3(_777.w))) * float3(5.5555553436279296875), _782.xyz * float3(_782.w)) * float3(0.180000007152557373046875), float3(1.0) / (_787.xyz * float3(_787.w))), _792.xyz * float3(_792.w), _797.xyz + float3(_797.w)), float3(1.0 - _830), fma(pow(pow(fast::max(float3(0.0), mix(_798, _771, _881.xyz * float3(_881.w))) * float3(5.5555553436279296875), _884.xyz * float3(_884.w)) * float3(0.180000007152557373046875), float3(1.0) / (_887.xyz * float3(_887.w))), _890.xyz * float3(_890.w), _893.xyz + float3(_893.w)) * float3(_830 - _878))); + float3 _932 = _931 * _575; + float3 _940 = float3(_Globals.BlueCorrection); + float3 _942 = mix(_931, _931 * ((_577 * float3x3(float3(0.940437257289886474609375, -0.01830687932670116424560546875, 0.07786960899829864501953125), float3(0.008378696627914905548095703125, 0.82866001129150390625, 
0.162961304187774658203125), float3(0.0005471261101774871349334716796875, -0.00088337459601461887359619140625, 1.00033628940582275390625))) * _576), _940) * _577; + float _943 = _942.x; + float _944 = _942.y; + float _946 = _942.z; + float _949 = fast::max(fast::max(_943, _944), _946); + float _954 = (fast::max(_949, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_943, _944), _946), 1.0000000133514319600180897396058e-10)) / fast::max(_949, 0.00999999977648258209228515625); + float _967 = fma(1.75, sqrt(fma(_943, _943 - _946, fma(_946, _946 - _944, _944 * (_944 - _943)))), (_946 + _944) + _943); + float _968 = _967 * 0.3333333432674407958984375; + float _969 = _954 - 0.4000000059604644775390625; + float _974 = fast::max(1.0 - abs(_969 * 2.5), 0.0); + float _982 = fma(float(int(sign(_969 * 5.0))), fma(-_974, _974, 1.0), 1.0) * 0.02500000037252902984619140625; + float _995; + if (_968 <= 0.053333334624767303466796875) + { + _995 = _982; + } + else + { + float _994; + if (_968 >= 0.1599999964237213134765625) + { + _994 = 0.0; + } + else + { + _994 = _982 * ((0.23999999463558197021484375 / _967) - 0.5); + } + _995 = _994; + } + float3 _998 = _942 * float3(1.0 + _995); + float _999 = _998.x; + float _1000 = _998.y; + float _1002 = _998.z; + float _1016; + if ((_999 == _1000) && (_1000 == _1002)) + { + _1016 = 0.0; + } + else + { + _1016 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_1000 - _1002), fma(2.0, _999, -_1000) - _1002); + } + float _1021; + if (_1016 < 0.0) + { + _1021 = _1016 + 360.0; + } + else + { + _1021 = _1016; + } + float _1022 = fast::clamp(_1021, 0.0, 360.0); + float _1027; + if (_1022 > 180.0) + { + _1027 = _1022 - 360.0; + } + else + { + _1027 = _1022; + } + float _1031 = smoothstep(0.0, 1.0, 1.0 - abs(_1027 * 0.01481481455266475677490234375)); + _998.x = fma(((_1031 * _1031) * _954) * (0.02999999932944774627685546875 - _999), 0.180000007152557373046875, _999); + float3 _1040 = fast::max(float3(0.0), _998 * 
float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375))); + float _1049 = (1.0 + _Globals.FilmBlackClip) - _Globals.FilmToe; + float _1052 = 1.0 + _Globals.FilmWhiteClip; + float _1055 = _1052 - _Globals.FilmShoulder; + float _1082; + if (_Globals.FilmToe > 0.800000011920928955078125) + { + _1082 = ((0.819999992847442626953125 - _Globals.FilmToe) / _Globals.FilmSlope) + (-0.744727432727813720703125); + } + else + { + float _1061 = (0.180000007152557373046875 + _Globals.FilmBlackClip) / _1049; + _1082 = fma(log(_1061 / (2.0 - _1061)) * (-0.5), _1049 / _Globals.FilmSlope, -0.744727432727813720703125); + } + float _1087 = ((1.0 - _Globals.FilmToe) / _Globals.FilmSlope) - _1082; + float _1089 = (_Globals.FilmShoulder / _Globals.FilmSlope) - _1087; + float3 _1090 = log(mix(float3(dot(_1040, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1040, float3(0.959999978542327880859375))); + float3 _1093 = _1090 * float3(0.4342944622039794921875); + float3 _1097 = float3(_Globals.FilmSlope) * fma(_1090, float3(0.4342944622039794921875), float3(_1087)); + float3 _1105 = float3(_1082); + float3 _1106 = fma(_1090, float3(0.4342944622039794921875), -_1105); + float3 _1118 = float3(_1089); + float3 _1132 = fast::clamp(_1106 / float3(_1089 - _1082), float3(0.0), float3(1.0)); + float3 _1136 = select(_1132, float3(1.0) - _1132, bool3(_1089 < _1082)); + float3 _1141 = mix(select(_1097, float3(-_Globals.FilmBlackClip) + (float3(2.0 * _1049) / (float3(1.0) + exp(float3(((-2.0) * _Globals.FilmSlope) / _1049) * _1106))), _1093 < _1105), select(_1097, float3(_1052) - (float3(2.0 * _1055) / (float3(1.0) + exp(float3((2.0 * _Globals.FilmSlope) / _1055) * fma(_1090, float3(0.4342944622039794921875), -_1118)))), 
_1093 > _1118), (fma(float3(-2.0), _1136, float3(3.0)) * _1136) * _1136); + float3 _1145 = fast::max(float3(0.0), mix(float3(dot(_1141, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1141, float3(0.930000007152557373046875))); + float3 _1215; + if (_Globals.ColorShadow_Tint2.w == 0.0) + { + float3 _1183 = fast::max(float3(0.0), float3(dot(_932, _Globals.ColorMatrixR_ColorCurveCd1.xyz), dot(_932, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz), dot(_932, _Globals.ColorMatrixB_ColorCurveCm2.xyz)) * fma(_Globals.ColorShadow_Tint2.xyz, float3(1.0 / (dot(_932, _Globals.ColorShadow_Luma.xyz) + 1.0)), _Globals.ColorShadow_Tint1.xyz)); + float3 _1188 = fast::max(float3(0.0), _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx - _1183); + float3 _1190 = fast::max(_1183, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz); + _1215 = fma(fma(_1190, _Globals.ColorCurve_Ch1_Ch2.xxx, _Globals.ColorCurve_Ch1_Ch2.yyy), float3(1.0) / (_1190 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.www), fma(fast::clamp(_1183, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz), _Globals.ColorMatrixB_ColorCurveCm2.www, fma(_1188 * _Globals.ColorMatrixR_ColorCurveCd1.www, float3(1.0) / (_1188 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.yyy), _Globals.ColorMatrixG_ColorCurveCd3Cm3.www))) - float3(0.00200000009499490261077880859375); + } + else + { + _1215 = fast::max(float3(0.0), mix(_1145, _1145 * ((_577 * float3x3(float3(1.06317996978759765625, 0.02339559979736804962158203125, -0.08657260239124298095703125), float3(-0.010633699595928192138671875, 1.2063200473785400390625, -0.1956900060176849365234375), float3(-0.0005908869788981974124908447265625, 0.00105247995816171169281005859375, 0.999538004398345947265625))) * _576), _940) * _575); + } + float3 _1216 = fast::clamp(_1215, float3(0.0), float3(1.0)); + float _1217 = _1216.x; + float _1229; + for (;;) + { + if (_1217 < 0.00313066993840038776397705078125) + { + _1229 = 
_1217 * 12.9200000762939453125; + break; + } + _1229 = fma(pow(_1217, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float _1230 = _1216.y; + float _1242; + for (;;) + { + if (_1230 < 0.00313066993840038776397705078125) + { + _1242 = _1230 * 12.9200000762939453125; + break; + } + _1242 = fma(pow(_1230, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float _1243 = _1216.z; + float _1255; + for (;;) + { + if (_1243 < 0.00313066993840038776397705078125) + { + _1255 = _1243 * 12.9200000762939453125; + break; + } + _1255 = fma(pow(_1243, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float3 _1256 = float3(_1229, _1242, _1255); + float3 _1258 = fma(_1256, float3(0.9375), float3(0.03125)); + float _1270 = fma(_1258.z, 16.0, -0.5); + float _1271 = floor(_1270); + float _1274 = _1258.x + _1271; + float _1276 = _1258.y; + float4 _1279 = Texture1.sample(Texture1Sampler, float2(_1274 * 0.0625, _1276)); + float4 _1283 = Texture1.sample(Texture1Sampler, float2(fma(_1274, 0.0625, 0.0625), _1276)); + float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), fma(float3(_Globals.LUTWeights[0].x), _1256, float3(_Globals.LUTWeights[1].x) * mix(_1279, _1283, float4(_1270 - _1271)).xyz)); + float3 _1295 = select(_1289 * float3(0.077399380505084991455078125), pow(fma(_1289, float3(0.94786727428436279296875), float3(0.0521326996386051177978515625)), float3(2.400000095367431640625)), _1289 > float3(0.040449999272823333740234375)); + float3 _1324 = pow(fast::max(float3(0.0), mix((fma(float3(_Globals.MappingPolynomial.x), _1295 * _1295, float3(_Globals.MappingPolynomial.y) * _1295) + float3(_Globals.MappingPolynomial.z)) * _Globals.ColorScale, _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y)); + float3 _3103; + if (_Globals.OutputDevice == 0u) + { + float _3063 = 
_1324.x; + float _3075; + for (;;) + { + if (_3063 < 0.00313066993840038776397705078125) + { + _3075 = _3063 * 12.9200000762939453125; + break; + } + _3075 = fma(pow(_3063, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float _3076 = _1324.y; + float _3088; + for (;;) + { + if (_3076 < 0.00313066993840038776397705078125) + { + _3088 = _3076 * 12.9200000762939453125; + break; + } + _3088 = fma(pow(_3076, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float _3089 = _1324.z; + float _3101; + for (;;) + { + if (_3089 < 0.00313066993840038776397705078125) + { + _3101 = _3089 * 12.9200000762939453125; + break; + } + _3101 = fma(pow(_3089, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + _3103 = float3(_3075, _3088, _3101); + } + else + { + float3 _3062; + if (_Globals.OutputDevice == 1u) + { + float3 _3055 = fast::max(float3(6.1035199905745685100555419921875e-05), (_1324 * _573) * _602); + _3062 = fast::min(_3055 * float3(4.5), fma(pow(fast::max(_3055, float3(0.017999999225139617919921875)), float3(0.449999988079071044921875)), float3(1.09899997711181640625), float3(-0.098999999463558197021484375))); + } + else + { + float3 _3052; + if ((_Globals.OutputDevice == 3u) || (_Globals.OutputDevice == 5u)) + { + float3 _2204 = (_932 * float3(1.5)) * (_572 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _2205 = _2204.x; + float _2206 = _2204.y; + float _2208 = _2204.z; + float _2211 = fast::max(fast::max(_2205, _2206), _2208); + float _2216 = (fast::max(_2211, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_2205, _2206), _2208), 1.0000000133514319600180897396058e-10)) / fast::max(_2211, 
0.00999999977648258209228515625); + float _2229 = fma(1.75, sqrt(fma(_2205, _2205 - _2208, fma(_2208, _2208 - _2206, _2206 * (_2206 - _2205)))), (_2208 + _2206) + _2205); + float _2230 = _2229 * 0.3333333432674407958984375; + float _2231 = _2216 - 0.4000000059604644775390625; + float _2236 = fast::max(1.0 - abs(_2231 * 2.5), 0.0); + float _2244 = fma(float(int(sign(_2231 * 5.0))), fma(-_2236, _2236, 1.0), 1.0) * 0.02500000037252902984619140625; + float _2257; + if (_2230 <= 0.053333334624767303466796875) + { + _2257 = _2244; + } + else + { + float _2256; + if (_2230 >= 0.1599999964237213134765625) + { + _2256 = 0.0; + } + else + { + _2256 = _2244 * ((0.23999999463558197021484375 / _2229) - 0.5); + } + _2257 = _2256; + } + float3 _2260 = _2204 * float3(1.0 + _2257); + float _2261 = _2260.x; + float _2262 = _2260.y; + float _2264 = _2260.z; + float _2278; + if ((_2261 == _2262) && (_2262 == _2264)) + { + _2278 = 0.0; + } + else + { + _2278 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_2262 - _2264), fma(2.0, _2261, -_2262) - _2264); + } + float _2283; + if (_2278 < 0.0) + { + _2283 = _2278 + 360.0; + } + else + { + _2283 = _2278; + } + float _2284 = fast::clamp(_2283, 0.0, 360.0); + float _2289; + if (_2284 > 180.0) + { + _2289 = _2284 - 360.0; + } + else + { + _2289 = _2284; + } + float _2339; + if ((_2289 > (-67.5)) && (_2289 < 67.5)) + { + float _2295 = _2289 - (-67.5); + int _2297 = int(_2295 * 0.0296296291053295135498046875); + float _2299 = fma(_2295, 0.0296296291053295135498046875, -float(_2297)); + float _2300 = _2299 * _2299; + float _2301 = _2300 * _2299; + float _2338; + if (_2297 == 3) + { + _2338 = fma(_2299, -0.5, fma(_2301, -0.16666667163372039794921875, _2300 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _2331; + if (_2297 == 2) + { + _2331 = fma(_2301, 0.5, _2300 * (-1.0)) + 0.666666686534881591796875; + } + else + { + float _2326; + if (_2297 == 1) + { + _2326 = fma(_2299, 0.5, fma(_2301, -0.5, _2300 * 0.5)) + 
0.16666667163372039794921875; + } + else + { + float _2319; + if (_2297 == 0) + { + _2319 = _2301 * 0.16666667163372039794921875; + } + else + { + _2319 = 0.0; + } + _2326 = _2319; + } + _2331 = _2326; + } + _2338 = _2331; + } + _2339 = _2338; + } + else + { + _2339 = 0.0; + } + _2260.x = fma(((_2339 * 1.5) * _2216) * (0.02999999932944774627685546875 - _2261), 0.180000007152557373046875, _2261); + float3 _2349 = fast::clamp(fast::clamp(_2260, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _2352 = mix(float3(dot(_2349, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _2349, float3(0.959999978542327880859375)); + float _2353 = _2352.x; + float _2361 = log((_2353 <= 0.0) ? 
6.103515625e-05 : _2353); + float _2362 = _2361 * 0.4342944622039794921875; + float _2431; + if (_2362 <= (-5.2601776123046875)) + { + _2431 = -4.0; + } + else + { + float _2428; + if ((_2362 > (-5.2601776123046875)) && (_2362 < (-0.744727432727813720703125))) + { + float _2408 = fma(_2361, 0.4342944622039794921875, 5.2601776123046875); + int _2412 = int(_2408 * 0.6643855571746826171875); + float _2414 = fma(_2408, 0.6643855571746826171875, -float(_2412)); + _2428 = dot(float3(_2414 * _2414, _2414, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2412], _499[_2412 + 1], _499[_2412 + 2])); + } + else + { + float _2407; + if ((_2362 >= (-0.744727432727813720703125)) && (_2362 < 4.673812389373779296875)) + { + float _2387 = fma(_2361, 0.4342944622039794921875, 0.744727432727813720703125); + int _2391 = int(_2387 * 0.55365467071533203125); + float _2393 = fma(_2387, 0.55365467071533203125, -float(_2391)); + _2407 = dot(float3(_2393 * _2393, _2393, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2391], _500[_2391 + 1], _500[_2391 + 2])); + } + else + { + _2407 = 4.0; + } + _2428 = _2407; + } + _2431 = _2428; + } + float _2434 = _2352.y; + float _2437 = log((_2434 <= 0.0) ? 
6.103515625e-05 : _2434); + float _2438 = _2437 * 0.4342944622039794921875; + float _2505; + if (_2438 <= (-5.2601776123046875)) + { + _2505 = -4.0; + } + else + { + float _2502; + if ((_2438 > (-5.2601776123046875)) && (_2438 < (-0.744727432727813720703125))) + { + float _2482 = fma(_2437, 0.4342944622039794921875, 5.2601776123046875); + int _2486 = int(_2482 * 0.6643855571746826171875); + float _2488 = fma(_2482, 0.6643855571746826171875, -float(_2486)); + _2502 = dot(float3(_2488 * _2488, _2488, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2486], _499[_2486 + 1], _499[_2486 + 2])); + } + else + { + float _2481; + if ((_2438 >= (-0.744727432727813720703125)) && (_2438 < 4.673812389373779296875)) + { + float _2461 = fma(_2437, 0.4342944622039794921875, 0.744727432727813720703125); + int _2465 = int(_2461 * 0.55365467071533203125); + float _2467 = fma(_2461, 0.55365467071533203125, -float(_2465)); + _2481 = dot(float3(_2467 * _2467, _2467, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2465], _500[_2465 + 1], _500[_2465 + 2])); + } + else + { + _2481 = 4.0; + } + _2502 = _2481; + } + _2505 = _2502; + } + float _2508 = _2352.z; + float _2511 = log((_2508 <= 0.0) ? 
6.103515625e-05 : _2508); + float _2512 = _2511 * 0.4342944622039794921875; + float _2579; + if (_2512 <= (-5.2601776123046875)) + { + _2579 = -4.0; + } + else + { + float _2576; + if ((_2512 > (-5.2601776123046875)) && (_2512 < (-0.744727432727813720703125))) + { + float _2556 = fma(_2511, 0.4342944622039794921875, 5.2601776123046875); + int _2560 = int(_2556 * 0.6643855571746826171875); + float _2562 = fma(_2556, 0.6643855571746826171875, -float(_2560)); + _2576 = dot(float3(_2562 * _2562, _2562, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2560], _499[_2560 + 1], _499[_2560 + 2])); + } + else + { + float _2555; + if ((_2512 >= (-0.744727432727813720703125)) && (_2512 < 4.673812389373779296875)) + { + float _2535 = fma(_2511, 0.4342944622039794921875, 0.744727432727813720703125); + int _2539 = int(_2535 * 0.55365467071533203125); + float _2541 = fma(_2535, 0.55365467071533203125, -float(_2539)); + _2555 = dot(float3(_2541 * _2541, _2541, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2539], _500[_2539 + 1], _500[_2539 + 2])); + } + else + { + _2555 = 4.0; + } + _2576 = _2555; + } + _2579 = _2576; + } + float3 _2583 = (float3(pow(10.0, _2431), pow(10.0, _2505), pow(10.0, _2579)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _2787 = _2583.x; + float _2790 = log((_2787 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2787); + float _2791 = _2790 * 0.4342944622039794921875; + float _2870; + if (_2791 <= (-3.84832763671875)) + { + _2870 = fma(_2790, 1.3028833866119384765625, fma(8.86110210418701171875, 1.3028833866119384765625, -4.0)); + } + else + { + float _2862; + if ((_2791 > (-3.84832763671875)) && (_2791 < 0.68124115467071533203125)) + { + float _2845 = (7.0 * fma(_2790, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2846 = int(_2845); + float _2848 = _2845 - float(_2846); + _2862 = dot(float3(_2848 * _2848, _2848, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_503[_2846], _503[_2846 + 1], _503[_2846 + 2])); + } + else + { + float _2841; + if ((_2791 >= 0.68124115467071533203125) && (_2791 < 3.65370273590087890625)) + { + float _2824 = (7.0 * fma(_2790, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.4129619598388671875, 0.4342944622039794921875, -0.68124115467071533203125); + int _2825 = int(_2824); + float _2827 = _2824 - float(_2825); + _2841 = dot(float3(_2827 * _2827, _2827, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_504[_2825], _504[_2825 + 1], _504[_2825 + 2])); + } + else + { + _2841 = fma(_2790, 0.026057668030261993408203125, fma(-8.4129619598388671875, 0.026057668030261993408203125, 3.0)); + } + _2862 = _2841; + } + _2870 = _2862; + } + float _2873 = _2583.y; + float _2876 = log((_2873 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2873); + float _2877 = _2876 * 0.4342944622039794921875; + float _2954; + if (_2877 <= (-3.84832763671875)) + { + _2954 = fma(_2876, 1.3028833866119384765625, fma(8.86110210418701171875, 1.3028833866119384765625, -4.0)); + } + else + { + float _2946; + if ((_2877 > (-3.84832763671875)) && (_2877 < 0.68124115467071533203125)) + { + float _2929 = (7.0 * fma(_2876, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2930 = int(_2929); + float _2932 = _2929 - float(_2930); + _2946 = dot(float3(_2932 * _2932, _2932, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_503[_2930], _503[_2930 + 1], _503[_2930 + 2])); + } + else + { + float _2925; + if ((_2877 >= 0.68124115467071533203125) && (_2877 < 3.65370273590087890625)) + { + float _2908 = (7.0 * fma(_2876, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.4129619598388671875, 0.4342944622039794921875, -0.68124115467071533203125); + int _2909 = int(_2908); + float _2911 = _2908 - float(_2909); + _2925 = dot(float3(_2911 * _2911, _2911, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_504[_2909], _504[_2909 + 1], _504[_2909 + 2])); + } + else + { + _2925 = fma(_2876, 0.026057668030261993408203125, fma(-8.4129619598388671875, 0.026057668030261993408203125, 3.0)); + } + _2946 = _2925; + } + _2954 = _2946; + } + float _2957 = _2583.z; + float _2960 = log((_2957 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2957); + float _2961 = _2960 * 0.4342944622039794921875; + float _3038; + if (_2961 <= (-3.84832763671875)) + { + _3038 = fma(_2960, 1.3028833866119384765625, fma(8.86110210418701171875, 1.3028833866119384765625, -4.0)); + } + else + { + float _3030; + if ((_2961 > (-3.84832763671875)) && (_2961 < 0.68124115467071533203125)) + { + float _3013 = (7.0 * fma(_2960, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _3014 = int(_3013); + float _3016 = _3013 - float(_3014); + _3030 = dot(float3(_3016 * _3016, _3016, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_503[_3014], _503[_3014 + 1], _503[_3014 + 2])); + } + else + { + float _3009; + if ((_2961 >= 0.68124115467071533203125) && (_2961 < 3.65370273590087890625)) + { + float _2992 = (7.0 * fma(_2960, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.4129619598388671875, 0.4342944622039794921875, -0.68124115467071533203125); + int _2993 = int(_2992); + float _2995 = _2992 - float(_2993); + _3009 = dot(float3(_2995 * _2995, _2995, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_504[_2993], _504[_2993 + 1], _504[_2993 + 2])); + } + else + { + _3009 = fma(_2960, 0.026057668030261993408203125, fma(-8.4129619598388671875, 0.026057668030261993408203125, 3.0)); + } + _3030 = _3009; + } + _3038 = _3030; + } + float3 _3044 = pow(((float3(pow(10.0, _2870), pow(10.0, _2954), pow(10.0, _3038)) - float3(3.5073844628641381859779357910156e-05)) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _3052 = pow(fma(float3(18.8515625), _3044, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _3044, float3(1.0))), float3(78.84375)); + } + else + { + float3 _2201; + if ((_Globals.OutputDevice == 4u) || (_Globals.OutputDevice == 6u)) + { + float3 _1369 = (_932 * float3(1.5)) 
* (_572 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _1370 = _1369.x; + float _1371 = _1369.y; + float _1373 = _1369.z; + float _1376 = fast::max(fast::max(_1370, _1371), _1373); + float _1381 = (fast::max(_1376, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_1370, _1371), _1373), 1.0000000133514319600180897396058e-10)) / fast::max(_1376, 0.00999999977648258209228515625); + float _1394 = fma(1.75, sqrt(fma(_1370, _1370 - _1373, fma(_1373, _1373 - _1371, _1371 * (_1371 - _1370)))), (_1373 + _1371) + _1370); + float _1395 = _1394 * 0.3333333432674407958984375; + float _1396 = _1381 - 0.4000000059604644775390625; + float _1401 = fast::max(1.0 - abs(_1396 * 2.5), 0.0); + float _1409 = fma(float(int(sign(_1396 * 5.0))), fma(-_1401, _1401, 1.0), 1.0) * 0.02500000037252902984619140625; + float _1422; + if (_1395 <= 0.053333334624767303466796875) + { + _1422 = _1409; + } + else + { + float _1421; + if (_1395 >= 0.1599999964237213134765625) + { + _1421 = 0.0; + } + else + { + _1421 = _1409 * ((0.23999999463558197021484375 / _1394) - 0.5); + } + _1422 = _1421; + } + float3 _1425 = _1369 * float3(1.0 + _1422); + float _1426 = _1425.x; + float _1427 = _1425.y; + float _1429 = _1425.z; + float _1443; + if ((_1426 == _1427) && (_1427 == _1429)) + { + _1443 = 0.0; + } + else + { + _1443 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_1427 - _1429), fma(2.0, _1426, -_1427) - _1429); + } + float _1448; + if (_1443 < 0.0) + { + _1448 = _1443 + 360.0; + } + else + { + _1448 = _1443; + } + float _1449 = fast::clamp(_1448, 0.0, 360.0); + float _1454; + if (_1449 > 180.0) + { + _1454 = _1449 - 360.0; + } + else + { + _1454 = _1449; + } + float _1504; + if ((_1454 > (-67.5)) && (_1454 < 67.5)) + { + float _1460 = _1454 - (-67.5); + int _1462 = int(_1460 * 
0.0296296291053295135498046875); + float _1464 = fma(_1460, 0.0296296291053295135498046875, -float(_1462)); + float _1465 = _1464 * _1464; + float _1466 = _1465 * _1464; + float _1503; + if (_1462 == 3) + { + _1503 = fma(_1464, -0.5, fma(_1466, -0.16666667163372039794921875, _1465 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _1496; + if (_1462 == 2) + { + _1496 = fma(_1466, 0.5, _1465 * (-1.0)) + 0.666666686534881591796875; + } + else + { + float _1491; + if (_1462 == 1) + { + _1491 = fma(_1464, 0.5, fma(_1466, -0.5, _1465 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _1484; + if (_1462 == 0) + { + _1484 = _1466 * 0.16666667163372039794921875; + } + else + { + _1484 = 0.0; + } + _1491 = _1484; + } + _1496 = _1491; + } + _1503 = _1496; + } + _1504 = _1503; + } + else + { + _1504 = 0.0; + } + _1425.x = fma(((_1504 * 1.5) * _1381) * (0.02999999932944774627685546875 - _1426), 0.180000007152557373046875, _1426); + float3 _1514 = fast::clamp(fast::clamp(_1425, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _1517 = mix(float3(dot(_1514, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1514, float3(0.959999978542327880859375)); + float _1518 = _1517.x; + float _1526 = log((_1518 <= 0.0) ? 
6.103515625e-05 : _1518); + float _1527 = _1526 * 0.4342944622039794921875; + float _1596; + if (_1527 <= (-5.2601776123046875)) + { + _1596 = -4.0; + } + else + { + float _1593; + if ((_1527 > (-5.2601776123046875)) && (_1527 < (-0.744727432727813720703125))) + { + float _1573 = fma(_1526, 0.4342944622039794921875, 5.2601776123046875); + int _1577 = int(_1573 * 0.6643855571746826171875); + float _1579 = fma(_1573, 0.6643855571746826171875, -float(_1577)); + _1593 = dot(float3(_1579 * _1579, _1579, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1577], _499[_1577 + 1], _499[_1577 + 2])); + } + else + { + float _1572; + if ((_1527 >= (-0.744727432727813720703125)) && (_1527 < 4.673812389373779296875)) + { + float _1552 = fma(_1526, 0.4342944622039794921875, 0.744727432727813720703125); + int _1556 = int(_1552 * 0.55365467071533203125); + float _1558 = fma(_1552, 0.55365467071533203125, -float(_1556)); + _1572 = dot(float3(_1558 * _1558, _1558, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1556], _500[_1556 + 1], _500[_1556 + 2])); + } + else + { + _1572 = 4.0; + } + _1593 = _1572; + } + _1596 = _1593; + } + float _1599 = _1517.y; + float _1602 = log((_1599 <= 0.0) ? 
6.103515625e-05 : _1599); + float _1603 = _1602 * 0.4342944622039794921875; + float _1670; + if (_1603 <= (-5.2601776123046875)) + { + _1670 = -4.0; + } + else + { + float _1667; + if ((_1603 > (-5.2601776123046875)) && (_1603 < (-0.744727432727813720703125))) + { + float _1647 = fma(_1602, 0.4342944622039794921875, 5.2601776123046875); + int _1651 = int(_1647 * 0.6643855571746826171875); + float _1653 = fma(_1647, 0.6643855571746826171875, -float(_1651)); + _1667 = dot(float3(_1653 * _1653, _1653, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1651], _499[_1651 + 1], _499[_1651 + 2])); + } + else + { + float _1646; + if ((_1603 >= (-0.744727432727813720703125)) && (_1603 < 4.673812389373779296875)) + { + float _1626 = fma(_1602, 0.4342944622039794921875, 0.744727432727813720703125); + int _1630 = int(_1626 * 0.55365467071533203125); + float _1632 = fma(_1626, 0.55365467071533203125, -float(_1630)); + _1646 = dot(float3(_1632 * _1632, _1632, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1630], _500[_1630 + 1], _500[_1630 + 2])); + } + else + { + _1646 = 4.0; + } + _1667 = _1646; + } + _1670 = _1667; + } + float _1673 = _1517.z; + float _1676 = log((_1673 <= 0.0) ? 
6.103515625e-05 : _1673); + float _1677 = _1676 * 0.4342944622039794921875; + float _1744; + if (_1677 <= (-5.2601776123046875)) + { + _1744 = -4.0; + } + else + { + float _1741; + if ((_1677 > (-5.2601776123046875)) && (_1677 < (-0.744727432727813720703125))) + { + float _1721 = fma(_1676, 0.4342944622039794921875, 5.2601776123046875); + int _1725 = int(_1721 * 0.6643855571746826171875); + float _1727 = fma(_1721, 0.6643855571746826171875, -float(_1725)); + _1741 = dot(float3(_1727 * _1727, _1727, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1725], _499[_1725 + 1], _499[_1725 + 2])); + } + else + { + float _1720; + if ((_1677 >= (-0.744727432727813720703125)) && (_1677 < 4.673812389373779296875)) + { + float _1700 = fma(_1676, 0.4342944622039794921875, 0.744727432727813720703125); + int _1704 = int(_1700 * 0.55365467071533203125); + float _1706 = fma(_1700, 0.55365467071533203125, -float(_1704)); + _1720 = dot(float3(_1706 * _1706, _1706, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1704], _500[_1704 + 1], _500[_1704 + 2])); + } + else + { + _1720 = 4.0; + } + _1741 = _1720; + } + _1744 = _1741; + } + float3 _1748 = (float3(pow(10.0, _1596), pow(10.0, _1670), pow(10.0, _1744)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _1952 = _1748.x; + float _1955 = log((_1952 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _1952); + float _1956 = _1955 * 0.4342944622039794921875; + float _2030; + if (_1956 <= (-3.84832763671875)) + { + _2030 = -2.3010299205780029296875; + } + else + { + float _2027; + if ((_1956 > (-3.84832763671875)) && (_1956 < 0.68124115467071533203125)) + { + float _2010 = (7.0 * fma(_1955, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2011 = int(_2010); + float _2013 = _2010 - float(_2011); + _2027 = dot(float3(_2013 * _2013, _2013, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_506[_2011], _506[_2011 + 1], _506[_2011 + 2])); + } + else + { + float _2006; + if ((_1956 >= 0.68124115467071533203125) && (_1956 < 3.761315822601318359375)) + { + float _1989 = (7.0 * fma(_1955, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.66075038909912109375, 0.4342944622039794921875, -0.68124115467071533203125); + int _1990 = int(_1989); + float _1992 = _1989 - float(_1990); + _2006 = dot(float3(_1992 * _1992, _1992, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_507[_1990], _507[_1990 + 1], _507[_1990 + 2])); + } + else + { + _2006 = fma(_1955, 0.05211533606052398681640625, fma(-8.66075038909912109375, 0.05211533606052398681640625, 3.3010299205780029296875)); + } + _2027 = _2006; + } + _2030 = _2027; + } + float _2033 = _1748.y; + float _2036 = log((_2033 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2033); + float _2037 = _2036 * 0.4342944622039794921875; + float _2109; + if (_2037 <= (-3.84832763671875)) + { + _2109 = -2.3010299205780029296875; + } + else + { + float _2106; + if ((_2037 > (-3.84832763671875)) && (_2037 < 0.68124115467071533203125)) + { + float _2089 = (7.0 * fma(_2036, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2090 = int(_2089); + float _2092 = _2089 - float(_2090); + _2106 = dot(float3(_2092 * _2092, _2092, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_506[_2090], _506[_2090 + 1], _506[_2090 + 2])); + } + else + { + float _2085; + if ((_2037 >= 0.68124115467071533203125) && (_2037 < 3.761315822601318359375)) + { + float _2068 = (7.0 * fma(_2036, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.66075038909912109375, 0.4342944622039794921875, -0.68124115467071533203125); + int _2069 = int(_2068); + float _2071 = _2068 - float(_2069); + _2085 = dot(float3(_2071 * _2071, _2071, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_507[_2069], _507[_2069 + 1], _507[_2069 + 2])); + } + else + { + _2085 = fma(_2036, 0.05211533606052398681640625, fma(-8.66075038909912109375, 0.05211533606052398681640625, 3.3010299205780029296875)); + } + _2106 = _2085; + } + _2109 = _2106; + } + float _2112 = _1748.z; + float _2115 = log((_2112 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2112); + float _2116 = _2115 * 0.4342944622039794921875; + float _2188; + if (_2116 <= (-3.84832763671875)) + { + _2188 = -2.3010299205780029296875; + } + else + { + float _2185; + if ((_2116 > (-3.84832763671875)) && (_2116 < 0.68124115467071533203125)) + { + float _2168 = (7.0 * fma(_2115, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2169 = int(_2168); + float _2171 = _2168 - float(_2169); + _2185 = dot(float3(_2171 * _2171, _2171, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_506[_2169], _506[_2169 + 1], _506[_2169 + 2])); + } + else + { + float _2164; + if ((_2116 >= 0.68124115467071533203125) && (_2116 < 3.761315822601318359375)) + { + float _2147 = (7.0 * fma(_2115, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.66075038909912109375, 0.4342944622039794921875, -0.68124115467071533203125); + int _2148 = int(_2147); + float _2150 = _2147 - float(_2148); + _2164 = dot(float3(_2150 * _2150, _2150, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_507[_2148], _507[_2148 + 1], _507[_2148 + 2])); + } + else + { + _2164 = fma(_2115, 0.05211533606052398681640625, fma(-8.66075038909912109375, 0.05211533606052398681640625, 3.3010299205780029296875)); + } + _2185 = _2164; + } + _2188 = _2185; + } + float3 _2193 = pow((float3(pow(10.0, _2030), pow(10.0, _2109), pow(10.0, _2188)) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2201 = pow(fma(float3(18.8515625), _2193, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2193, float3(1.0))), float3(78.84375)); + } + else + { + float3 _1366; + if (_Globals.OutputDevice == 7u) + { + float3 _1358 = pow(((_932 * _573) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _1366 = pow(fma(float3(18.8515625), _1358, 
float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _1358, float3(1.0))), float3(78.84375)); + } + else + { + _1366 = pow((_1324 * _573) * _602, float3(_Globals.InverseGamma.z)); + } + _2201 = _1366; + } + _3052 = _2201; + } + _3062 = _3052; + } + _3103 = _3062; + } + float3 _3104 = _3103 * float3(0.95238101482391357421875); + float4 _3105 = float4(_3104.x, _3104.y, _3104.z, float4(0.0).w); + _3105.w = 0.0; + out.out_var_SV_Target0 = _3105; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag b/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag new file mode 100644 index 00000000000..790ad27a1d7 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag @@ -0,0 +1,503 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float 
PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float 
PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint 
View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_TranslucentBasePass +{ + uint TranslucentBasePass_Shared_Forward_NumLocalLights; + uint TranslucentBasePass_Shared_Forward_NumReflectionCaptures; + uint TranslucentBasePass_Shared_Forward_HasDirectionalLight; + uint TranslucentBasePass_Shared_Forward_NumGridCells; + packed_int3 TranslucentBasePass_Shared_Forward_CulledGridSize; + uint TranslucentBasePass_Shared_Forward_MaxCulledLightsPerCell; + uint TranslucentBasePass_Shared_Forward_LightGridPixelSizeShift; + uint PrePadding_TranslucentBasePass_Shared_Forward_36; + uint PrePadding_TranslucentBasePass_Shared_Forward_40; + uint PrePadding_TranslucentBasePass_Shared_Forward_44; + packed_float3 
TranslucentBasePass_Shared_Forward_LightGridZParams; + float PrePadding_TranslucentBasePass_Shared_Forward_60; + packed_float3 TranslucentBasePass_Shared_Forward_DirectionalLightDirection; + float PrePadding_TranslucentBasePass_Shared_Forward_76; + packed_float3 TranslucentBasePass_Shared_Forward_DirectionalLightColor; + float TranslucentBasePass_Shared_Forward_DirectionalLightVolumetricScatteringIntensity; + uint TranslucentBasePass_Shared_Forward_DirectionalLightShadowMapChannelMask; + uint PrePadding_TranslucentBasePass_Shared_Forward_100; + float2 TranslucentBasePass_Shared_Forward_DirectionalLightDistanceFadeMAD; + uint TranslucentBasePass_Shared_Forward_NumDirectionalLightCascades; + uint PrePadding_TranslucentBasePass_Shared_Forward_116; + uint PrePadding_TranslucentBasePass_Shared_Forward_120; + uint PrePadding_TranslucentBasePass_Shared_Forward_124; + float4 TranslucentBasePass_Shared_Forward_CascadeEndDepths; + float4x4 TranslucentBasePass_Shared_Forward_DirectionalLightWorldToShadowMatrix[4]; + float4 TranslucentBasePass_Shared_Forward_DirectionalLightShadowmapMinMax[4]; + float4 TranslucentBasePass_Shared_Forward_DirectionalLightShadowmapAtlasBufferSize; + float TranslucentBasePass_Shared_Forward_DirectionalLightDepthBias; + uint TranslucentBasePass_Shared_Forward_DirectionalLightUseStaticShadowing; + uint PrePadding_TranslucentBasePass_Shared_Forward_488; + uint PrePadding_TranslucentBasePass_Shared_Forward_492; + float4 TranslucentBasePass_Shared_Forward_DirectionalLightStaticShadowBufferSize; + float4x4 TranslucentBasePass_Shared_Forward_DirectionalLightWorldToStaticShadow; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_576; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_580; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_584; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_588; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_592; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_596; + float 
PrePadding_TranslucentBasePass_Shared_ForwardISR_600; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_604; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_608; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_612; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_616; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_620; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_624; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_628; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_632; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_636; + uint TranslucentBasePass_Shared_ForwardISR_NumLocalLights; + uint TranslucentBasePass_Shared_ForwardISR_NumReflectionCaptures; + uint TranslucentBasePass_Shared_ForwardISR_HasDirectionalLight; + uint TranslucentBasePass_Shared_ForwardISR_NumGridCells; + packed_int3 TranslucentBasePass_Shared_ForwardISR_CulledGridSize; + uint TranslucentBasePass_Shared_ForwardISR_MaxCulledLightsPerCell; + uint TranslucentBasePass_Shared_ForwardISR_LightGridPixelSizeShift; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_676; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_680; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_684; + packed_float3 TranslucentBasePass_Shared_ForwardISR_LightGridZParams; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_700; + packed_float3 TranslucentBasePass_Shared_ForwardISR_DirectionalLightDirection; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_716; + packed_float3 TranslucentBasePass_Shared_ForwardISR_DirectionalLightColor; + float TranslucentBasePass_Shared_ForwardISR_DirectionalLightVolumetricScatteringIntensity; + uint TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowMapChannelMask; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_740; + float2 TranslucentBasePass_Shared_ForwardISR_DirectionalLightDistanceFadeMAD; + uint 
TranslucentBasePass_Shared_ForwardISR_NumDirectionalLightCascades; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_756; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_760; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_764; + float4 TranslucentBasePass_Shared_ForwardISR_CascadeEndDepths; + float4x4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightWorldToShadowMatrix[4]; + float4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowmapMinMax[4]; + float4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowmapAtlasBufferSize; + float TranslucentBasePass_Shared_ForwardISR_DirectionalLightDepthBias; + uint TranslucentBasePass_Shared_ForwardISR_DirectionalLightUseStaticShadowing; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_1128; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_1132; + float4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightStaticShadowBufferSize; + float4x4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightWorldToStaticShadow; + float PrePadding_TranslucentBasePass_Shared_Reflection_1216; + float PrePadding_TranslucentBasePass_Shared_Reflection_1220; + float PrePadding_TranslucentBasePass_Shared_Reflection_1224; + float PrePadding_TranslucentBasePass_Shared_Reflection_1228; + float PrePadding_TranslucentBasePass_Shared_Reflection_1232; + float PrePadding_TranslucentBasePass_Shared_Reflection_1236; + float PrePadding_TranslucentBasePass_Shared_Reflection_1240; + float PrePadding_TranslucentBasePass_Shared_Reflection_1244; + float PrePadding_TranslucentBasePass_Shared_Reflection_1248; + float PrePadding_TranslucentBasePass_Shared_Reflection_1252; + float PrePadding_TranslucentBasePass_Shared_Reflection_1256; + float PrePadding_TranslucentBasePass_Shared_Reflection_1260; + float PrePadding_TranslucentBasePass_Shared_Reflection_1264; + float PrePadding_TranslucentBasePass_Shared_Reflection_1268; + float PrePadding_TranslucentBasePass_Shared_Reflection_1272; + float 
PrePadding_TranslucentBasePass_Shared_Reflection_1276; + float4 TranslucentBasePass_Shared_Reflection_SkyLightParameters; + float TranslucentBasePass_Shared_Reflection_SkyLightCubemapBrightness; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1300; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1304; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1308; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1312; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1316; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1320; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1324; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1328; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1332; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1336; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1340; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1344; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1348; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1352; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1356; + float4 TranslucentBasePass_Shared_PlanarReflection_ReflectionPlane; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionOrigin; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionXAxis; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionYAxis; + float3x4 TranslucentBasePass_Shared_PlanarReflection_InverseTransposeMirrorMatrix; + packed_float3 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionParameters; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1484; + float2 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionParameters2; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1496; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1500; + 
float4x4 TranslucentBasePass_Shared_PlanarReflection_ProjectionWithExtraFOV[2]; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionScreenScaleBias[2]; + float2 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionScreenBound; + uint TranslucentBasePass_Shared_PlanarReflection_bIsStereo; + float PrePadding_TranslucentBasePass_Shared_Fog_1676; + float PrePadding_TranslucentBasePass_Shared_Fog_1680; + float PrePadding_TranslucentBasePass_Shared_Fog_1684; + float PrePadding_TranslucentBasePass_Shared_Fog_1688; + float PrePadding_TranslucentBasePass_Shared_Fog_1692; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogParameters; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogParameters2; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogColorParameter; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogParameters3; + float4 TranslucentBasePass_Shared_Fog_InscatteringLightDirection; + float4 TranslucentBasePass_Shared_Fog_DirectionalInscatteringColor; + float2 TranslucentBasePass_Shared_Fog_SinCosInscatteringColorCubemapRotation; + float PrePadding_TranslucentBasePass_Shared_Fog_1800; + float PrePadding_TranslucentBasePass_Shared_Fog_1804; + packed_float3 TranslucentBasePass_Shared_Fog_FogInscatteringTextureParameters; + float TranslucentBasePass_Shared_Fog_ApplyVolumetricFog; + float PrePadding_TranslucentBasePass_1824; + float PrePadding_TranslucentBasePass_1828; + float PrePadding_TranslucentBasePass_1832; + float PrePadding_TranslucentBasePass_1836; + float PrePadding_TranslucentBasePass_1840; + float PrePadding_TranslucentBasePass_1844; + float PrePadding_TranslucentBasePass_1848; + float PrePadding_TranslucentBasePass_1852; + float PrePadding_TranslucentBasePass_1856; + float PrePadding_TranslucentBasePass_1860; + float PrePadding_TranslucentBasePass_1864; + float PrePadding_TranslucentBasePass_1868; + float PrePadding_TranslucentBasePass_1872; + float PrePadding_TranslucentBasePass_1876; + float 
PrePadding_TranslucentBasePass_1880; + float PrePadding_TranslucentBasePass_1884; + float PrePadding_TranslucentBasePass_1888; + float PrePadding_TranslucentBasePass_1892; + float PrePadding_TranslucentBasePass_1896; + float PrePadding_TranslucentBasePass_1900; + float PrePadding_TranslucentBasePass_1904; + float PrePadding_TranslucentBasePass_1908; + float PrePadding_TranslucentBasePass_1912; + float PrePadding_TranslucentBasePass_1916; + float PrePadding_TranslucentBasePass_1920; + float PrePadding_TranslucentBasePass_1924; + float PrePadding_TranslucentBasePass_1928; + float PrePadding_TranslucentBasePass_1932; + float PrePadding_TranslucentBasePass_1936; + float PrePadding_TranslucentBasePass_1940; + float PrePadding_TranslucentBasePass_1944; + float PrePadding_TranslucentBasePass_1948; + float PrePadding_TranslucentBasePass_1952; + float PrePadding_TranslucentBasePass_1956; + float PrePadding_TranslucentBasePass_1960; + float PrePadding_TranslucentBasePass_1964; + float PrePadding_TranslucentBasePass_1968; + float PrePadding_TranslucentBasePass_1972; + float PrePadding_TranslucentBasePass_1976; + float PrePadding_TranslucentBasePass_1980; + float PrePadding_TranslucentBasePass_1984; + float PrePadding_TranslucentBasePass_1988; + float PrePadding_TranslucentBasePass_1992; + float PrePadding_TranslucentBasePass_1996; + float PrePadding_TranslucentBasePass_2000; + float PrePadding_TranslucentBasePass_2004; + float PrePadding_TranslucentBasePass_2008; + float PrePadding_TranslucentBasePass_2012; + float PrePadding_TranslucentBasePass_2016; + float PrePadding_TranslucentBasePass_2020; + float PrePadding_TranslucentBasePass_2024; + float PrePadding_TranslucentBasePass_2028; + float PrePadding_TranslucentBasePass_2032; + float PrePadding_TranslucentBasePass_2036; + float PrePadding_TranslucentBasePass_2040; + float PrePadding_TranslucentBasePass_2044; + float PrePadding_TranslucentBasePass_2048; + float PrePadding_TranslucentBasePass_2052; + float 
PrePadding_TranslucentBasePass_2056; + float PrePadding_TranslucentBasePass_2060; + float PrePadding_TranslucentBasePass_2064; + float PrePadding_TranslucentBasePass_2068; + float PrePadding_TranslucentBasePass_2072; + float PrePadding_TranslucentBasePass_2076; + float PrePadding_TranslucentBasePass_2080; + float PrePadding_TranslucentBasePass_2084; + float PrePadding_TranslucentBasePass_2088; + float PrePadding_TranslucentBasePass_2092; + float PrePadding_TranslucentBasePass_2096; + float PrePadding_TranslucentBasePass_2100; + float PrePadding_TranslucentBasePass_2104; + float PrePadding_TranslucentBasePass_2108; + float PrePadding_TranslucentBasePass_2112; + float PrePadding_TranslucentBasePass_2116; + float PrePadding_TranslucentBasePass_2120; + float PrePadding_TranslucentBasePass_2124; + float PrePadding_TranslucentBasePass_2128; + float PrePadding_TranslucentBasePass_2132; + float PrePadding_TranslucentBasePass_2136; + float PrePadding_TranslucentBasePass_2140; + float4 TranslucentBasePass_HZBUvFactorAndInvFactor; + float4 TranslucentBasePass_PrevScreenPositionScaleBias; + float TranslucentBasePass_PrevSceneColorPreExposureInv; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[2]; + float4 Material_ScalarExpressions[1]; +}; + +constant float _108 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 in_var_TEXCOORD11_centroid [[user(locn1)]]; + uint in_var_PRIMITIVE_ID [[user(locn2)]]; + float4 in_var_TEXCOORD7 [[user(locn3)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], const device type_StructuredBuffer_v4float& View_PrimitiveSceneData [[buffer(1)]], constant type_TranslucentBasePass& TranslucentBasePass [[buffer(2)]], constant type_Material& Material [[buffer(3)]], texture3d TranslucentBasePass_Shared_Fog_IntegratedLightScattering [[texture(0)]], 
sampler View_SharedBilinearClampedSampler [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleMaskIn [[sample_mask]]) +{ + main0_out out = {}; + float4 _137 = View.View_SVPositionToTranslatedWorld * float4(gl_FragCoord.xyz, 1.0); + float3 _142 = (_137.xyz / float3(_137.w)) - float3(View.View_PreViewTranslation); + bool _165 = TranslucentBasePass.TranslucentBasePass_Shared_Fog_ApplyVolumetricFog > 0.0; + float4 _215; + if (_165) + { + float4 _172 = View.View_WorldToClip * float4(_142, 1.0); + float _173 = _172.w; + float4 _202; + if (_165) + { + _202 = TranslucentBasePass_Shared_Fog_IntegratedLightScattering.sample(View_SharedBilinearClampedSampler, float3(fma((_172.xy / float2(_173)).xy, float2(0.5, -0.5), float2(0.5)), (log2(fma(_173, View.View_VolumetricFogGridZParams[0], View.View_VolumetricFogGridZParams[1])) * View.View_VolumetricFogGridZParams[2]) * View.View_VolumetricFogInvGridSize[2]), level(0.0)); + } + else + { + _202 = float4(0.0, 0.0, 0.0, 1.0); + } + _215 = float4(fma(in.in_var_TEXCOORD7.xyz, float3(_202.w), _202.xyz), _202.w * in.in_var_TEXCOORD7.w); + } + else + { + _215 = in.in_var_TEXCOORD7; + } + float3 _216 = fast::max(Material.Material_VectorExpressions[1].xyz * float3(fma(1.0 + dot(float3(-0.2857142984867095947265625, -0.4285714328289031982421875, 0.857142865657806396484375), fast::normalize(float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz) * fast::normalize(fma(float3(0.0, 0.0, 1.0), float3(View.View_NormalOverrideParameter.w), View.View_NormalOverrideParameter.xyz)))), 0.5, 0.20000000298023223876953125)), float3(0.0)); + float3 _246; + if (View.View_OutOfBoundsMask > 0.0) + { + uint _222 = in.in_var_PRIMITIVE_ID * 26u; + float3 _245; + if (any(abs(_142 - View_PrimitiveSceneData._m0[_222 + 5u].xyz) > (View_PrimitiveSceneData._m0[_222 + 19u].xyz + float3(1.0)))) + { + _245 = 
mix(float3(1.0, 1.0, 0.0), float3(0.0, 1.0, 1.0), float3(float3(fract(dot(_142, float3(0.57700002193450927734375)) * 0.00200000009499490261077880859375)) > float3(0.5))); + } + else + { + _245 = _216; + } + _246 = _245; + } + else + { + _246 = _216; + } + float4 _255 = float4(fma(_246, float3(_215.w), _215.xyz), _108); + _255.w = 1.0; + float4 _268; + uint _269; + if (View.View_NumSceneColorMSAASamples > 1) + { + _268 = _255 * float4(float(View.View_NumSceneColorMSAASamples) * 0.25); + _269 = gl_SampleMaskIn & 15u; + } + else + { + _268 = _255; + _269 = gl_SampleMaskIn; + } + out.out_var_SV_Target0 = _268; + out.gl_SampleMask = _269; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag b/reference/opt/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag new file mode 100644 index 00000000000..6a7f419a5fa --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag @@ -0,0 +1,213 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + 
float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float 
View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float 
PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Globals +{ + float3 SoftTransitionScale; + float4 ShadowBufferSize; + float ShadowFadeFraction; + float ShadowSharpen; + float4 LightPositionAndInvRadius; + float4x4 ScreenToShadowMatrix; + float2 ProjectionDepthBiasParameters; + float4 ModulatedShadowColor; + float4 ShadowTileOffsetAndSize; +}; + +constant float4 _58 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], float4 
_RESERVED_IDENTIFIER_FIXUP_gl_LastFragData [[color(0)]], texture2d ShadowDepthTexture [[texture(0)]], sampler ShadowDepthTextureSampler [[sampler(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float4 _67 = _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData; + float _68 = _67.w; + float4 _82 = _Globals.ScreenToShadowMatrix * float4((fma(gl_FragCoord.xy, View.View_BufferSizeAndInvSize.zw, -View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_68), _68, 1.0); + float _118 = fast::clamp(fma(fast::clamp(fma(ShadowDepthTexture.sample(ShadowDepthTextureSampler, (((_82.xyz / float3(_82.w)).xy * _Globals.ShadowTileOffsetAndSize.zw).xy + _Globals.ShadowTileOffsetAndSize.xy).xy, level(0.0)).xxx, float3(_Globals.SoftTransitionScale.z), -float3(fma(fast::min(_82.z, 0.999989986419677734375), _Globals.SoftTransitionScale.z, -1.0))), float3(0.0), float3(1.0)).x - 0.5, _Globals.ShadowSharpen, 0.5), 0.0, 1.0); + float3 _127 = mix(_Globals.ModulatedShadowColor.xyz, float3(1.0), float3(mix(1.0, _118 * _118, _Globals.ShadowFadeFraction))); + float4 _128 = float4(_127.x, _127.y, _127.z, _58.w); + _128.w = 0.0; + out.out_var_SV_Target0 = _128; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag b/reference/opt/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag new file mode 100644 index 00000000000..6a7f419a5fa --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag @@ -0,0 +1,213 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 
View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 
View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float 
View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float 
View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Globals +{ + float3 SoftTransitionScale; + float4 ShadowBufferSize; + float ShadowFadeFraction; + float ShadowSharpen; + float4 LightPositionAndInvRadius; + float4x4 ScreenToShadowMatrix; + float2 ProjectionDepthBiasParameters; + float4 ModulatedShadowColor; + float4 ShadowTileOffsetAndSize; +}; + +constant float4 _58 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], float4 _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData [[color(0)]], texture2d ShadowDepthTexture [[texture(0)]], sampler ShadowDepthTextureSampler [[sampler(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float4 _67 = _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData; + float _68 = _67.w; + float4 _82 = _Globals.ScreenToShadowMatrix * float4((fma(gl_FragCoord.xy, View.View_BufferSizeAndInvSize.zw, -View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_68), _68, 1.0); + float _118 = fast::clamp(fma(fast::clamp(fma(ShadowDepthTexture.sample(ShadowDepthTextureSampler, (((_82.xyz / float3(_82.w)).xy * _Globals.ShadowTileOffsetAndSize.zw).xy + _Globals.ShadowTileOffsetAndSize.xy).xy, level(0.0)).xxx, float3(_Globals.SoftTransitionScale.z), -float3(fma(fast::min(_82.z, 0.999989986419677734375), _Globals.SoftTransitionScale.z, -1.0))), float3(0.0), float3(1.0)).x - 0.5, _Globals.ShadowSharpen, 0.5), 0.0, 1.0); + float3 _127 = mix(_Globals.ModulatedShadowColor.xyz, float3(1.0), float3(mix(1.0, _118 * _118, _Globals.ShadowFadeFraction))); + float4 _128 = float4(_127.x, _127.y, _127.z, _58.w); + _128.w = 0.0; + out.out_var_SV_Target0 = _128; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag b/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag new file mode 100644 
index 00000000000..3c9be2985ad --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag @@ -0,0 +1,84 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct spvDescriptorSetBuffer0 +{ + const device type_StructuredBuffer_v4float* CulledObjectBoxBounds [[id(0)]]; + constant type_Globals* _Globals [[id(1)]]; + texture2d RWShadowTileNumCulledObjects [[id(2)]]; + device atomic_uint* RWShadowTileNumCulledObjects_atomic [[id(3)]]; +}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float(((*spvDescriptorSet0._Globals).ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2((*spvDescriptorSet0._Globals).ShadowTileListGroupSize); + float2 _96 = fma(_91 / _93, float2(2.0), float2(-1.0)); + float2 _100 = fma((_91 + float2(1.0)) / _93, float2(2.0), float2(-1.0)); + float3 _101 = float3(_100.x, _100.y, _70.z); + _101.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xy > _96.xy) && all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz < _101)) + { + float3 _120 = (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz + (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xyz; + float _122 = _96.x; + float _123 = _96.y; + float _126 = _100.x; + float _129 = _100.y; + float3 _166 = fma(float3(-0.5), _120, float3(_122, _123, -1000.0)); + float3 
_170 = float3(dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _189 = fma(float3(-0.5), _120, float3(_126, _123, -1000.0)); + float3 _193 = float3(dot(_189, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_189, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_189, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _205 = fma(float3(-0.5), _120, float3(_122, _129, -1000.0)); + float3 _209 = float3(dot(_205, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_205, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_205, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _221 = fma(float3(-0.5), _120, float3(_126, _129, -1000.0)); + float3 _225 = float3(dot(_221, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_221, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_221, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _237 = fma(float3(-0.5), _120, float3(_122, _123, 1.0)); + float3 _241 = float3(dot(_237, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_237, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_237, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _253 = fma(float3(-0.5), _120, float3(_126, _123, 1.0)); + float3 _257 = float3(dot(_253, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_253, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_253, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _269 = fma(float3(-0.5), _120, float3(_122, _129, 1.0)); + float3 _273 = float3(dot(_269, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_269, 
(*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_269, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _285 = fma(float3(-0.5), _120, float3(_126, _129, 1.0)); + float3 _289 = float3(dot(_285, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_285, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_285, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + if (all(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(float3(500000.0), _170), _193), _209), _225), _241), _257), _273), _289) < float3(1.0)) && all(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(float3(-500000.0), _170), _193), _209), _225), _241), _257), _273), _289) > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.RWShadowTileNumCulledObjects_atomic[(_78 * (*spvDescriptorSet0._Globals).ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.frag b/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.frag new file mode 100644 index 00000000000..617bb4d0b81 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.frag @@ -0,0 +1,76 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d 
RWShadowTileNumCulledObjects [[texture(0)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = fma(_91 / _93, float2(2.0), float2(-1.0)); + float2 _100 = fma((_91 + float2(1.0)) / _93, float2(2.0), float2(-1.0)); + float3 _101 = float3(_100.x, _100.y, _70.z); + _101.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _101)) + { + float3 _120 = CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz; + float _122 = _96.x; + float _123 = _96.y; + float _126 = _100.x; + float _129 = _100.y; + float3 _166 = fma(float3(-0.5), _120, float3(_122, _123, -1000.0)); + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _189 = fma(float3(-0.5), _120, float3(_126, _123, -1000.0)); + float3 _193 = float3(dot(_189, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_189, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_189, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _205 = fma(float3(-0.5), _120, float3(_122, _129, -1000.0)); + float3 _209 = float3(dot(_205, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_205, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_205, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _221 = fma(float3(-0.5), _120, float3(_126, _129, -1000.0)); + float3 _225 = float3(dot(_221, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_221, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_221, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _237 = fma(float3(-0.5), 
_120, float3(_122, _123, 1.0)); + float3 _241 = float3(dot(_237, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_237, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_237, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _253 = fma(float3(-0.5), _120, float3(_126, _123, 1.0)); + float3 _257 = float3(dot(_253, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_253, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_253, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _269 = fma(float3(-0.5), _120, float3(_122, _129, 1.0)); + float3 _273 = float3(dot(_269, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_269, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_269, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _285 = fma(float3(-0.5), _120, float3(_126, _129, 1.0)); + float3 _289 = float3(dot(_285, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_285, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_285, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + if (all(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(float3(500000.0), _170), _193), _209), _225), _241), _257), _273), _289) < float3(1.0)) && all(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(float3(-500000.0), _170), _193), _209), _225), _241), _257), _273), _289) > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag b/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag new file mode 100644 index 00000000000..3f68a92f0a7 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag @@ -0,0 +1,77 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include 
+#include + +using namespace metal; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvBufferSizeConstants [[buffer(25)]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(0)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + constant uint& CulledObjectBoxBoundsBufferSize = spvBufferSizeConstants[0]; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = fma(_91 / _93, float2(2.0), float2(-1.0)); + float2 _100 = fma((_91 + float2(1.0)) / _93, float2(2.0), float2(-1.0)); + float3 _101 = float3(_100.x, _100.y, _70.z); + _101.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _323 = uint(clamp(int(_103 + 1u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u)))); + if (all(CulledObjectBoxBounds._m0[_323].xy > _96.xy) && all(CulledObjectBoxBounds._m0[uint(clamp(int(_103), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz < _101)) + { + float3 _120 = CulledObjectBoxBounds._m0[uint(clamp(int(_103), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz + CulledObjectBoxBounds._m0[_323].xyz; + float _122 = _96.x; + float _123 = _96.y; + float _126 = _100.x; + float _129 = _100.y; + float3 _166 = fma(float3(-0.5), _120, float3(_122, _123, -1000.0)); + float3 _170 
= float3(dot(_166, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_166, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_166, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _189 = fma(float3(-0.5), _120, float3(_126, _123, -1000.0)); + float3 _193 = float3(dot(_189, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_189, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_189, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _205 = fma(float3(-0.5), _120, float3(_122, _129, -1000.0)); + float3 _209 = float3(dot(_205, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_205, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_205, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _221 = fma(float3(-0.5), _120, float3(_126, _129, -1000.0)); + float3 _225 = float3(dot(_221, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_221, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 
2147483647u))))].xyz), dot(_221, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _237 = fma(float3(-0.5), _120, float3(_122, _123, 1.0)); + float3 _241 = float3(dot(_237, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_237, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_237, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _253 = fma(float3(-0.5), _120, float3(_126, _123, 1.0)); + float3 _257 = float3(dot(_253, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_253, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_253, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _269 = fma(float3(-0.5), _120, float3(_122, _129, 1.0)); + float3 _273 = float3(dot(_269, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_269, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_269, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _285 = fma(float3(-0.5), _120, float3(_126, _129, 1.0)); + float3 _289 = float3(dot(_285, CulledObjectBoxBounds._m0[uint(clamp(int(_103 
+ 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_285, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_285, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + if (all(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(float3(500000.0), _170), _193), _209), _225), _241), _257), _273), _289) < float3(1.0)) && all(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(float3(-500000.0), _170), _193), _209), _225), _241), _257), _273), _289) > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc b/reference/opt/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc new file mode 100644 index 00000000000..1d0212593bb --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc @@ -0,0 +1,396 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; + float4 Color; + spvUnsafeArray TexCoords; + float4 LightMapCoordinate; + uint PrimitiveId; + uint LightmapDataIndex; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FSharedBasePassInterpolants +{ +}; +struct FBasePassInterpolantsVSToDS +{ + FSharedBasePassInterpolants _m0; +}; + +struct FBasePassVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + FBasePassInterpolantsVSToDS BasePassInterpolants; + float4 Position; +}; + +struct FPNTessellationHSToDS +{ + FBasePassVSToDS PassSpecificData; + spvUnsafeArray WorldPosition; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 
View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + 
float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float 
View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +constant float4 _142 = {}; + +struct main0_out +{ + float4 
out_var_COLOR0; + uint out_var_LIGHTMAP_ID; + float3 out_var_PN_DisplacementScales; + spvUnsafeArray out_var_PN_POSITION; + float out_var_PN_TessellationMultiplier; + float out_var_PN_WorldDisplacementMultiplier; + uint out_var_PRIMITIVE_ID; + spvUnsafeArray out_var_TEXCOORD0; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_TEXCOORD4; + float4 out_var_VS_To_DS_Position; +}; + +struct main0_patchOut +{ + float4 out_var_PN_POSITION9; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_COLOR0 [[attribute(2)]]; + float4 in_var_TEXCOORD0_0 [[attribute(3)]]; + float4 in_var_TEXCOORD4 [[attribute(4)]]; + uint in_var_PRIMITIVE_ID [[attribute(5)]]; + uint in_var_LIGHTMAP_ID [[attribute(6)]]; + float4 in_var_VS_To_DS_Position [[attribute(7)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], const device type_StructuredBuffer_v4float& View_PrimitiveSceneData [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _144 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, gl_in[2].in_var_TEXCOORD10_centroid, gl_in[3].in_var_TEXCOORD10_centroid, 
gl_in[4].in_var_TEXCOORD10_centroid, gl_in[5].in_var_TEXCOORD10_centroid, gl_in[6].in_var_TEXCOORD10_centroid, gl_in[7].in_var_TEXCOORD10_centroid, gl_in[8].in_var_TEXCOORD10_centroid, gl_in[9].in_var_TEXCOORD10_centroid, gl_in[10].in_var_TEXCOORD10_centroid, gl_in[11].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _145 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, gl_in[2].in_var_TEXCOORD11_centroid, gl_in[3].in_var_TEXCOORD11_centroid, gl_in[4].in_var_TEXCOORD11_centroid, gl_in[5].in_var_TEXCOORD11_centroid, gl_in[6].in_var_TEXCOORD11_centroid, gl_in[7].in_var_TEXCOORD11_centroid, gl_in[8].in_var_TEXCOORD11_centroid, gl_in[9].in_var_TEXCOORD11_centroid, gl_in[10].in_var_TEXCOORD11_centroid, gl_in[11].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _146 = spvUnsafeArray({ gl_in[0].in_var_COLOR0, gl_in[1].in_var_COLOR0, gl_in[2].in_var_COLOR0, gl_in[3].in_var_COLOR0, gl_in[4].in_var_COLOR0, gl_in[5].in_var_COLOR0, gl_in[6].in_var_COLOR0, gl_in[7].in_var_COLOR0, gl_in[8].in_var_COLOR0, gl_in[9].in_var_COLOR0, gl_in[10].in_var_COLOR0, gl_in[11].in_var_COLOR0 }); + spvUnsafeArray, 12> _147 = spvUnsafeArray, 12>({ spvUnsafeArray({ gl_in[0].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[1].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[2].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[3].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[4].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[5].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[6].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[7].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[8].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[9].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[10].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[11].in_var_TEXCOORD0_0 }) }); + spvUnsafeArray _148 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD4, gl_in[1].in_var_TEXCOORD4, gl_in[2].in_var_TEXCOORD4, gl_in[3].in_var_TEXCOORD4, gl_in[4].in_var_TEXCOORD4, gl_in[5].in_var_TEXCOORD4, gl_in[6].in_var_TEXCOORD4, 
gl_in[7].in_var_TEXCOORD4, gl_in[8].in_var_TEXCOORD4, gl_in[9].in_var_TEXCOORD4, gl_in[10].in_var_TEXCOORD4, gl_in[11].in_var_TEXCOORD4 }); + spvUnsafeArray _149 = spvUnsafeArray({ gl_in[0].in_var_PRIMITIVE_ID, gl_in[1].in_var_PRIMITIVE_ID, gl_in[2].in_var_PRIMITIVE_ID, gl_in[3].in_var_PRIMITIVE_ID, gl_in[4].in_var_PRIMITIVE_ID, gl_in[5].in_var_PRIMITIVE_ID, gl_in[6].in_var_PRIMITIVE_ID, gl_in[7].in_var_PRIMITIVE_ID, gl_in[8].in_var_PRIMITIVE_ID, gl_in[9].in_var_PRIMITIVE_ID, gl_in[10].in_var_PRIMITIVE_ID, gl_in[11].in_var_PRIMITIVE_ID }); + spvUnsafeArray _150 = spvUnsafeArray({ gl_in[0].in_var_LIGHTMAP_ID, gl_in[1].in_var_LIGHTMAP_ID, gl_in[2].in_var_LIGHTMAP_ID, gl_in[3].in_var_LIGHTMAP_ID, gl_in[4].in_var_LIGHTMAP_ID, gl_in[5].in_var_LIGHTMAP_ID, gl_in[6].in_var_LIGHTMAP_ID, gl_in[7].in_var_LIGHTMAP_ID, gl_in[8].in_var_LIGHTMAP_ID, gl_in[9].in_var_LIGHTMAP_ID, gl_in[10].in_var_LIGHTMAP_ID, gl_in[11].in_var_LIGHTMAP_ID }); + spvUnsafeArray _259 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position, gl_in[3].in_var_VS_To_DS_Position, gl_in[4].in_var_VS_To_DS_Position, gl_in[5].in_var_VS_To_DS_Position, gl_in[6].in_var_VS_To_DS_Position, gl_in[7].in_var_VS_To_DS_Position, gl_in[8].in_var_VS_To_DS_Position, gl_in[9].in_var_VS_To_DS_Position, gl_in[10].in_var_VS_To_DS_Position, gl_in[11].in_var_VS_To_DS_Position }); + spvUnsafeArray _284 = spvUnsafeArray({ FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[0], _145[0], _146[0], _147[0], _148[0], _149[0], _150[0] } }, FBasePassInterpolantsVSToDS{ { } }, _259[0] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[1], _145[1], _146[1], _147[1], _148[1], _149[1], _150[1] } }, FBasePassInterpolantsVSToDS{ { } }, _259[1] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[2], _145[2], _146[2], _147[2], _148[2], _149[2], _150[2] } }, 
FBasePassInterpolantsVSToDS{ { } }, _259[2] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[3], _145[3], _146[3], _147[3], _148[3], _149[3], _150[3] } }, FBasePassInterpolantsVSToDS{ { } }, _259[3] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[4], _145[4], _146[4], _147[4], _148[4], _149[4], _150[4] } }, FBasePassInterpolantsVSToDS{ { } }, _259[4] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[5], _145[5], _146[5], _147[5], _148[5], _149[5], _150[5] } }, FBasePassInterpolantsVSToDS{ { } }, _259[5] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[6], _145[6], _146[6], _147[6], _148[6], _149[6], _150[6] } }, FBasePassInterpolantsVSToDS{ { } }, _259[6] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[7], _145[7], _146[7], _147[7], _148[7], _149[7], _150[7] } }, FBasePassInterpolantsVSToDS{ { } }, _259[7] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[8], _145[8], _146[8], _147[8], _148[8], _149[8], _150[8] } }, FBasePassInterpolantsVSToDS{ { } }, _259[8] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[9], _145[9], _146[9], _147[9], _148[9], _149[9], _150[9] } }, FBasePassInterpolantsVSToDS{ { } }, _259[9] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[10], _145[10], _146[10], _147[10], _148[10], _149[10], _150[10] } }, FBasePassInterpolantsVSToDS{ { } }, _259[10] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[11], _145[11], _146[11], _147[11], _148[11], _149[11], _150[11] } }, FBasePassInterpolantsVSToDS{ { } }, _259[11] } }); + spvUnsafeArray param_var_I; + param_var_I = _284; + float4 _301 = 
float4(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float3 _310 = View_PrimitiveSceneData._m0[(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.PrimitiveId * 26u) + 22u].xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + uint _313 = (gl_InvocationID < 2u) ? (gl_InvocationID + 1u) : 0u; + uint _314 = 2u * gl_InvocationID; + uint _315 = 3u + _314; + uint _316 = _314 + 4u; + float4 _328 = float4(param_var_I[_313].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _336 = float4(param_var_I[_315].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _344 = float4(param_var_I[_316].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + spvUnsafeArray _392 = spvUnsafeArray({ param_var_I[gl_InvocationID].Position, (((((float4(2.0) * param_var_I[gl_InvocationID].Position) + param_var_I[_313].Position) - (float4(dot(param_var_I[_313].Position - param_var_I[gl_InvocationID].Position, _301)) * _301)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_315].Position) + param_var_I[_316].Position) - (float4(dot(param_var_I[_316].Position - param_var_I[_315].Position, _336)) * _336)) * float4(0.3333333432674407958984375))) * float4(0.5), (((((float4(2.0) * param_var_I[_313].Position) + param_var_I[gl_InvocationID].Position) - (float4(dot(param_var_I[gl_InvocationID].Position - param_var_I[_313].Position, _328)) * _328)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_316].Position) + 
param_var_I[_315].Position) - (float4(dot(param_var_I[_315].Position - param_var_I[_316].Position, _344)) * _344)) * float4(0.3333333432674407958984375))) * float4(0.5) }); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_COLOR0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.Color; + gl_out[gl_InvocationID].out_var_TEXCOORD0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TexCoords; + gl_out[gl_InvocationID].out_var_TEXCOORD4 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.LightMapCoordinate; + gl_out[gl_InvocationID].out_var_PRIMITIVE_ID = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.PrimitiveId; + gl_out[gl_InvocationID].out_var_LIGHTMAP_ID = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.LightmapDataIndex; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_PN_POSITION = _392; + gl_out[gl_InvocationID].out_var_PN_DisplacementScales = _310; + gl_out[gl_InvocationID].out_var_PN_TessellationMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_WorldDisplacementMultiplier = 1.0; + temp_var_hullMainRetVal[gl_InvocationID] = FPNTessellationHSToDS{ param_var_I[gl_InvocationID], _392, _310, 1.0, 1.0 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _450 = (((((temp_var_hullMainRetVal[0u].WorldPosition[1] + temp_var_hullMainRetVal[0u].WorldPosition[2]) + temp_var_hullMainRetVal[1u].WorldPosition[1]) + temp_var_hullMainRetVal[1u].WorldPosition[2]) + temp_var_hullMainRetVal[2u].WorldPosition[1]) + temp_var_hullMainRetVal[2u].WorldPosition[2]) * 
float4(0.16666667163372039794921875); + float4 _463; + _463.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _463.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _463.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _463.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _589; + for (;;) + { + float4 _489 = View.View_ViewToClip * float4(0.0); + float4 _494 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[0u].WorldPosition[0].xyz, 1.0); + float3 _495 = _494.xyz; + float3 _496 = _489.xyz; + float _498 = _494.w; + float _499 = _489.w; + float4 _516 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[1u].WorldPosition[0].xyz, 1.0); + float3 _517 = _516.xyz; + float _519 = _516.w; + float4 _537 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); + float3 _538 = _537.xyz; + float _540 = _537.w; + if (any((((int3((_495 - _496) < float3(_498 + _499)) + (int3(2) * int3((_495 + _496) > float3((-_498) - _499)))) | (int3((_517 - _496) < float3(_519 + _499)) + (int3(2) * int3((_517 + _496) > float3((-_519) - _499))))) | (int3((_538 - _496) < float3(_540 + _499)) + (int3(2) * int3((_538 + _496) > float3((-_540) - _499))))) != int3(3))) + { + _589 = float4(0.0); + break; + } + float3 _558 = temp_var_hullMainRetVal[0u].WorldPosition[0].xyz - temp_var_hullMainRetVal[1u].WorldPosition[0].xyz; + float3 _559 = temp_var_hullMainRetVal[1u].WorldPosition[0].xyz - temp_var_hullMainRetVal[2u].WorldPosition[0].xyz; + float3 _560 = temp_var_hullMainRetVal[2u].WorldPosition[0].xyz - temp_var_hullMainRetVal[0u].WorldPosition[0].xyz; + float3 _563 = 
(float3(0.5) * (temp_var_hullMainRetVal[0u].WorldPosition[0].xyz + temp_var_hullMainRetVal[1u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _566 = (float3(0.5) * (temp_var_hullMainRetVal[1u].WorldPosition[0].xyz + temp_var_hullMainRetVal[2u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _569 = (float3(0.5) * (temp_var_hullMainRetVal[2u].WorldPosition[0].xyz + temp_var_hullMainRetVal[0u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float _573 = sqrt(dot(_559, _559) / dot(_566, _566)); + float _577 = sqrt(dot(_560, _560) / dot(_569, _569)); + float _581 = sqrt(dot(_558, _558) / dot(_563, _563)); + float4 _582 = float4(_573, _577, _581, 1.0); + _582.w = 0.333000004291534423828125 * ((_573 + _577) + _581); + _589 = float4(View.View_AdaptiveTessellationFactor) * _582; + break; + } + float4 _591 = fast::clamp(_463 * _589, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_591.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_591.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_591.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_591.w); + patchOut.out_var_PN_POSITION9 = _450 + ((_450 - (((temp_var_hullMainRetVal[2u].WorldPosition[0] + temp_var_hullMainRetVal[1u].WorldPosition[0]) + temp_var_hullMainRetVal[0u].WorldPosition[0]) * float4(0.3333333432674407958984375))) * float4(0.5)); + } +} + diff --git a/reference/opt/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc b/reference/opt/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc new file mode 100644 index 00000000000..f72e5d3b753 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc @@ -0,0 +1,464 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + 
+template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FHitProxyVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + float4 Position; + uint VertexID; +}; + +struct FHullShaderConstantDominantVertexData +{ + float2 UV; + float4 Normal; + float3 Tangent; +}; + +struct FHullShaderConstantDominantEdgeData +{ + float2 UV0; + float2 UV1; + float4 Normal0; + float4 Normal1; + float3 Tangent0; + float3 Tangent1; +}; + +struct FPNTessellationHSToDS +{ + FHitProxyVSToDS PassSpecificData; + spvUnsafeArray WorldPosition; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; + FHullShaderConstantDominantVertexData DominantVertex; + FHullShaderConstantDominantEdgeData DominantEdge; +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; 
+ float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 
View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float 
View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float 
PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_DrawsVelocity; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + uint Primitive_LightingChannelMask; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightmapDataIndex; + packed_float3 Primitive_PreSkinnedLocalBounds; + int Primitive_SingleCaptureIndex; + uint Primitive_OutputVelocity; + uint PrePadding_Primitive_420; + uint PrePadding_Primitive_424; + uint PrePadding_Primitive_428; + float4 Primitive_CustomPrimitiveData[4]; +}; + +constant float4 _140 = {}; + +struct main0_out +{ + float3 out_var_PN_DisplacementScales; + float2 out_var_PN_DominantEdge; + float2 out_var_PN_DominantEdge1; + float4 out_var_PN_DominantEdge2; + float4 out_var_PN_DominantEdge3; + float3 out_var_PN_DominantEdge4; + float3 out_var_PN_DominantEdge5; + float2 out_var_PN_DominantVertex; + float4 out_var_PN_DominantVertex1; + float3 out_var_PN_DominantVertex2; + spvUnsafeArray out_var_PN_POSITION; + float 
out_var_PN_TessellationMultiplier; + float out_var_PN_WorldDisplacementMultiplier; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_VS_To_DS_Position; + uint out_var_VS_To_DS_VertexID; +}; + +struct main0_patchOut +{ + float4 out_var_PN_POSITION9; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_VS_To_DS_Position [[attribute(2)]]; + uint in_var_VS_To_DS_VertexID [[attribute(3)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _142 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, gl_in[2].in_var_TEXCOORD10_centroid, gl_in[3].in_var_TEXCOORD10_centroid, gl_in[4].in_var_TEXCOORD10_centroid, gl_in[5].in_var_TEXCOORD10_centroid, gl_in[6].in_var_TEXCOORD10_centroid, gl_in[7].in_var_TEXCOORD10_centroid, gl_in[8].in_var_TEXCOORD10_centroid, gl_in[9].in_var_TEXCOORD10_centroid, gl_in[10].in_var_TEXCOORD10_centroid, gl_in[11].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _143 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, 
gl_in[2].in_var_TEXCOORD11_centroid, gl_in[3].in_var_TEXCOORD11_centroid, gl_in[4].in_var_TEXCOORD11_centroid, gl_in[5].in_var_TEXCOORD11_centroid, gl_in[6].in_var_TEXCOORD11_centroid, gl_in[7].in_var_TEXCOORD11_centroid, gl_in[8].in_var_TEXCOORD11_centroid, gl_in[9].in_var_TEXCOORD11_centroid, gl_in[10].in_var_TEXCOORD11_centroid, gl_in[11].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _192 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position, gl_in[3].in_var_VS_To_DS_Position, gl_in[4].in_var_VS_To_DS_Position, gl_in[5].in_var_VS_To_DS_Position, gl_in[6].in_var_VS_To_DS_Position, gl_in[7].in_var_VS_To_DS_Position, gl_in[8].in_var_VS_To_DS_Position, gl_in[9].in_var_VS_To_DS_Position, gl_in[10].in_var_VS_To_DS_Position, gl_in[11].in_var_VS_To_DS_Position }); + spvUnsafeArray _193 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_VertexID, gl_in[1].in_var_VS_To_DS_VertexID, gl_in[2].in_var_VS_To_DS_VertexID, gl_in[3].in_var_VS_To_DS_VertexID, gl_in[4].in_var_VS_To_DS_VertexID, gl_in[5].in_var_VS_To_DS_VertexID, gl_in[6].in_var_VS_To_DS_VertexID, gl_in[7].in_var_VS_To_DS_VertexID, gl_in[8].in_var_VS_To_DS_VertexID, gl_in[9].in_var_VS_To_DS_VertexID, gl_in[10].in_var_VS_To_DS_VertexID, gl_in[11].in_var_VS_To_DS_VertexID }); + spvUnsafeArray _230 = spvUnsafeArray({ FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[0], _143[0] } }, _192[0], _193[0] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[1], _143[1] } }, _192[1], _193[1] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[2], _143[2] } }, _192[2], _193[2] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[3], _143[3] } }, _192[3], _193[3] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[4], _143[4] } }, _192[4], _193[4] }, FHitProxyVSToDS{ 
FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[5], _143[5] } }, _192[5], _193[5] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[6], _143[6] } }, _192[6], _193[6] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[7], _143[7] } }, _192[7], _193[7] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[8], _143[8] } }, _192[8], _193[8] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[9], _143[9] } }, _192[9], _193[9] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[10], _143[10] } }, _192[10], _193[10] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[11], _143[11] } }, _192[11], _193[11] } }); + spvUnsafeArray param_var_I; + param_var_I = _230; + float4 _247 = float4(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float3 _251 = Primitive.Primitive_NonUniformScale.xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + uint _254 = (gl_InvocationID < 2u) ? (gl_InvocationID + 1u) : 0u; + uint _255 = 2u * gl_InvocationID; + uint _256 = 3u + _255; + uint _257 = _255 + 4u; + uint _264 = (_254 < 2u) ? 
(_254 + 1u) : 0u; + uint _265 = 2u * _254; + uint _266 = 3u + _265; + uint _267 = _265 + 4u; + float4 _279 = float4(param_var_I[9u + gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _315; + float4 _316; + float4 _317; + float4 _318; + if ((param_var_I[_266].VertexID < param_var_I[_254].VertexID) || ((param_var_I[_266].VertexID == param_var_I[_254].VertexID) && (param_var_I[_267].VertexID < param_var_I[_264].VertexID))) + { + _315 = param_var_I[_267].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _316 = param_var_I[_267].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + _317 = param_var_I[_266].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _318 = param_var_I[_266].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + } + else + { + _315 = param_var_I[_264].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _316 = param_var_I[_264].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + _317 = param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _318 = param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + } + float4 _324 = float4(_318.xyz, 0.0); + float4 _328 = float4(_316.xyz, 0.0); + float4 _336 = float4(param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _344 = float4(param_var_I[_256].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _352 = float4(param_var_I[_257].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + spvUnsafeArray _402 = spvUnsafeArray({ param_var_I[gl_InvocationID].Position, (((((float4(2.0) * param_var_I[gl_InvocationID].Position) + param_var_I[_254].Position) - (float4(dot(param_var_I[_254].Position - param_var_I[gl_InvocationID].Position, _247)) * _247)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_256].Position) + param_var_I[_257].Position) - (float4(dot(param_var_I[_257].Position - 
param_var_I[_256].Position, _344)) * _344)) * float4(0.3333333432674407958984375))) * float4(0.5), (((((float4(2.0) * param_var_I[_254].Position) + param_var_I[gl_InvocationID].Position) - (float4(dot(param_var_I[gl_InvocationID].Position - param_var_I[_254].Position, _336)) * _336)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_257].Position) + param_var_I[_256].Position) - (float4(dot(param_var_I[_256].Position - param_var_I[_257].Position, _352)) * _352)) * float4(0.3333333432674407958984375))) * float4(0.5) }); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_VS_To_DS_VertexID = param_var_I[gl_InvocationID].VertexID; + gl_out[gl_InvocationID].out_var_PN_POSITION = _402; + gl_out[gl_InvocationID].out_var_PN_DisplacementScales = _251; + gl_out[gl_InvocationID].out_var_PN_TessellationMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_WorldDisplacementMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_DominantVertex = float2(0.0); + gl_out[gl_InvocationID].out_var_PN_DominantVertex1 = _279; + gl_out[gl_InvocationID].out_var_PN_DominantVertex2 = param_var_I[9u + gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz; + gl_out[gl_InvocationID].out_var_PN_DominantEdge = float2(0.0); + gl_out[gl_InvocationID].out_var_PN_DominantEdge1 = float2(0.0); + gl_out[gl_InvocationID].out_var_PN_DominantEdge2 = _324; + gl_out[gl_InvocationID].out_var_PN_DominantEdge3 = _328; + gl_out[gl_InvocationID].out_var_PN_DominantEdge4 = _317.xyz; + gl_out[gl_InvocationID].out_var_PN_DominantEdge5 = _315.xyz; + temp_var_hullMainRetVal[gl_InvocationID] = FPNTessellationHSToDS{ 
param_var_I[gl_InvocationID], _402, _251, 1.0, 1.0, FHullShaderConstantDominantVertexData{ float2(0.0), _279, param_var_I[9u + gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz }, FHullShaderConstantDominantEdgeData{ float2(0.0), float2(0.0), _324, _328, _317.xyz, _315.xyz } }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _461 = (((((temp_var_hullMainRetVal[0u].WorldPosition[1] + temp_var_hullMainRetVal[0u].WorldPosition[2]) + temp_var_hullMainRetVal[1u].WorldPosition[1]) + temp_var_hullMainRetVal[1u].WorldPosition[2]) + temp_var_hullMainRetVal[2u].WorldPosition[1]) + temp_var_hullMainRetVal[2u].WorldPosition[2]) * float4(0.16666667163372039794921875); + float4 _474; + _474.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _474.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _474.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _474.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _600; + for (;;) + { + float4 _500 = View.View_ViewToClip * float4(0.0); + float4 _505 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[0u].WorldPosition[0].xyz, 1.0); + float3 _506 = _505.xyz; + float3 _507 = _500.xyz; + float _509 = _505.w; + float _510 = _500.w; + float4 _527 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[1u].WorldPosition[0].xyz, 1.0); + float3 _528 = _527.xyz; + float _530 = _527.w; + float4 _548 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); + float3 _549 = _548.xyz; + float _551 = _548.w; + if (any((((int3((_506 - _507) < 
float3(_509 + _510)) + (int3(2) * int3((_506 + _507) > float3((-_509) - _510)))) | (int3((_528 - _507) < float3(_530 + _510)) + (int3(2) * int3((_528 + _507) > float3((-_530) - _510))))) | (int3((_549 - _507) < float3(_551 + _510)) + (int3(2) * int3((_549 + _507) > float3((-_551) - _510))))) != int3(3))) + { + _600 = float4(0.0); + break; + } + float3 _569 = temp_var_hullMainRetVal[0u].WorldPosition[0].xyz - temp_var_hullMainRetVal[1u].WorldPosition[0].xyz; + float3 _570 = temp_var_hullMainRetVal[1u].WorldPosition[0].xyz - temp_var_hullMainRetVal[2u].WorldPosition[0].xyz; + float3 _571 = temp_var_hullMainRetVal[2u].WorldPosition[0].xyz - temp_var_hullMainRetVal[0u].WorldPosition[0].xyz; + float3 _574 = (float3(0.5) * (temp_var_hullMainRetVal[0u].WorldPosition[0].xyz + temp_var_hullMainRetVal[1u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _577 = (float3(0.5) * (temp_var_hullMainRetVal[1u].WorldPosition[0].xyz + temp_var_hullMainRetVal[2u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _580 = (float3(0.5) * (temp_var_hullMainRetVal[2u].WorldPosition[0].xyz + temp_var_hullMainRetVal[0u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float _584 = sqrt(dot(_570, _570) / dot(_577, _577)); + float _588 = sqrt(dot(_571, _571) / dot(_580, _580)); + float _592 = sqrt(dot(_569, _569) / dot(_574, _574)); + float4 _593 = float4(_584, _588, _592, 1.0); + _593.w = 0.333000004291534423828125 * ((_584 + _588) + _592); + _600 = float4(View.View_AdaptiveTessellationFactor) * _593; + break; + } + float4 _602 = fast::clamp(_474 * _600, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_602.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_602.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_602.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_602.w); + patchOut.out_var_PN_POSITION9 = 
_461 + ((_461 - (((temp_var_hullMainRetVal[2u].WorldPosition[0] + temp_var_hullMainRetVal[1u].WorldPosition[0]) + temp_var_hullMainRetVal[0u].WorldPosition[0]) * float4(0.3333333432674407958984375))) * float4(0.5)); + } +} + diff --git a/reference/opt/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc b/reference/opt/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc new file mode 100644 index 00000000000..5d4e320bd04 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc @@ -0,0 +1,408 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; + float4 Color; + spvUnsafeArray TexCoords; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FHitProxyVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + float4 Position; +}; + +struct FPNTessellationHSToDS +{ + FHitProxyVSToDS PassSpecificData; + spvUnsafeArray WorldPosition; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; +}; + +struct 
type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 
View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float 
PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float 
PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_DrawsVelocity; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + uint Primitive_LightingChannelMask; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightmapDataIndex; + packed_float3 Primitive_PreSkinnedLocalBounds; + int Primitive_SingleCaptureIndex; + uint Primitive_OutputVelocity; + uint PrePadding_Primitive_420; + uint PrePadding_Primitive_424; + uint PrePadding_Primitive_428; + float4 Primitive_CustomPrimitiveData[4]; +}; + +constant float4 _127 = {}; + +struct main0_out +{ + float4 out_var_COLOR0; + float3 
out_var_PN_DisplacementScales; + spvUnsafeArray out_var_PN_POSITION; + float out_var_PN_TessellationMultiplier; + float out_var_PN_WorldDisplacementMultiplier; + spvUnsafeArray out_var_TEXCOORD0; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_VS_To_DS_Position; +}; + +struct main0_patchOut +{ + float4 out_var_PN_POSITION9; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_COLOR0 [[attribute(2)]]; + float2 in_var_TEXCOORD0_0 [[attribute(3)]]; + float2 in_var_TEXCOORD0_1 [[attribute(4)]]; + float4 in_var_VS_To_DS_Position [[attribute(5)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _129 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, gl_in[2].in_var_TEXCOORD10_centroid, gl_in[3].in_var_TEXCOORD10_centroid, gl_in[4].in_var_TEXCOORD10_centroid, gl_in[5].in_var_TEXCOORD10_centroid, gl_in[6].in_var_TEXCOORD10_centroid, gl_in[7].in_var_TEXCOORD10_centroid, gl_in[8].in_var_TEXCOORD10_centroid, gl_in[9].in_var_TEXCOORD10_centroid, 
gl_in[10].in_var_TEXCOORD10_centroid, gl_in[11].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _130 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, gl_in[2].in_var_TEXCOORD11_centroid, gl_in[3].in_var_TEXCOORD11_centroid, gl_in[4].in_var_TEXCOORD11_centroid, gl_in[5].in_var_TEXCOORD11_centroid, gl_in[6].in_var_TEXCOORD11_centroid, gl_in[7].in_var_TEXCOORD11_centroid, gl_in[8].in_var_TEXCOORD11_centroid, gl_in[9].in_var_TEXCOORD11_centroid, gl_in[10].in_var_TEXCOORD11_centroid, gl_in[11].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _131 = spvUnsafeArray({ gl_in[0].in_var_COLOR0, gl_in[1].in_var_COLOR0, gl_in[2].in_var_COLOR0, gl_in[3].in_var_COLOR0, gl_in[4].in_var_COLOR0, gl_in[5].in_var_COLOR0, gl_in[6].in_var_COLOR0, gl_in[7].in_var_COLOR0, gl_in[8].in_var_COLOR0, gl_in[9].in_var_COLOR0, gl_in[10].in_var_COLOR0, gl_in[11].in_var_COLOR0 }); + spvUnsafeArray, 12> _132 = spvUnsafeArray, 12>({ spvUnsafeArray({ gl_in[0].in_var_TEXCOORD0_0, gl_in[0].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[1].in_var_TEXCOORD0_0, gl_in[1].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[2].in_var_TEXCOORD0_0, gl_in[2].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[3].in_var_TEXCOORD0_0, gl_in[3].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[4].in_var_TEXCOORD0_0, gl_in[4].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[5].in_var_TEXCOORD0_0, gl_in[5].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[6].in_var_TEXCOORD0_0, gl_in[6].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[7].in_var_TEXCOORD0_0, gl_in[7].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[8].in_var_TEXCOORD0_0, gl_in[8].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[9].in_var_TEXCOORD0_0, gl_in[9].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[10].in_var_TEXCOORD0_0, gl_in[10].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[11].in_var_TEXCOORD0_0, gl_in[11].in_var_TEXCOORD0_1 }) }); + spvUnsafeArray _205 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, 
gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position, gl_in[3].in_var_VS_To_DS_Position, gl_in[4].in_var_VS_To_DS_Position, gl_in[5].in_var_VS_To_DS_Position, gl_in[6].in_var_VS_To_DS_Position, gl_in[7].in_var_VS_To_DS_Position, gl_in[8].in_var_VS_To_DS_Position, gl_in[9].in_var_VS_To_DS_Position, gl_in[10].in_var_VS_To_DS_Position, gl_in[11].in_var_VS_To_DS_Position }); + spvUnsafeArray _230 = spvUnsafeArray({ FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[0], _130[0], _131[0], _132[0] } }, _205[0] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[1], _130[1], _131[1], _132[1] } }, _205[1] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[2], _130[2], _131[2], _132[2] } }, _205[2] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[3], _130[3], _131[3], _132[3] } }, _205[3] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[4], _130[4], _131[4], _132[4] } }, _205[4] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[5], _130[5], _131[5], _132[5] } }, _205[5] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[6], _130[6], _131[6], _132[6] } }, _205[6] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[7], _130[7], _131[7], _132[7] } }, _205[7] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[8], _130[8], _131[8], _132[8] } }, _205[8] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[9], _130[9], _131[9], _132[9] } }, _205[9] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[10], _130[10], _131[10], _132[10] } }, _205[10] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ 
FVertexFactoryInterpolantsVSToPS{ _129[11], _130[11], _131[11], _132[11] } }, _205[11] } }); + spvUnsafeArray param_var_I; + param_var_I = _230; + float4 _247 = float4(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float3 _251 = Primitive.Primitive_NonUniformScale.xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + uint _254 = (gl_InvocationID < 2u) ? (gl_InvocationID + 1u) : 0u; + uint _255 = 2u * gl_InvocationID; + uint _256 = 3u + _255; + uint _257 = _255 + 4u; + float4 _269 = float4(param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _277 = float4(param_var_I[_256].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _285 = float4(param_var_I[_257].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + spvUnsafeArray _333 = spvUnsafeArray({ param_var_I[gl_InvocationID].Position, (((((float4(2.0) * param_var_I[gl_InvocationID].Position) + param_var_I[_254].Position) - (float4(dot(param_var_I[_254].Position - param_var_I[gl_InvocationID].Position, _247)) * _247)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_256].Position) + param_var_I[_257].Position) - (float4(dot(param_var_I[_257].Position - param_var_I[_256].Position, _277)) * _277)) * float4(0.3333333432674407958984375))) * float4(0.5), (((((float4(2.0) * param_var_I[_254].Position) + param_var_I[gl_InvocationID].Position) - (float4(dot(param_var_I[gl_InvocationID].Position - param_var_I[_254].Position, _269)) * _269)) * 
float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_257].Position) + param_var_I[_256].Position) - (float4(dot(param_var_I[_256].Position - param_var_I[_257].Position, _285)) * _285)) * float4(0.3333333432674407958984375))) * float4(0.5) }); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_COLOR0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.Color; + gl_out[gl_InvocationID].out_var_TEXCOORD0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TexCoords; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_PN_POSITION = _333; + gl_out[gl_InvocationID].out_var_PN_DisplacementScales = _251; + gl_out[gl_InvocationID].out_var_PN_TessellationMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_WorldDisplacementMultiplier = 1.0; + temp_var_hullMainRetVal[gl_InvocationID] = FPNTessellationHSToDS{ param_var_I[gl_InvocationID], _333, _251, 1.0, 1.0 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _385 = (((((temp_var_hullMainRetVal[0u].WorldPosition[1] + temp_var_hullMainRetVal[0u].WorldPosition[2]) + temp_var_hullMainRetVal[1u].WorldPosition[1]) + temp_var_hullMainRetVal[1u].WorldPosition[2]) + temp_var_hullMainRetVal[2u].WorldPosition[1]) + temp_var_hullMainRetVal[2u].WorldPosition[2]) * float4(0.16666667163372039794921875); + float4 _398; + _398.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _398.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _398.z = 0.5 * 
(temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _398.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _524; + for (;;) + { + float4 _424 = View.View_ViewToClip * float4(0.0); + float4 _429 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[0u].WorldPosition[0].xyz, 1.0); + float3 _430 = _429.xyz; + float3 _431 = _424.xyz; + float _433 = _429.w; + float _434 = _424.w; + float4 _451 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[1u].WorldPosition[0].xyz, 1.0); + float3 _452 = _451.xyz; + float _454 = _451.w; + float4 _472 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); + float3 _473 = _472.xyz; + float _475 = _472.w; + if (any((((int3((_430 - _431) < float3(_433 + _434)) + (int3(2) * int3((_430 + _431) > float3((-_433) - _434)))) | (int3((_452 - _431) < float3(_454 + _434)) + (int3(2) * int3((_452 + _431) > float3((-_454) - _434))))) | (int3((_473 - _431) < float3(_475 + _434)) + (int3(2) * int3((_473 + _431) > float3((-_475) - _434))))) != int3(3))) + { + _524 = float4(0.0); + break; + } + float3 _493 = temp_var_hullMainRetVal[0u].WorldPosition[0].xyz - temp_var_hullMainRetVal[1u].WorldPosition[0].xyz; + float3 _494 = temp_var_hullMainRetVal[1u].WorldPosition[0].xyz - temp_var_hullMainRetVal[2u].WorldPosition[0].xyz; + float3 _495 = temp_var_hullMainRetVal[2u].WorldPosition[0].xyz - temp_var_hullMainRetVal[0u].WorldPosition[0].xyz; + float3 _498 = (float3(0.5) * (temp_var_hullMainRetVal[0u].WorldPosition[0].xyz + temp_var_hullMainRetVal[1u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _501 = (float3(0.5) * (temp_var_hullMainRetVal[1u].WorldPosition[0].xyz + temp_var_hullMainRetVal[2u].WorldPosition[0].xyz)) - 
float3(View.View_TranslatedWorldCameraOrigin); + float3 _504 = (float3(0.5) * (temp_var_hullMainRetVal[2u].WorldPosition[0].xyz + temp_var_hullMainRetVal[0u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float _508 = sqrt(dot(_494, _494) / dot(_501, _501)); + float _512 = sqrt(dot(_495, _495) / dot(_504, _504)); + float _516 = sqrt(dot(_493, _493) / dot(_498, _498)); + float4 _517 = float4(_508, _512, _516, 1.0); + _517.w = 0.333000004291534423828125 * ((_508 + _512) + _516); + _524 = float4(View.View_AdaptiveTessellationFactor) * _517; + break; + } + float4 _526 = fast::clamp(_398 * _524, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_526.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_526.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_526.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_526.w); + patchOut.out_var_PN_POSITION9 = _385 + ((_385 - (((temp_var_hullMainRetVal[2u].WorldPosition[0] + temp_var_hullMainRetVal[1u].WorldPosition[0]) + temp_var_hullMainRetVal[0u].WorldPosition[0]) * float4(0.3333333432674407958984375))) * float4(0.5)); + } +} + diff --git a/reference/opt/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc b/reference/opt/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc new file mode 100644 index 00000000000..9ae81e40615 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc @@ -0,0 +1,175 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FSharedBasePassInterpolants +{ +}; +struct FBasePassInterpolantsVSToDS +{ + FSharedBasePassInterpolants _m0; +}; + +struct FBasePassVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + FBasePassInterpolantsVSToDS BasePassInterpolants; + float4 Position; +}; + +struct FFlatTessellationHSToDS +{ + FBasePassVSToDS PassSpecificData; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_DrawsVelocity; + float4 
Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + uint Primitive_LightingChannelMask; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightmapDataIndex; + packed_float3 Primitive_PreSkinnedLocalBounds; + int Primitive_SingleCaptureIndex; + uint Primitive_OutputVelocity; + uint PrePadding_Primitive_420; + uint PrePadding_Primitive_424; + uint PrePadding_Primitive_428; + float4 Primitive_CustomPrimitiveData[4]; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[3]; + float4 Material_ScalarExpressions[1]; +}; + +constant float4 _88 = {}; + +struct main0_out +{ + float3 out_var_Flat_DisplacementScales; + float out_var_Flat_TessellationMultiplier; + float out_var_Flat_WorldDisplacementMultiplier; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_VS_To_DS_Position; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_VS_To_DS_Position [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_Primitive& Primitive [[buffer(0)]], constant type_Material& Material [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FFlatTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _90 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, 
gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _91 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _104 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position }); + spvUnsafeArray _111 = spvUnsafeArray({ FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _90[0], _91[0] } }, FBasePassInterpolantsVSToDS{ { } }, _104[0] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _90[1], _91[1] } }, FBasePassInterpolantsVSToDS{ { } }, _104[1] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _90[2], _91[2] } }, FBasePassInterpolantsVSToDS{ { } }, _104[2] } }); + spvUnsafeArray param_var_I; + param_var_I = _111; + float3 _128 = Primitive.Primitive_NonUniformScale.xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_Flat_DisplacementScales = _128; + gl_out[gl_InvocationID].out_var_Flat_TessellationMultiplier = Material.Material_ScalarExpressions[0].x; + 
gl_out[gl_InvocationID].out_var_Flat_WorldDisplacementMultiplier = 1.0; + temp_var_hullMainRetVal[gl_InvocationID] = FFlatTessellationHSToDS{ param_var_I[gl_InvocationID], _128, Material.Material_ScalarExpressions[0].x, 1.0 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _154; + _154.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _154.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _154.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _154.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _173 = fast::clamp(_154, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_173.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_173.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_173.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_173.w); + } +} + diff --git a/reference/opt/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese b/reference/opt/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese new file mode 100644 index 00000000000..bc0d7e051fa --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese @@ -0,0 +1,418 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 
View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 
View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float 
View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_ShadowDepthPass +{ + float PrePadding_ShadowDepthPass_LPV_0; + float PrePadding_ShadowDepthPass_LPV_4; + float PrePadding_ShadowDepthPass_LPV_8; + float PrePadding_ShadowDepthPass_LPV_12; + float PrePadding_ShadowDepthPass_LPV_16; + float PrePadding_ShadowDepthPass_LPV_20; + float PrePadding_ShadowDepthPass_LPV_24; + float PrePadding_ShadowDepthPass_LPV_28; + float PrePadding_ShadowDepthPass_LPV_32; + float PrePadding_ShadowDepthPass_LPV_36; + float PrePadding_ShadowDepthPass_LPV_40; + float PrePadding_ShadowDepthPass_LPV_44; + float 
PrePadding_ShadowDepthPass_LPV_48; + float PrePadding_ShadowDepthPass_LPV_52; + float PrePadding_ShadowDepthPass_LPV_56; + float PrePadding_ShadowDepthPass_LPV_60; + float PrePadding_ShadowDepthPass_LPV_64; + float PrePadding_ShadowDepthPass_LPV_68; + float PrePadding_ShadowDepthPass_LPV_72; + float PrePadding_ShadowDepthPass_LPV_76; + float PrePadding_ShadowDepthPass_LPV_80; + float PrePadding_ShadowDepthPass_LPV_84; + float PrePadding_ShadowDepthPass_LPV_88; + float PrePadding_ShadowDepthPass_LPV_92; + float PrePadding_ShadowDepthPass_LPV_96; + float PrePadding_ShadowDepthPass_LPV_100; + float PrePadding_ShadowDepthPass_LPV_104; + float PrePadding_ShadowDepthPass_LPV_108; + float PrePadding_ShadowDepthPass_LPV_112; + float PrePadding_ShadowDepthPass_LPV_116; + float PrePadding_ShadowDepthPass_LPV_120; + float PrePadding_ShadowDepthPass_LPV_124; + float PrePadding_ShadowDepthPass_LPV_128; + float PrePadding_ShadowDepthPass_LPV_132; + float PrePadding_ShadowDepthPass_LPV_136; + float PrePadding_ShadowDepthPass_LPV_140; + float PrePadding_ShadowDepthPass_LPV_144; + float PrePadding_ShadowDepthPass_LPV_148; + float PrePadding_ShadowDepthPass_LPV_152; + float PrePadding_ShadowDepthPass_LPV_156; + float PrePadding_ShadowDepthPass_LPV_160; + float PrePadding_ShadowDepthPass_LPV_164; + float PrePadding_ShadowDepthPass_LPV_168; + float PrePadding_ShadowDepthPass_LPV_172; + float PrePadding_ShadowDepthPass_LPV_176; + float PrePadding_ShadowDepthPass_LPV_180; + float PrePadding_ShadowDepthPass_LPV_184; + float PrePadding_ShadowDepthPass_LPV_188; + float PrePadding_ShadowDepthPass_LPV_192; + float PrePadding_ShadowDepthPass_LPV_196; + float PrePadding_ShadowDepthPass_LPV_200; + float PrePadding_ShadowDepthPass_LPV_204; + float PrePadding_ShadowDepthPass_LPV_208; + float PrePadding_ShadowDepthPass_LPV_212; + float PrePadding_ShadowDepthPass_LPV_216; + float PrePadding_ShadowDepthPass_LPV_220; + float PrePadding_ShadowDepthPass_LPV_224; + float 
PrePadding_ShadowDepthPass_LPV_228; + float PrePadding_ShadowDepthPass_LPV_232; + float PrePadding_ShadowDepthPass_LPV_236; + float PrePadding_ShadowDepthPass_LPV_240; + float PrePadding_ShadowDepthPass_LPV_244; + float PrePadding_ShadowDepthPass_LPV_248; + float PrePadding_ShadowDepthPass_LPV_252; + float PrePadding_ShadowDepthPass_LPV_256; + float PrePadding_ShadowDepthPass_LPV_260; + float PrePadding_ShadowDepthPass_LPV_264; + float PrePadding_ShadowDepthPass_LPV_268; + float4x4 ShadowDepthPass_LPV_mRsmToWorld; + float4 ShadowDepthPass_LPV_mLightColour; + float4 ShadowDepthPass_LPV_GeometryVolumeCaptureLightDirection; + float4 ShadowDepthPass_LPV_mEyePos; + packed_int3 ShadowDepthPass_LPV_mOldGridOffset; + int PrePadding_ShadowDepthPass_LPV_396; + packed_int3 ShadowDepthPass_LPV_mLpvGridOffset; + float ShadowDepthPass_LPV_ClearMultiplier; + float ShadowDepthPass_LPV_LpvScale; + float ShadowDepthPass_LPV_OneOverLpvScale; + float ShadowDepthPass_LPV_DirectionalOcclusionIntensity; + float ShadowDepthPass_LPV_DirectionalOcclusionRadius; + float ShadowDepthPass_LPV_RsmAreaIntensityMultiplier; + float ShadowDepthPass_LPV_RsmPixelToTexcoordMultiplier; + float ShadowDepthPass_LPV_SecondaryOcclusionStrength; + float ShadowDepthPass_LPV_SecondaryBounceStrength; + float ShadowDepthPass_LPV_VplInjectionBias; + float ShadowDepthPass_LPV_GeometryVolumeInjectionBias; + float ShadowDepthPass_LPV_EmissiveInjectionMultiplier; + int ShadowDepthPass_LPV_PropagationIndex; + float4x4 ShadowDepthPass_ProjectionMatrix; + float4x4 ShadowDepthPass_ViewMatrix; + float4 ShadowDepthPass_ShadowParams; + float ShadowDepthPass_bClampToNearPlane; + float PrePadding_ShadowDepthPass_612; + float PrePadding_ShadowDepthPass_616; + float PrePadding_ShadowDepthPass_620; + float4x4 ShadowDepthPass_ShadowViewProjectionMatrices[6]; + float4x4 ShadowDepthPass_ShadowViewMatrices[6]; +}; + +constant float4 _113 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 
out_var_TEXCOORD11_centroid [[user(locn1)]]; + float4 out_var_COLOR0 [[user(locn2)]]; + float4 out_var_TEXCOORD0_0 [[user(locn3)]]; + uint out_var_PRIMITIVE_ID [[user(locn4)]]; + float out_var_TEXCOORD6 [[user(locn5)]]; + float out_var_TEXCOORD8 [[user(locn6)]]; + float3 out_var_TEXCOORD7 [[user(locn7)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in_var_COLOR0 [[attribute(0)]]; + float4 in_var_PN_POSITION_0 [[attribute(2)]]; + float4 in_var_PN_POSITION_1 [[attribute(3)]]; + float4 in_var_PN_POSITION_2 [[attribute(4)]]; + float in_var_PN_WorldDisplacementMultiplier [[attribute(7)]]; + uint in_var_PRIMITIVE_ID [[attribute(8)]]; + float4 in_var_TEXCOORD0_0 [[attribute(9)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(10)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(11)]]; +}; + +struct main0_patchIn +{ + float4 in_var_PN_POSITION9 [[attribute(5)]]; + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_View& View [[buffer(0)]], constant type_ShadowDepthPass& ShadowDepthPass [[buffer(1)]], texture2d Material_Texture2D_3 [[texture(0)]], sampler Material_Texture2D_3Sampler [[sampler(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray out_var_TEXCOORD0 = {}; + spvUnsafeArray _117 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _118 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _119 = spvUnsafeArray({ patchIn.gl_in[0].in_var_COLOR0, patchIn.gl_in[1].in_var_COLOR0, patchIn.gl_in[2].in_var_COLOR0 }); + spvUnsafeArray, 3> _120 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD0_0 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_TEXCOORD0_0 }), 
spvUnsafeArray({ patchIn.gl_in[2].in_var_TEXCOORD0_0 }) }); + spvUnsafeArray, 3> _135 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_POSITION_0, patchIn.gl_in[0].in_var_PN_POSITION_1, patchIn.gl_in[0].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_PN_POSITION_0, patchIn.gl_in[1].in_var_PN_POSITION_1, patchIn.gl_in[1].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_PN_POSITION_0, patchIn.gl_in[2].in_var_PN_POSITION_1, patchIn.gl_in[2].in_var_PN_POSITION_2 }) }); + spvUnsafeArray _136 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[1].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[2].in_var_PN_WorldDisplacementMultiplier }); + float _157 = gl_TessCoord.x * gl_TessCoord.x; + float _158 = gl_TessCoord.y * gl_TessCoord.y; + float _159 = gl_TessCoord.z * gl_TessCoord.z; + float4 _165 = float4(gl_TessCoord.x); + float4 _169 = float4(gl_TessCoord.y); + float4 _174 = float4(gl_TessCoord.z); + float4 _177 = float4(_157 * 3.0); + float4 _181 = float4(_158 * 3.0); + float4 _188 = float4(_159 * 3.0); + float4 _202 = fma(((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _174) * _165, _169, fma(_135[2][2] * _177, _174, fma(_135[2][1] * _188, _165, fma(_135[1][2] * _188, _169, fma(_135[1][1] * _181, _174, fma(_135[0][2] * _181, _165, fma(_135[0][1] * _177, _169, fma(_135[2][0] * float4(_159), _174, fma(_135[0][0] * float4(_157), _165, (_135[1][0] * float4(_158)) * _169))))))))); + float3 _226 = fma(_117[2].xyz, float3(gl_TessCoord.z), fma(_117[0].xyz, float3(gl_TessCoord.x), _117[1].xyz * float3(gl_TessCoord.y)).xyz); + float4 _229 = fma(_118[2], _174, fma(_118[0], _165, _118[1] * _169)); + float4 _231 = fma(_119[2], _174, fma(_119[0], _165, _119[1] * _169)); + float4 _233 = fma(_120[2][0], _174, fma(_120[0][0], _165, _120[1][0] * _169)); + spvUnsafeArray _234 = spvUnsafeArray({ _233 }); + float3 _236 = _229.xyz; + float3 _264 = 
fma((float3((Material_Texture2D_3.sample(Material_Texture2D_3Sampler, fma(_233.zw, float2(1.0, 2.0), float2(View.View_GameTime * 0.20000000298023223876953125, View.View_GameTime * (-0.699999988079071044921875))), level(-1.0)).x * 10.0) * (1.0 - _231.x)) * _236) * float3(0.5), float3(fma(_136[2], gl_TessCoord.z, fma(_136[0], gl_TessCoord.x, _136[1] * gl_TessCoord.y))), _202.xyz); + float4 _270 = ShadowDepthPass.ShadowDepthPass_ProjectionMatrix * float4(_264.x, _264.y, _264.z, _202.w); + float4 _281; + if ((ShadowDepthPass.ShadowDepthPass_bClampToNearPlane > 0.0) && (_270.z < 0.0)) + { + float4 _279 = _270; + _279.z = 9.9999999747524270787835121154785e-07; + _279.w = 1.0; + _281 = _279; + } + else + { + _281 = _270; + } + float _290 = abs(dot(float3(ShadowDepthPass.ShadowDepthPass_ViewMatrix[0].z, ShadowDepthPass.ShadowDepthPass_ViewMatrix[1].z, ShadowDepthPass.ShadowDepthPass_ViewMatrix[2].z), _236)); + out.out_var_TEXCOORD10_centroid = float4(_226.x, _226.y, _226.z, _113.w); + out.out_var_TEXCOORD11_centroid = _229; + out.out_var_COLOR0 = _231; + out_var_TEXCOORD0 = _234; + out.out_var_PRIMITIVE_ID = patchIn.gl_in[0u].in_var_PRIMITIVE_ID; + out.out_var_TEXCOORD6 = _281.z; + out.out_var_TEXCOORD8 = fma(ShadowDepthPass.ShadowDepthPass_ShadowParams.y, fast::clamp((abs(_290) > 0.0) ? 
(sqrt(fast::clamp(fma(-_290, _290, 1.0), 0.0, 1.0)) / _290) : ShadowDepthPass.ShadowDepthPass_ShadowParams.z, 0.0, ShadowDepthPass.ShadowDepthPass_ShadowParams.z), ShadowDepthPass.ShadowDepthPass_ShadowParams.x); + out.out_var_TEXCOORD7 = _264.xyz; + out.gl_Position = _281; + out.out_var_TEXCOORD0_0 = out_var_TEXCOORD0[0]; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese b/reference/opt/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese new file mode 100644 index 00000000000..987ba54eada --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese @@ -0,0 +1,416 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 
View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; 
+ float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float 
View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 
View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; + float PrePadding_View_3048; + float PrePadding_View_3052; + float4x4 View_WorldToVirtualTexture; + float4 View_VirtualTextureParams; + float4 View_XRPassthroughCameraUVs[2]; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[5]; + float4 Material_ScalarExpressions[2]; +}; + +constant float4 _118 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD6 [[user(locn0)]]; + float4 out_var_TEXCOORD7 [[user(locn1)]]; + float4 out_var_TEXCOORD10_centroid [[user(locn2)]]; + float4 out_var_TEXCOORD11_centroid [[user(locn3)]]; + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [1]; + float gl_ClipDistance_0 [[user(clip0)]]; +}; + +struct main0_in +{ + float4 in_var_PN_DominantEdge2 [[attribute(3)]]; + float4 in_var_PN_DominantEdge3 [[attribute(4)]]; + float3 in_var_PN_DominantEdge4 [[attribute(5)]]; + float3 in_var_PN_DominantEdge5 [[attribute(6)]]; + float4 in_var_PN_DominantVertex1 [[attribute(8)]]; + float3 in_var_PN_DominantVertex2 [[attribute(9)]]; + float4 in_var_PN_POSITION_0 [[attribute(10)]]; + float4 in_var_PN_POSITION_1 [[attribute(11)]]; + float4 in_var_PN_POSITION_2 [[attribute(12)]]; + float in_var_PN_WorldDisplacementMultiplier [[attribute(15)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(16)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(17)]]; + float4 in_var_TEXCOORD6 [[attribute(18)]]; + float4 in_var_TEXCOORD8 [[attribute(19)]]; +}; + +struct main0_patchIn +{ + float4 in_var_PN_POSITION9 [[attribute(13)]]; + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Material& Material [[buffer(1)]], texture3d View_GlobalDistanceFieldTexture0 
[[texture(0)]], texture3d View_GlobalDistanceFieldTexture1 [[texture(1)]], texture3d View_GlobalDistanceFieldTexture2 [[texture(2)]], texture3d View_GlobalDistanceFieldTexture3 [[texture(3)]], sampler View_GlobalDistanceFieldSampler0 [[sampler(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray _120 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD6, patchIn.gl_in[1].in_var_TEXCOORD6, patchIn.gl_in[2].in_var_TEXCOORD6 }); + spvUnsafeArray _121 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD8, patchIn.gl_in[1].in_var_TEXCOORD8, patchIn.gl_in[2].in_var_TEXCOORD8 }); + spvUnsafeArray _128 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _129 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray, 3> _136 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_POSITION_0, patchIn.gl_in[0].in_var_PN_POSITION_1, patchIn.gl_in[0].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_PN_POSITION_0, patchIn.gl_in[1].in_var_PN_POSITION_1, patchIn.gl_in[1].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_PN_POSITION_0, patchIn.gl_in[2].in_var_PN_POSITION_1, patchIn.gl_in[2].in_var_PN_POSITION_2 }) }); + spvUnsafeArray _137 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[1].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[2].in_var_PN_WorldDisplacementMultiplier }); + spvUnsafeArray _138 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantVertex1, patchIn.gl_in[1].in_var_PN_DominantVertex1, patchIn.gl_in[2].in_var_PN_DominantVertex1 }); + spvUnsafeArray _139 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantVertex2, patchIn.gl_in[1].in_var_PN_DominantVertex2, patchIn.gl_in[2].in_var_PN_DominantVertex2 
}); + spvUnsafeArray _146 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge2, patchIn.gl_in[1].in_var_PN_DominantEdge2, patchIn.gl_in[2].in_var_PN_DominantEdge2 }); + spvUnsafeArray _147 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge3, patchIn.gl_in[1].in_var_PN_DominantEdge3, patchIn.gl_in[2].in_var_PN_DominantEdge3 }); + spvUnsafeArray _148 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge4, patchIn.gl_in[1].in_var_PN_DominantEdge4, patchIn.gl_in[2].in_var_PN_DominantEdge4 }); + spvUnsafeArray _149 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge5, patchIn.gl_in[1].in_var_PN_DominantEdge5, patchIn.gl_in[2].in_var_PN_DominantEdge5 }); + float _190 = gl_TessCoord.x * gl_TessCoord.x; + float _191 = gl_TessCoord.y * gl_TessCoord.y; + float _192 = gl_TessCoord.z * gl_TessCoord.z; + float4 _198 = float4(gl_TessCoord.x); + float4 _202 = float4(gl_TessCoord.y); + float4 _207 = float4(gl_TessCoord.z); + float4 _210 = float4(_190 * 3.0); + float4 _214 = float4(_191 * 3.0); + float4 _221 = float4(_192 * 3.0); + float4 _235 = fma(((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _207) * _198, _202, fma(_136[2][2] * _210, _207, fma(_136[2][1] * _221, _198, fma(_136[1][2] * _221, _202, fma(_136[1][1] * _214, _207, fma(_136[0][2] * _214, _198, fma(_136[0][1] * _210, _202, fma(_136[2][0] * float4(_192), _207, fma(_136[0][0] * float4(_190), _198, (_136[1][0] * float4(_191)) * _202))))))))); + float3 _237 = float3(gl_TessCoord.x); + float3 _240 = float3(gl_TessCoord.y); + float3 _254 = float3(gl_TessCoord.z); + float3 _256 = fma(_128[2].xyz, _254, fma(_128[0].xyz, _237, _128[1].xyz * _240).xyz); + float4 _259 = fma(_129[2], _207, fma(_129[0], _198, _129[1] * _202)); + float3 _264 = _235.xyz; + float3 _265 = _256.xyz; + float3 _266 = _259.xyz; + float3 _272 = _264 + float3(View.View_WorldCameraOrigin); + float _279 = float(int(gl_TessCoord.x == 0.0)); + float _282 = float(int(gl_TessCoord.y == 0.0)); + float _285 = float(int(gl_TessCoord.z == 
0.0)); + float _286 = _279 + _282; + float _287 = _286 + _285; + float4 _387; + float3 _388; + if (float(int(_287 == 2.0)) == 1.0) + { + float _363 = float(int((_282 + _285) == 2.0)); + float _367 = float(int((_285 + _279) == 2.0)); + float _370 = float(int(_286 == 2.0)); + _387 = fma(float4(_370), _138[2], fma(float4(_363), _138[0], float4(_367) * _138[1])); + _388 = fma(float3(_370), _139[2], fma(float3(_363), _139[0], float3(_367) * _139[1])); + } + else + { + float4 _358; + float3 _359; + if (float(int(_287 == 1.0)) != 0.0) + { + float4 _304 = float4(_279); + float4 _306 = float4(_282); + float4 _309 = float4(_285); + float4 _311 = fma(_309, _146[2], fma(_304, _146[0], _306 * _146[1])); + float4 _316 = fma(_309, _147[2], fma(_304, _147[0], _306 * _147[1])); + float3 _331 = float3(_279); + float3 _333 = float3(_282); + float3 _336 = float3(_285); + float3 _338 = fma(_336, _148[2], fma(_331, _148[0], _333 * _148[1])); + float3 _343 = fma(_336, _149[2], fma(_331, _149[0], _333 * _149[1])); + _358 = fma(_309, fma(_198, _311, _202 * _316), fma(_304, fma(_202, _311, _207 * _316), _306 * fma(_207, _311, _198 * _316))); + _359 = fma(_336, fma(_237, _338, _240 * _343), fma(_331, fma(_240, _338, _254 * _343), _333 * fma(_254, _338, _237 * _343))); + } + else + { + _358 = float4(_259.xyz, 0.0); + _359 = _265; + } + _387 = _358; + _388 = _359; + } + float3x3 _398; + if (float(int(_287 == 0.0)) == 0.0) + { + _398 = float3x3(_388, cross(_387.xyz, _388) * float3(_387.w), _387.xyz); + } + else + { + _398 = float3x3(_265, cross(_266, _265) * float3(_259.w), _266); + } + float3 _411 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[0].xyz) + View.View_GlobalVolumeCenterAndExtent[0].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[0].xyz + View.View_GlobalVolumeCenterAndExtent[0].www) - _272, float3(0.0))); + float _547; + if (fast::min(_411.x, fast::min(_411.y, _411.z)) > (View.View_GlobalVolumeCenterAndExtent[0].w * 
View.View_GlobalVolumeTexelSize)) + { + _547 = View_GlobalDistanceFieldTexture0.sample(View_GlobalDistanceFieldSampler0, fma(_272, View.View_GlobalVolumeWorldToUVAddAndMul[0u].www, View.View_GlobalVolumeWorldToUVAddAndMul[0u].xyz), level(0.0)).x; + } + else + { + float3 _436 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[1].xyz) + View.View_GlobalVolumeCenterAndExtent[1].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[1].xyz + View.View_GlobalVolumeCenterAndExtent[1].www) - _272, float3(0.0))); + float _535; + if (fast::min(_436.x, fast::min(_436.y, _436.z)) > (View.View_GlobalVolumeCenterAndExtent[1].w * View.View_GlobalVolumeTexelSize)) + { + _535 = View_GlobalDistanceFieldTexture1.sample(View_GlobalDistanceFieldSampler0, fma(_272, View.View_GlobalVolumeWorldToUVAddAndMul[1u].www, View.View_GlobalVolumeWorldToUVAddAndMul[1u].xyz), level(0.0)).x; + } + else + { + float3 _459 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[2].xyz) + View.View_GlobalVolumeCenterAndExtent[2].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[2].xyz + View.View_GlobalVolumeCenterAndExtent[2].www) - _272, float3(0.0))); + float3 _475 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[3].xyz) + View.View_GlobalVolumeCenterAndExtent[3].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[3].xyz + View.View_GlobalVolumeCenterAndExtent[3].www) - _272, float3(0.0))); + float _480 = fast::min(_475.x, fast::min(_475.y, _475.z)); + float _523; + if (fast::min(_459.x, fast::min(_459.y, _459.z)) > (View.View_GlobalVolumeCenterAndExtent[2].w * View.View_GlobalVolumeTexelSize)) + { + _523 = View_GlobalDistanceFieldTexture2.sample(View_GlobalDistanceFieldSampler0, fma(_272, View.View_GlobalVolumeWorldToUVAddAndMul[2u].www, View.View_GlobalVolumeWorldToUVAddAndMul[2u].xyz), level(0.0)).x; + } + else + { + float _511; + if (_480 > (View.View_GlobalVolumeCenterAndExtent[3].w * 
View.View_GlobalVolumeTexelSize)) + { + _511 = mix(View.View_MaxGlobalDistance, View_GlobalDistanceFieldTexture3.sample(View_GlobalDistanceFieldSampler0, fma(_272, View.View_GlobalVolumeWorldToUVAddAndMul[3u].www, View.View_GlobalVolumeWorldToUVAddAndMul[3u].xyz), level(0.0)).x, fast::clamp((_480 * 10.0) * View.View_GlobalVolumeWorldToUVAddAndMul[3].w, 0.0, 1.0)); + } + else + { + _511 = View.View_MaxGlobalDistance; + } + _523 = _511; + } + _535 = _523; + } + _547 = _535; + } + float3 _565 = fma(_398[2] * float3(fast::min(_547 + Material.Material_ScalarExpressions[0].z, 0.0) * Material.Material_ScalarExpressions[0].w), float3(fma(_137[2], gl_TessCoord.z, fma(_137[0], gl_TessCoord.x, _137[1] * gl_TessCoord.y))), _264); + float4 _574 = View.View_TranslatedWorldToClip * float4(_565.x, _565.y, _565.z, _235.w); + _574.z = fma(0.001000000047497451305389404296875, _574.w, _574.z); + out.gl_Position = _574; + out.out_var_TEXCOORD6 = fma(_120[2], _207, fma(_120[0], _198, _120[1] * _202)); + out.out_var_TEXCOORD7 = fma(_121[2], _207, fma(_121[0], _198, _121[1] * _202)); + out.out_var_TEXCOORD10_centroid = float4(_256.x, _256.y, _256.z, _118.w); + out.out_var_TEXCOORD11_centroid = _259; + out.gl_ClipDistance[0u] = dot(View.View_GlobalClippingPlane, float4(_565.xyz - float3(View.View_PreViewTranslation), 1.0)); + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese b/reference/opt/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese new file mode 100644 index 00000000000..e0efbbfec2e --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese @@ -0,0 +1,215 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_ShadowDepthPass +{ + float PrePadding_ShadowDepthPass_LPV_0; + float PrePadding_ShadowDepthPass_LPV_4; + float PrePadding_ShadowDepthPass_LPV_8; + float PrePadding_ShadowDepthPass_LPV_12; + float PrePadding_ShadowDepthPass_LPV_16; + float PrePadding_ShadowDepthPass_LPV_20; + float PrePadding_ShadowDepthPass_LPV_24; + float PrePadding_ShadowDepthPass_LPV_28; + float PrePadding_ShadowDepthPass_LPV_32; + float PrePadding_ShadowDepthPass_LPV_36; + float PrePadding_ShadowDepthPass_LPV_40; + float PrePadding_ShadowDepthPass_LPV_44; + float PrePadding_ShadowDepthPass_LPV_48; + float PrePadding_ShadowDepthPass_LPV_52; + float PrePadding_ShadowDepthPass_LPV_56; + float PrePadding_ShadowDepthPass_LPV_60; + float PrePadding_ShadowDepthPass_LPV_64; + float PrePadding_ShadowDepthPass_LPV_68; + float PrePadding_ShadowDepthPass_LPV_72; + float PrePadding_ShadowDepthPass_LPV_76; + float PrePadding_ShadowDepthPass_LPV_80; + float PrePadding_ShadowDepthPass_LPV_84; + float PrePadding_ShadowDepthPass_LPV_88; + float PrePadding_ShadowDepthPass_LPV_92; + float PrePadding_ShadowDepthPass_LPV_96; + float PrePadding_ShadowDepthPass_LPV_100; + float PrePadding_ShadowDepthPass_LPV_104; + float PrePadding_ShadowDepthPass_LPV_108; + float PrePadding_ShadowDepthPass_LPV_112; + float PrePadding_ShadowDepthPass_LPV_116; + float 
PrePadding_ShadowDepthPass_LPV_120; + float PrePadding_ShadowDepthPass_LPV_124; + float PrePadding_ShadowDepthPass_LPV_128; + float PrePadding_ShadowDepthPass_LPV_132; + float PrePadding_ShadowDepthPass_LPV_136; + float PrePadding_ShadowDepthPass_LPV_140; + float PrePadding_ShadowDepthPass_LPV_144; + float PrePadding_ShadowDepthPass_LPV_148; + float PrePadding_ShadowDepthPass_LPV_152; + float PrePadding_ShadowDepthPass_LPV_156; + float PrePadding_ShadowDepthPass_LPV_160; + float PrePadding_ShadowDepthPass_LPV_164; + float PrePadding_ShadowDepthPass_LPV_168; + float PrePadding_ShadowDepthPass_LPV_172; + float PrePadding_ShadowDepthPass_LPV_176; + float PrePadding_ShadowDepthPass_LPV_180; + float PrePadding_ShadowDepthPass_LPV_184; + float PrePadding_ShadowDepthPass_LPV_188; + float PrePadding_ShadowDepthPass_LPV_192; + float PrePadding_ShadowDepthPass_LPV_196; + float PrePadding_ShadowDepthPass_LPV_200; + float PrePadding_ShadowDepthPass_LPV_204; + float PrePadding_ShadowDepthPass_LPV_208; + float PrePadding_ShadowDepthPass_LPV_212; + float PrePadding_ShadowDepthPass_LPV_216; + float PrePadding_ShadowDepthPass_LPV_220; + float PrePadding_ShadowDepthPass_LPV_224; + float PrePadding_ShadowDepthPass_LPV_228; + float PrePadding_ShadowDepthPass_LPV_232; + float PrePadding_ShadowDepthPass_LPV_236; + float PrePadding_ShadowDepthPass_LPV_240; + float PrePadding_ShadowDepthPass_LPV_244; + float PrePadding_ShadowDepthPass_LPV_248; + float PrePadding_ShadowDepthPass_LPV_252; + float PrePadding_ShadowDepthPass_LPV_256; + float PrePadding_ShadowDepthPass_LPV_260; + float PrePadding_ShadowDepthPass_LPV_264; + float PrePadding_ShadowDepthPass_LPV_268; + float4x4 ShadowDepthPass_LPV_mRsmToWorld; + float4 ShadowDepthPass_LPV_mLightColour; + float4 ShadowDepthPass_LPV_GeometryVolumeCaptureLightDirection; + float4 ShadowDepthPass_LPV_mEyePos; + packed_int3 ShadowDepthPass_LPV_mOldGridOffset; + int PrePadding_ShadowDepthPass_LPV_396; + packed_int3 ShadowDepthPass_LPV_mLpvGridOffset; + 
float ShadowDepthPass_LPV_ClearMultiplier; + float ShadowDepthPass_LPV_LpvScale; + float ShadowDepthPass_LPV_OneOverLpvScale; + float ShadowDepthPass_LPV_DirectionalOcclusionIntensity; + float ShadowDepthPass_LPV_DirectionalOcclusionRadius; + float ShadowDepthPass_LPV_RsmAreaIntensityMultiplier; + float ShadowDepthPass_LPV_RsmPixelToTexcoordMultiplier; + float ShadowDepthPass_LPV_SecondaryOcclusionStrength; + float ShadowDepthPass_LPV_SecondaryBounceStrength; + float ShadowDepthPass_LPV_VplInjectionBias; + float ShadowDepthPass_LPV_GeometryVolumeInjectionBias; + float ShadowDepthPass_LPV_EmissiveInjectionMultiplier; + int ShadowDepthPass_LPV_PropagationIndex; + float4x4 ShadowDepthPass_ProjectionMatrix; + float4x4 ShadowDepthPass_ViewMatrix; + float4 ShadowDepthPass_ShadowParams; + float ShadowDepthPass_bClampToNearPlane; + float PrePadding_ShadowDepthPass_612; + float PrePadding_ShadowDepthPass_616; + float PrePadding_ShadowDepthPass_620; + float4x4 ShadowDepthPass_ShadowViewProjectionMatrices[6]; + float4x4 ShadowDepthPass_ShadowViewMatrices[6]; +}; + +constant float4 _90 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 out_var_TEXCOORD11_centroid [[user(locn1)]]; + float out_var_TEXCOORD6 [[user(locn2)]]; + float3 out_var_TEXCOORD7 [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in_var_PN_POSITION_0 [[attribute(10)]]; + float4 in_var_PN_POSITION_1 [[attribute(11)]]; + float4 in_var_PN_POSITION_2 [[attribute(12)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(16)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(17)]]; +}; + +struct main0_patchIn +{ + float4 in_var_PN_POSITION9 [[attribute(13)]]; + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_ShadowDepthPass& ShadowDepthPass [[buffer(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray _93 = 
spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _94 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray, 3> _101 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_POSITION_0, patchIn.gl_in[0].in_var_PN_POSITION_1, patchIn.gl_in[0].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_PN_POSITION_0, patchIn.gl_in[1].in_var_PN_POSITION_1, patchIn.gl_in[1].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_PN_POSITION_0, patchIn.gl_in[2].in_var_PN_POSITION_1, patchIn.gl_in[2].in_var_PN_POSITION_2 }) }); + float _119 = gl_TessCoord.x * gl_TessCoord.x; + float _120 = gl_TessCoord.y * gl_TessCoord.y; + float _121 = gl_TessCoord.z * gl_TessCoord.z; + float4 _127 = float4(gl_TessCoord.x); + float4 _131 = float4(gl_TessCoord.y); + float4 _136 = float4(gl_TessCoord.z); + float4 _139 = float4(_119 * 3.0); + float4 _143 = float4(_120 * 3.0); + float4 _150 = float4(_121 * 3.0); + float4 _164 = fma(((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _136) * _127, _131, fma(_101[2][2] * _139, _136, fma(_101[2][1] * _150, _127, fma(_101[1][2] * _150, _131, fma(_101[1][1] * _143, _136, fma(_101[0][2] * _143, _127, fma(_101[0][1] * _139, _131, fma(_101[2][0] * float4(_121), _136, fma(_101[0][0] * float4(_119), _127, (_101[1][0] * float4(_120)) * _131))))))))); + float3 _179 = fma(_93[2].xyz, float3(gl_TessCoord.z), fma(_93[0].xyz, float3(gl_TessCoord.x), _93[1].xyz * float3(gl_TessCoord.y)).xyz); + float4 _182 = fma(_94[2], _136, fma(_94[0], _127, _94[1] * _131)); + float4 _189 = ShadowDepthPass.ShadowDepthPass_ProjectionMatrix * float4(_164.x, _164.y, _164.z, _164.w); + float4 _200; + if ((ShadowDepthPass.ShadowDepthPass_bClampToNearPlane > 0.0) && (_189.z < 0.0)) + { + float4 _198 = _189; + _198.z = 
9.9999999747524270787835121154785e-07; + _198.w = 1.0; + _200 = _198; + } + else + { + _200 = _189; + } + float _209 = abs(dot(float3(ShadowDepthPass.ShadowDepthPass_ViewMatrix[0].z, ShadowDepthPass.ShadowDepthPass_ViewMatrix[1].z, ShadowDepthPass.ShadowDepthPass_ViewMatrix[2].z), _182.xyz)); + float4 _234 = _200; + _234.z = fma(_200.z, ShadowDepthPass.ShadowDepthPass_ShadowParams.w, fma(ShadowDepthPass.ShadowDepthPass_ShadowParams.y, fast::clamp((abs(_209) > 0.0) ? (sqrt(fast::clamp(fma(-_209, _209, 1.0), 0.0, 1.0)) / _209) : ShadowDepthPass.ShadowDepthPass_ShadowParams.z, 0.0, ShadowDepthPass.ShadowDepthPass_ShadowParams.z), ShadowDepthPass.ShadowDepthPass_ShadowParams.x)) * _200.w; + out.out_var_TEXCOORD10_centroid = float4(_179.x, _179.y, _179.z, _90.w); + out.out_var_TEXCOORD11_centroid = _182; + out.out_var_TEXCOORD6 = 0.0; + out.out_var_TEXCOORD7 = _164.xyz; + out.gl_Position = _234; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert b/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert new file mode 100644 index 00000000000..1f47ec47e3c --- /dev/null +++ b/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert @@ -0,0 +1,457 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 
View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float 
PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float 
PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_MobileBasePass +{ + float4 MobileBasePass_Fog_ExponentialFogParameters; + float4 MobileBasePass_Fog_ExponentialFogParameters2; + float4 MobileBasePass_Fog_ExponentialFogColorParameter; + float4 MobileBasePass_Fog_ExponentialFogParameters3; + float4 MobileBasePass_Fog_InscatteringLightDirection; + float4 MobileBasePass_Fog_DirectionalInscatteringColor; + float2 MobileBasePass_Fog_SinCosInscatteringColorCubemapRotation; + float PrePadding_MobileBasePass_Fog_104; + float PrePadding_MobileBasePass_Fog_108; + packed_float3 MobileBasePass_Fog_FogInscatteringTextureParameters; + float MobileBasePass_Fog_ApplyVolumetricFog; + float PrePadding_MobileBasePass_PlanarReflection_128; + float PrePadding_MobileBasePass_PlanarReflection_132; + float PrePadding_MobileBasePass_PlanarReflection_136; + float PrePadding_MobileBasePass_PlanarReflection_140; + float 
PrePadding_MobileBasePass_PlanarReflection_144; + float PrePadding_MobileBasePass_PlanarReflection_148; + float PrePadding_MobileBasePass_PlanarReflection_152; + float PrePadding_MobileBasePass_PlanarReflection_156; + float4 MobileBasePass_PlanarReflection_ReflectionPlane; + float4 MobileBasePass_PlanarReflection_PlanarReflectionOrigin; + float4 MobileBasePass_PlanarReflection_PlanarReflectionXAxis; + float4 MobileBasePass_PlanarReflection_PlanarReflectionYAxis; + float3x4 MobileBasePass_PlanarReflection_InverseTransposeMirrorMatrix; + packed_float3 MobileBasePass_PlanarReflection_PlanarReflectionParameters; + float PrePadding_MobileBasePass_PlanarReflection_284; + float2 MobileBasePass_PlanarReflection_PlanarReflectionParameters2; + float PrePadding_MobileBasePass_PlanarReflection_296; + float PrePadding_MobileBasePass_PlanarReflection_300; + float4x4 MobileBasePass_PlanarReflection_ProjectionWithExtraFOV[2]; + float4 MobileBasePass_PlanarReflection_PlanarReflectionScreenScaleBias[2]; + float2 MobileBasePass_PlanarReflection_PlanarReflectionScreenBound; + uint MobileBasePass_PlanarReflection_bIsStereo; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_UseEditorDepthTest; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + float PrePadding_Primitive_380; + packed_float3 Primitive_LocalObjectBoundsMax; + uint 
Primitive_LightingChannelMask; + uint Primitive_LightmapDataIndex; + int Primitive_SingleCaptureIndex; +}; + +struct type_LandscapeParameters +{ + float4 LandscapeParameters_HeightmapUVScaleBias; + float4 LandscapeParameters_WeightmapUVScaleBias; + float4 LandscapeParameters_LandscapeLightmapScaleBias; + float4 LandscapeParameters_SubsectionSizeVertsLayerUVPan; + float4 LandscapeParameters_SubsectionOffsetParams; + float4 LandscapeParameters_LightmapSubsectionOffsetParams; + float4x4 LandscapeParameters_LocalToWorldNoScaling; +}; + +struct type_Globals +{ + float4 LodBias; + float4 LodValues; + float4 SectionLods; + float4 NeighborSectionLod[4]; +}; + +struct main0_out +{ + float2 out_var_TEXCOORD0 [[user(locn0)]]; + float2 out_var_TEXCOORD1 [[user(locn1)]]; + float4 out_var_TEXCOORD2 [[user(locn2)]]; + float4 out_var_TEXCOORD3 [[user(locn3)]]; + float4 out_var_TEXCOORD8 [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in_var_ATTRIBUTE0 [[attribute(0)]]; + float4 in_var_ATTRIBUTE1_0 [[attribute(1)]]; + float4 in_var_ATTRIBUTE1_1 [[attribute(2)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_MobileBasePass& MobileBasePass [[buffer(1)]], constant type_Primitive& Primitive [[buffer(2)]], constant type_LandscapeParameters& LandscapeParameters [[buffer(3)]], constant type_Globals& _Globals [[buffer(4)]]) +{ + main0_out out = {}; + spvUnsafeArray in_var_ATTRIBUTE1 = {}; + in_var_ATTRIBUTE1[0] = in.in_var_ATTRIBUTE1_0; + in_var_ATTRIBUTE1[1] = in.in_var_ATTRIBUTE1_1; + float4 _115 = in.in_var_ATTRIBUTE0 * float4(255.0); + float2 _116 = _115.zw; + float2 _118 = fract(_116 * float2(0.5)); + float2 _119 = _118 * float2(2.0); + float2 _121 = fma(-_118, float2(2.0), _116) * float2(0.0039215688593685626983642578125); + float2 _122 = _115.xy; + float2 _126 = _122 * float2(_Globals.LodValues.w); + float _127 = _126.y; + float _128 = _126.x; + float4 _131 = float4(_127, _128, 1.0 
- _128, 1.0 - _127); + float4 _132 = _131 * float4(2.0); + float4 _186; + if (_119.y > 0.5) + { + float4 _161; + if (_119.x > 0.5) + { + _161 = fma(_132, float4(_Globals.SectionLods.w), fma(-_131, float4(2.0), float4(1.0)) * _Globals.NeighborSectionLod[3]); + } + else + { + _161 = fma(_132, float4(_Globals.SectionLods.z), fma(-_131, float4(2.0), float4(1.0)) * _Globals.NeighborSectionLod[2]); + } + _186 = _161; + } + else + { + float4 _185; + if (_119.x > 0.5) + { + _185 = fma(_132, float4(_Globals.SectionLods.y), fma(-_131, float4(2.0), float4(1.0)) * _Globals.NeighborSectionLod[1]); + } + else + { + _185 = fma(_132, float4(_Globals.SectionLods.x), fma(-_131, float4(2.0), float4(1.0)) * _Globals.NeighborSectionLod[0]); + } + _186 = _185; + } + float _206; + if ((_128 + _127) > 1.0) + { + float _198; + if (_128 < _127) + { + _198 = _186.w; + } + else + { + _198 = _186.z; + } + _206 = _198; + } + else + { + float _205; + if (_128 < _127) + { + _205 = _186.y; + } + else + { + _205 = _186.x; + } + _206 = _205; + } + float _207 = floor(_206); + float _220 = _121.x; + float3 _235 = select(select(select(select(select(float3(0.03125, _121.yy), float3(0.0625, _220, _121.y), bool3(_207 < 5.0)), float3(0.125, in_var_ATTRIBUTE1[1].w, _220), bool3(_207 < 4.0)), float3(0.25, in_var_ATTRIBUTE1[1].zw), bool3(_207 < 3.0)), float3(0.5, in_var_ATTRIBUTE1[1].yz), bool3(_207 < 2.0)), float3(1.0, in_var_ATTRIBUTE1[1].xy), bool3(_207 < 1.0)); + float _236 = _235.x; + float _245 = (fma(in_var_ATTRIBUTE1[0].x, 65280.0, in_var_ATTRIBUTE1[0].y * 255.0) - 32768.0) * 0.0078125; + float _252 = (fma(in_var_ATTRIBUTE1[0].z, 65280.0, in_var_ATTRIBUTE1[0].w * 255.0) - 32768.0) * 0.0078125; + float2 _257 = floor(_122 * float2(_236)); + float2 _271 = float2(fma(LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.x, _236, -1.0), fast::max((LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.x * 0.5) * _236, 2.0) - 1.0) * 
float2(LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.y); + float3 _287 = mix(float3(_257 / float2(_271.x), mix(_245, _252, _235.y)), float3(floor(_257 * float2(0.5)) / float2(_271.y), mix(_245, _252, _235.z)), float3(_206 - _207)); + float2 _288 = _119.xy; + float3 _296 = _287 + float3(_288 * LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.ww, 0.0); + float4 _322 = float4(fma(Primitive.Primitive_LocalToWorld[2u].xyz, _296.zzz, fma(Primitive.Primitive_LocalToWorld[0u].xyz, _296.xxx, Primitive.Primitive_LocalToWorld[1u].xyz * _296.yyy)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0); + float2 _323 = _287.xy; + float4 _338 = float4(_322.x, _322.y, _322.z, _322.w); + float4 _339 = View.View_TranslatedWorldToClip * _338; + float3 _341 = _322.xyz - float3(View.View_TranslatedWorldCameraOrigin); + float _345 = dot(_341, _341); + float _346 = rsqrt(_345); + float _347 = _345 * _346; + float _354 = _341.z; + float _357 = fast::max(0.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.w); + float _393; + float _394; + float _395; + float _396; + if (_357 > 0.0) + { + float _361 = _357 * _346; + float _365 = fma(_361, _354, View.View_WorldCameraOrigin[2]); + _393 = fma(-_357, _346, 1.0) * _347; + _394 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.z * exp2(-fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.y * (_365 - MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.w))); + _395 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.x * exp2(-fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.y * (_365 - MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.y))); + _396 = fma(-_361, _354, _354); + } + else + { + _393 = _347; + _394 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.x; + _395 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.x; + _396 = _354; + } + float 
_400 = fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.y * _396); + float _417 = fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.y * _396); + float _428 = fma(_395, (abs(_400) > 0.00999999977648258209228515625) ? ((1.0 - exp2(-_400)) / _400) : fma(-0.2402265071868896484375, _400, 0.693147182464599609375), _394 * ((abs(_417) > 0.00999999977648258209228515625) ? ((1.0 - exp2(-_417)) / _417) : fma(-0.2402265071868896484375, _417, 0.693147182464599609375))); + float3 _459; + if (MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.w >= 0.0) + { + _459 = (MobileBasePass.MobileBasePass_Fog_DirectionalInscatteringColor.xyz * float3(pow(fast::clamp(dot(_341 * float3(_346), MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.xyz), 0.0, 1.0), MobileBasePass.MobileBasePass_Fog_DirectionalInscatteringColor.w))) * float3(1.0 - fast::clamp(exp2(-(_428 * fast::max(_393 - MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.w, 0.0))), 0.0, 1.0)); + } + else + { + _459 = float3(0.0); + } + bool _468 = (MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w > 0.0) && (_347 > MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w); + float _471 = _468 ? 
1.0 : fast::max(fast::clamp(exp2(-(_428 * _393)), 0.0, 1.0), MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.w); + float4 _479 = float4(fma(MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.xyz, float3(1.0 - _471), select(_459, float3(0.0), bool3(_468))), _471); + float4 _482 = _338; + _482.w = _339.w; + out.out_var_TEXCOORD0 = fma(_288, LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.ww, _323 + LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.zw).xy; + out.out_var_TEXCOORD1 = fma(_288, LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.zz, fma(_323, LandscapeParameters.LandscapeParameters_WeightmapUVScaleBias.xy, LandscapeParameters.LandscapeParameters_WeightmapUVScaleBias.zw)); + out.out_var_TEXCOORD2 = float4(float4(0.0).x, float4(0.0).y, _479.x, _479.y); + out.out_var_TEXCOORD3 = float4(float4(0.0).x, float4(0.0).y, _479.z, _479.w); + out.out_var_TEXCOORD8 = _482; + out.gl_Position = _339; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert b/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert new file mode 100644 index 00000000000..2f9afa1678c --- /dev/null +++ b/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert @@ -0,0 +1,387 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +// Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) +uint2 spvTexelBufferCoord(uint tc) +{ + return uint2(tc % 4096, tc / 4096); +} + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 
View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 
View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float 
View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float 
View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_UseEditorDepthTest; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + float PrePadding_Primitive_380; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightingChannelMask; + uint Primitive_LightmapDataIndex; + int Primitive_SingleCaptureIndex; +}; + +struct type_MobileShadowDepthPass +{ + float PrePadding_MobileShadowDepthPass_0; + float PrePadding_MobileShadowDepthPass_4; + float PrePadding_MobileShadowDepthPass_8; + float PrePadding_MobileShadowDepthPass_12; + float PrePadding_MobileShadowDepthPass_16; + float PrePadding_MobileShadowDepthPass_20; + float PrePadding_MobileShadowDepthPass_24; + float PrePadding_MobileShadowDepthPass_28; + float PrePadding_MobileShadowDepthPass_32; + float PrePadding_MobileShadowDepthPass_36; + float PrePadding_MobileShadowDepthPass_40; + float PrePadding_MobileShadowDepthPass_44; + float PrePadding_MobileShadowDepthPass_48; + float PrePadding_MobileShadowDepthPass_52; + float PrePadding_MobileShadowDepthPass_56; + float PrePadding_MobileShadowDepthPass_60; + float PrePadding_MobileShadowDepthPass_64; + float PrePadding_MobileShadowDepthPass_68; + float PrePadding_MobileShadowDepthPass_72; + float PrePadding_MobileShadowDepthPass_76; + float4x4 
MobileShadowDepthPass_ProjectionMatrix; + float2 MobileShadowDepthPass_ShadowParams; + float MobileShadowDepthPass_bClampToNearPlane; + float PrePadding_MobileShadowDepthPass_156; + float4x4 MobileShadowDepthPass_ShadowViewProjectionMatrices[6]; +}; + +struct type_EmitterDynamicUniforms +{ + float2 EmitterDynamicUniforms_LocalToWorldScale; + float EmitterDynamicUniforms_EmitterInstRandom; + float PrePadding_EmitterDynamicUniforms_12; + float4 EmitterDynamicUniforms_AxisLockRight; + float4 EmitterDynamicUniforms_AxisLockUp; + float4 EmitterDynamicUniforms_DynamicColor; + float4 EmitterDynamicUniforms_MacroUVParameters; +}; + +struct type_EmitterUniforms +{ + float4 EmitterUniforms_ColorCurve; + float4 EmitterUniforms_ColorScale; + float4 EmitterUniforms_ColorBias; + float4 EmitterUniforms_MiscCurve; + float4 EmitterUniforms_MiscScale; + float4 EmitterUniforms_MiscBias; + float4 EmitterUniforms_SizeBySpeed; + float4 EmitterUniforms_SubImageSize; + float4 EmitterUniforms_TangentSelector; + packed_float3 EmitterUniforms_CameraFacingBlend; + float EmitterUniforms_RemoveHMDRoll; + float EmitterUniforms_RotationRateScale; + float EmitterUniforms_RotationBias; + float EmitterUniforms_CameraMotionBlurAmount; + float PrePadding_EmitterUniforms_172; + float2 EmitterUniforms_PivotOffset; +}; + +struct type_Globals +{ + uint ParticleIndicesOffset; +}; + +struct main0_out +{ + float out_var_TEXCOORD6 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float2 in_var_ATTRIBUTE0 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], constant type_MobileShadowDepthPass& MobileShadowDepthPass [[buffer(2)]], constant type_EmitterDynamicUniforms& EmitterDynamicUniforms [[buffer(3)]], constant type_EmitterUniforms& EmitterUniforms [[buffer(4)]], constant type_Globals& _Globals [[buffer(5)]], texture2d ParticleIndices [[texture(0)]], texture2d 
PositionTexture [[texture(1)]], texture2d VelocityTexture [[texture(2)]], texture2d AttributesTexture [[texture(3)]], texture2d CurveTexture [[texture(4)]], sampler PositionTextureSampler [[sampler(0)]], sampler VelocityTextureSampler [[sampler(1)]], sampler AttributesTextureSampler [[sampler(2)]], sampler CurveTextureSampler [[sampler(3)]], uint gl_VertexIndex [[vertex_id]], uint gl_InstanceIndex [[instance_id]]) +{ + main0_out out = {}; + float2 _133 = ParticleIndices.read(spvTexelBufferCoord((_Globals.ParticleIndicesOffset + ((gl_InstanceIndex * 16u) + (gl_VertexIndex / 4u))))).xy; + float4 _137 = PositionTexture.sample(PositionTextureSampler, _133, level(0.0)); + float4 _145 = AttributesTexture.sample(AttributesTextureSampler, _133, level(0.0)); + float _146 = _137.w; + float3 _158 = float3x3(Primitive.Primitive_LocalToWorld[0].xyz, Primitive.Primitive_LocalToWorld[1].xyz, Primitive.Primitive_LocalToWorld[2].xyz) * VelocityTexture.sample(VelocityTextureSampler, _133, level(0.0)).xyz; + float3 _160 = fast::normalize(_158 + float3(0.0, 0.0, 9.9999997473787516355514526367188e-05)); + float2 _204 = ((((_145.xy + float2((_145.x < 0.5) ? 0.0 : (-0.5), (_145.y < 0.5) ? 
0.0 : (-0.5))) * float2(2.0)) * (fma(CurveTexture.sample(CurveTextureSampler, fma(EmitterUniforms.EmitterUniforms_MiscCurve.zw, float2(_146), EmitterUniforms.EmitterUniforms_MiscCurve.xy), level(0.0)), EmitterUniforms.EmitterUniforms_MiscScale, EmitterUniforms.EmitterUniforms_MiscBias).xy * EmitterDynamicUniforms.EmitterDynamicUniforms_LocalToWorldScale)) * fast::min(fast::max(EmitterUniforms.EmitterUniforms_SizeBySpeed.xy * float2(length(_158)), float2(1.0)), EmitterUniforms.EmitterUniforms_SizeBySpeed.zw)) * float2(step(_146, 1.0)); + float3 _239 = float4(fma(Primitive.Primitive_LocalToWorld[2u].xyz, _137.zzz, fma(Primitive.Primitive_LocalToWorld[0u].xyz, _137.xxx, Primitive.Primitive_LocalToWorld[1u].xyz * _137.yyy)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0).xyz; + float3 _242 = float3(EmitterUniforms.EmitterUniforms_RemoveHMDRoll); + float3 _251 = mix(mix(float3(View.View_ViewRight), float3(View.View_HMDViewNoRollRight), _242), EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz, float3(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.w)); + float3 _259 = mix(-mix(float3(View.View_ViewUp), float3(View.View_HMDViewNoRollUp), _242), EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockUp.xyz, float3(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockUp.w)); + float3 _260 = float3(View.View_TranslatedWorldCameraOrigin) - _239; + float _261 = dot(_260, _260); + float3 _265 = _260 / float3(sqrt(fast::max(_261, 0.00999999977648258209228515625))); + float3 _335; + float3 _336; + if (EmitterUniforms.EmitterUniforms_CameraFacingBlend[0] > 0.0) + { + float3 _279 = cross(_265, float3(0.0, 0.0, 1.0)); + float3 _284 = _279 / float3(sqrt(fast::max(dot(_279, _279), 0.00999999977648258209228515625))); + float3 _286 = float3(fast::clamp(fma(_261, EmitterUniforms.EmitterUniforms_CameraFacingBlend[1], -EmitterUniforms.EmitterUniforms_CameraFacingBlend[2]), 0.0, 1.0)); + _335 = fast::normalize(mix(_251, 
_284, _286)); + _336 = fast::normalize(mix(_259, cross(_265, _284), _286)); + } + else + { + float3 _333; + float3 _334; + if (EmitterUniforms.EmitterUniforms_TangentSelector.y > 0.0) + { + float3 _297 = cross(_265, _160); + _333 = _297 / float3(sqrt(fast::max(dot(_297, _297), 0.00999999977648258209228515625))); + _334 = -_160; + } + else + { + float3 _331; + float3 _332; + if (EmitterUniforms.EmitterUniforms_TangentSelector.z > 0.0) + { + float3 _310 = cross(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz, _265); + _331 = EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz; + _332 = -(_310 / float3(sqrt(fast::max(dot(_310, _310), 0.00999999977648258209228515625)))); + } + else + { + float3 _329; + float3 _330; + if (EmitterUniforms.EmitterUniforms_TangentSelector.w > 0.0) + { + float3 _322 = cross(_265, float3(0.0, 0.0, 1.0)); + float3 _327 = _322 / float3(sqrt(fast::max(dot(_322, _322), 0.00999999977648258209228515625))); + _329 = _327; + _330 = cross(_265, _327); + } + else + { + _329 = _251; + _330 = _259; + } + _331 = _329; + _332 = _330; + } + _333 = _331; + _334 = _332; + } + _335 = _333; + _336 = _334; + } + float _339 = fma(fma(_145.w * EmitterUniforms.EmitterUniforms_RotationRateScale, _146, _145.z), 6.283185482025146484375, EmitterUniforms.EmitterUniforms_RotationBias); + float3 _342 = float3(sin(_339)); + float3 _344 = float3(cos(_339)); + float3 _367 = _239 + fma(float3(_204.x * (in.in_var_ATTRIBUTE0.x + EmitterUniforms.EmitterUniforms_PivotOffset.x)), fma(_342, _336, _344 * _335), float3(_204.y * (in.in_var_ATTRIBUTE0.y + EmitterUniforms.EmitterUniforms_PivotOffset.y)) * fma(_344, _336, -(_342 * _335))); + float4 _371 = float4(_367, 1.0); + float4 _375 = MobileShadowDepthPass.MobileShadowDepthPass_ProjectionMatrix * float4(_371.x, _371.y, _371.z, _371.w); + float4 _386; + if ((MobileShadowDepthPass.MobileShadowDepthPass_bClampToNearPlane > 0.0) && (_375.z < 0.0)) + { + float4 _384 = _375; + _384.z = 
9.9999999747524270787835121154785e-07; + _384.w = 1.0; + _386 = _384; + } + else + { + _386 = _375; + } + float4 _396 = _386; + _396.z = fma(_386.z, MobileShadowDepthPass.MobileShadowDepthPass_ShadowParams.y, MobileShadowDepthPass.MobileShadowDepthPass_ShadowParams.x) * _386.w; + out.out_var_TEXCOORD6 = 0.0; + out.gl_Position = _396; + return out; +} + diff --git a/reference/opt/shaders/amd/fs.invalid.frag b/reference/opt/shaders/amd/fs.invalid.frag deleted file mode 100644 index aecf69eba7a..00000000000 --- a/reference/opt/shaders/amd/fs.invalid.frag +++ /dev/null @@ -1,15 +0,0 @@ -#version 450 -#extension GL_AMD_shader_fragment_mask : require -#extension GL_AMD_shader_explicit_vertex_parameter : require - -layout(binding = 0) uniform sampler2DMS texture1; - -layout(location = 0) __explicitInterpAMD in vec4 vary; - -void main() -{ - uint testi1 = fragmentMaskFetchAMD(texture1, ivec2(0)); - vec4 test1 = fragmentFetchAMD(texture1, ivec2(1), 2u); - vec4 pos = interpolateAtVertexAMD(vary, 0u); -} - diff --git a/reference/opt/shaders/amd/gcn_shader.comp b/reference/opt/shaders/amd/gcn_shader.comp index e4bb67e9ba1..f1961854861 100644 --- a/reference/opt/shaders/amd/gcn_shader.comp +++ b/reference/opt/shaders/amd/gcn_shader.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. 
+#endif #extension GL_AMD_gcn_shader : require layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; diff --git a/reference/opt/shaders/amd/shader_ballot.comp b/reference/opt/shaders/amd/shader_ballot.comp index cc54a244df2..fc497abba94 100644 --- a/reference/opt/shaders/amd/shader_ballot.comp +++ b/reference/opt/shaders/amd/shader_ballot.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. +#endif #extension GL_ARB_shader_ballot : require #extension GL_AMD_shader_ballot : require layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; @@ -16,13 +22,12 @@ layout(binding = 1, std430) buffer outputData void main() { - float _25 = _12.inputDataArray[gl_LocalInvocationID.x]; - bool _31 = _25 > 0.0; + bool _31 = _12.inputDataArray[gl_LocalInvocationID.x] > 0.0; uvec4 _37 = uvec4(unpackUint2x32(ballotARB(_31)), 0u, 0u); uint _44 = mbcntAMD(packUint2x32(uvec2(_37.xy))); if (_31) { - _74.outputDataArray[_44] = _25; + _74.outputDataArray[_44] = _12.inputDataArray[gl_LocalInvocationID.x]; } } diff --git a/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..8d59fcc856a --- /dev/null +++ b/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) restrict buffer _3_5 +{ + ivec4 _m0; + uvec4 _m1; +} _5; + +layout(binding = 1, std430) restrict buffer _4_6 +{ + uvec4 _m0; + ivec4 _m1; +} _6; + +void main() +{ + _6._m0 = uvec4(lessThan(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(lessThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(lessThan(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(lessThanEqual(_5._m1, uvec4(_5._m0))); + _6._m0 = 
uvec4(greaterThan(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(greaterThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(greaterThan(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(greaterThanEqual(_5._m1, uvec4(_5._m0))); +} + diff --git a/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp b/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp index bdb3eeb9afd..8a552dba068 100644 --- a/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp +++ b/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp @@ -21,13 +21,13 @@ void main() bvec4 _35 = equal(_30, ivec4(_31)); bvec4 _36 = equal(_31, _31); bvec4 _37 = equal(_30, _30); - _6._m0 = mix(uvec4(0u), uvec4(1u), _34); - _6._m0 = mix(uvec4(0u), uvec4(1u), _35); - _6._m0 = mix(uvec4(0u), uvec4(1u), _36); - _6._m0 = mix(uvec4(0u), uvec4(1u), _37); - _6._m1 = mix(ivec4(0), ivec4(1), _34); - _6._m1 = mix(ivec4(0), ivec4(1), _35); - _6._m1 = mix(ivec4(0), ivec4(1), _36); - _6._m1 = mix(ivec4(0), ivec4(1), _37); + _6._m0 = uvec4(_34); + _6._m0 = uvec4(_35); + _6._m0 = uvec4(_36); + _6._m0 = uvec4(_37); + _6._m1 = ivec4(_34); + _6._m1 = ivec4(_35); + _6._m1 = ivec4(_36); + _6._m1 = ivec4(_37); } diff --git a/reference/opt/shaders/asm/comp/block-name-alias-global.asm.comp b/reference/opt/shaders/asm/comp/block-name-alias-global.asm.comp index 08fccbcde6b..86ba0a3b9f9 100644 --- a/reference/opt/shaders/asm/comp/block-name-alias-global.asm.comp +++ b/reference/opt/shaders/asm/comp/block-name-alias-global.asm.comp @@ -7,12 +7,6 @@ struct A int b; }; -struct A_1 -{ - int a; - int b; -}; - layout(binding = 1, std430) buffer C1 { A Data[]; @@ -20,7 +14,7 @@ layout(binding = 1, std430) buffer C1 layout(binding = 2, std140) uniform C2 { - A_1 Data[1024]; + A Data[1024]; } C2_1; layout(binding = 0, std430) buffer B @@ -30,7 +24,7 @@ layout(binding = 0, std430) buffer B layout(binding = 3, std140) uniform B { - A_1 Data[1024]; + A Data[1024]; } C4; void main() diff --git a/reference/opt/shaders/asm/comp/control-flow-hints.asm.comp 
b/reference/opt/shaders/asm/comp/control-flow-hints.asm.comp new file mode 100644 index 00000000000..617cde4bcad --- /dev/null +++ b/reference/opt/shaders/asm/comp/control-flow-hints.asm.comp @@ -0,0 +1,58 @@ +#version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH [[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer bar +{ + vec4 _data[]; +} bar_1; + +layout(binding = 1, std430) buffer foo +{ + vec4 _data[]; +} foo_1; + +void main() +{ + bar_1._data[0] = foo_1._data[0]; + bar_1._data[1] = foo_1._data[1]; + bar_1._data[2] = foo_1._data[2]; + bar_1._data[3] = foo_1._data[3]; + bar_1._data[4] = foo_1._data[4]; + bar_1._data[5] = foo_1._data[5]; + bar_1._data[6] = foo_1._data[6]; + bar_1._data[7] = foo_1._data[7]; + bar_1._data[8] = foo_1._data[8]; + bar_1._data[9] = foo_1._data[9]; + bar_1._data[10] = foo_1._data[10]; + bar_1._data[11] = foo_1._data[11]; + bar_1._data[12] = foo_1._data[12]; + bar_1._data[13] = foo_1._data[13]; + bar_1._data[14] = foo_1._data[14]; + bar_1._data[15] = foo_1._data[15]; + SPIRV_CROSS_LOOP + for (int _137 = 0; _137 < 16; ) + { + bar_1._data[15 - _137] = foo_1._data[_137]; + _137++; + continue; + } + SPIRV_CROSS_BRANCH + if (bar_1._data[10].x > 10.0) + { + foo_1._data[20] = vec4(5.0); + } + foo_1._data[20] = vec4(20.0); +} + diff --git a/reference/opt/shaders/asm/comp/hlsl-functionality.asm.comp b/reference/opt/shaders/asm/comp/hlsl-functionality.asm.comp index 6860cfafe47..d30cddafec8 100644 --- a/reference/opt/shaders/asm/comp/hlsl-functionality.asm.comp +++ b/reference/opt/shaders/asm/comp/hlsl-functionality.asm.comp @@ -13,7 +13,7 @@ 
layout(binding = 1, std430) buffer Buf_count void main() { - int _32 = atomicAdd(Buf_count_1._count, 1); - Buf_1._data[_32] = vec4(1.0); + int _33 = atomicAdd(Buf_count_1._count, 1); + Buf_1._data[_33] = vec4(1.0); } diff --git a/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000000..32d8e025012 --- /dev/null +++ b/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,42 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float a1; + vec2 a2; + vec3 a3; + vec4 a4; + float b1; + vec2 b2; + vec3 b3; + vec4 b4; + float c1; + vec2 c2; + vec3 c3; + vec4 c4; +} _4; + +void main() +{ + _4.a1 = min(_4.b1, _4.c1); + _4.a2 = min(_4.b2, _4.c2); + _4.a3 = min(_4.b3, _4.c3); + _4.a4 = min(_4.b4, _4.c4); + _4.a1 = max(_4.b1, _4.c1); + _4.a2 = max(_4.b2, _4.c2); + _4.a3 = max(_4.b3, _4.c3); + _4.a4 = max(_4.b4, _4.c4); + _4.a1 = clamp(_4.a1, _4.b1, _4.c1); + _4.a2 = clamp(_4.a2, _4.b2, _4.c2); + _4.a3 = clamp(_4.a3, _4.b3, _4.c3); + _4.a4 = clamp(_4.a4, _4.b4, _4.c4); + for (int _139 = 0; _139 < 2; ) + { + _4.a2 = min(_4.b2, _4.c2); + _4.a1 = clamp(_4.a1, _4.b2.x, _4.b2.y); + _139++; + continue; + } +} + diff --git a/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp deleted file mode 100644 index 9ae8d6fd7f3..00000000000 --- a/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp +++ /dev/null @@ -1,25 +0,0 @@ -#version 450 -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(binding = 1, rgba32f) uniform writeonly image2D outImageTexture; - -void main() -{ - int _30; - _30 = 7; - int _27_copy; - for (int _27 = 7; _27 >= 0; _27_copy = _27, _27--, _30 = _27_copy) - { - if (5.0 > float(_27)) - { - break; - } - else - { - continue; 
- } - continue; - } - imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0)); -} - diff --git a/reference/opt/shaders/asm/comp/recompile-block-naming.asm.comp b/reference/opt/shaders/asm/comp/recompile-block-naming.asm.comp index ff83de345d1..7d43b6f54d0 100644 --- a/reference/opt/shaders/asm/comp/recompile-block-naming.asm.comp +++ b/reference/opt/shaders/asm/comp/recompile-block-naming.asm.comp @@ -18,18 +18,18 @@ layout(binding = 0, std430) buffer MyThirdBuffer void main() { - uint _105 = MyFirstBuffer_1._data[0]; - uint _109 = MyFirstBuffer_1._data[1]; - uint _113 = MyFirstBuffer_1._data[2]; - uint _117 = MyFirstBuffer_1._data[3]; - uint _122 = MySecondBuffer_1._data[1]; - uint _126 = MySecondBuffer_1._data[2]; - uint _130 = MySecondBuffer_1._data[3]; - uint _134 = MySecondBuffer_1._data[4]; - uvec4 _140 = uvec4(_105, _109, _113, _117) + uvec4(_122, _126, _130, _134); - MyThirdBuffer_1._data[0] = _140.x; - MyThirdBuffer_1._data[1] = _140.y; - MyThirdBuffer_1._data[2] = _140.z; - MyThirdBuffer_1._data[3] = _140.w; + uint _106 = MyFirstBuffer_1._data[0]; + uint _110 = MyFirstBuffer_1._data[1]; + uint _114 = MyFirstBuffer_1._data[2]; + uint _118 = MyFirstBuffer_1._data[3]; + uint _123 = MySecondBuffer_1._data[1]; + uint _127 = MySecondBuffer_1._data[2]; + uint _131 = MySecondBuffer_1._data[3]; + uint _135 = MySecondBuffer_1._data[4]; + uvec4 _141 = uvec4(_106, _110, _114, _118) + uvec4(_123, _127, _131, _135); + MyThirdBuffer_1._data[0] = _141.x; + MyThirdBuffer_1._data[1] = _141.y; + MyThirdBuffer_1._data[2] = _141.z; + MyThirdBuffer_1._data[3] = _141.w; } diff --git a/reference/shaders/asm/comp/switch-break-ladder.asm.comp b/reference/opt/shaders/asm/comp/switch-break-ladder.asm.invalid.comp similarity index 100% rename from reference/shaders/asm/comp/switch-break-ladder.asm.comp rename to reference/opt/shaders/asm/comp/switch-break-ladder.asm.invalid.comp diff --git 
a/reference/opt/shaders/asm/comp/undefined-constant-composite.asm.comp b/reference/opt/shaders/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 00000000000..eb94756d736 --- /dev/null +++ b/reference/opt/shaders/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,26 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct _20 +{ + int _m0; + int _m1; +}; + +int _28; + +layout(binding = 1, std430) buffer _5_6 +{ + int _m0[10]; +} _6; + +layout(binding = 0, std430) buffer _7_8 +{ + int _m0[10]; +} _8; + +void main() +{ + _6._m0[gl_GlobalInvocationID.x] = _8._m0[gl_GlobalInvocationID.x] + _20(_28, 200)._m1; +} + diff --git a/reference/opt/shaders/asm/frag/complex-name-workarounds.asm.frag b/reference/opt/shaders/asm/frag/complex-name-workarounds.asm.frag index c07f1657f40..d68f84b48e9 100644 --- a/reference/opt/shaders/asm/frag/complex-name-workarounds.asm.frag +++ b/reference/opt/shaders/asm/frag/complex-name-workarounds.asm.frag @@ -7,10 +7,9 @@ layout(location = 0) out vec4 b; void main() { vec4 _28 = (_ + a) + _; - vec4 _34 = (_ - a) + a; b = _28; - b = _34; + b = _; b = _28; - b = _34; + b = _; } diff --git a/reference/opt/shaders/asm/frag/default-member-names.asm.frag b/reference/opt/shaders/asm/frag/default-member-names.asm.frag index 13f81b11a67..ad64761ede4 100644 --- a/reference/opt/shaders/asm/frag/default-member-names.asm.frag +++ b/reference/opt/shaders/asm/frag/default-member-names.asm.frag @@ -1,9 +1,9 @@ #version 450 -layout(location = 0) out vec4 _3; - float _49; +layout(location = 0) out vec4 _3; + void main() { _3 = vec4(_49); diff --git a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag index 452fd6fb95c..350142d4b76 100644 --- a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag +++ b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag @@ -7,7 +7,7 @@ layout(location = 
0) out vec4 _entryPointOutput; void main() { - ivec2 _152 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy; - _entryPointOutput = ((texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _152, 0) + texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _152, 0)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy); + ivec2 _154 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy; + _entryPointOutput = ((texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _154, 0) + texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _154, 0)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy); } diff --git a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk index 55e2c2da630..b898b01bc0e 100644 --- a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk +++ b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk @@ -8,7 +8,7 @@ layout(location = 0) out vec4 _entryPointOutput; void main() { - ivec2 _152 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy; - _entryPointOutput = ((texelFetch(SampledImage, _152, 0) + texelFetch(SampledImage, _152, 0)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy); + ivec2 _154 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy; + _entryPointOutput = ((texelFetch(SampledImage, _154, 0) + texelFetch(SampledImage, _154, 0)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy); } diff --git a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag 
b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag new file mode 100644 index 00000000000..350142d4b76 --- /dev/null +++ b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag @@ -0,0 +1,13 @@ +#version 450 + +uniform sampler2D SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler; +uniform sampler2D SPIRV_Cross_CombinedSampledImageSampler; + +layout(location = 0) out vec4 _entryPointOutput; + +void main() +{ + ivec2 _154 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy; + _entryPointOutput = ((texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _154, 0) + texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _154, 0)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy); +} + diff --git a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag.vk b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag.vk new file mode 100644 index 00000000000..21797b4cf3a --- /dev/null +++ b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag.vk @@ -0,0 +1,14 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler Sampler; +layout(set = 0, binding = 0) uniform texture2D SampledImage; +layout(set = 0, binding = 0) uniform sampler SPIRV_Cross_DummySampler; + +layout(location = 0) out vec4 _entryPointOutput; + +void main() +{ + ivec2 _154 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy; + _entryPointOutput = ((texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), _154, 0) + texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), _154, 0)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy); +} + diff --git a/reference/opt/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag 
b/reference/opt/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag new file mode 100644 index 00000000000..05ce10adfaa --- /dev/null +++ b/reference/opt/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag @@ -0,0 +1,6 @@ +#version 450 + +void main() +{ +} + diff --git a/reference/opt/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag.vk b/reference/opt/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag.vk new file mode 100644 index 00000000000..05ce10adfaa --- /dev/null +++ b/reference/opt/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag.vk @@ -0,0 +1,6 @@ +#version 450 + +void main() +{ +} + diff --git a/reference/opt/shaders/asm/frag/inf-nan-constant-double.asm.frag b/reference/opt/shaders/asm/frag/inf-nan-constant-double.asm.frag index d8e29aa4041..e53b282f879 100644 --- a/reference/opt/shaders/asm/frag/inf-nan-constant-double.asm.frag +++ b/reference/opt/shaders/asm/frag/inf-nan-constant-double.asm.frag @@ -1,11 +1,17 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. 
+#endif layout(location = 0) out vec3 FragColor; layout(location = 0) flat in double vTmp; void main() { - FragColor = vec3(dvec3(uint64BitsToDouble(0x7ff0000000000000ul), uint64BitsToDouble(0xfff0000000000000ul), uint64BitsToDouble(0x7ff8000000000000ul)) + dvec3(vTmp)); + FragColor = vec3(dvec3(uint64BitsToDouble(0x7ff0000000000000ul /* inf */), uint64BitsToDouble(0xfff0000000000000ul /* -inf */), uint64BitsToDouble(0x7ff8000000000000ul /* nan */)) + dvec3(vTmp)); } diff --git a/reference/opt/shaders/asm/frag/inf-nan-constant.asm.frag b/reference/opt/shaders/asm/frag/inf-nan-constant.asm.frag index dd4284c9b11..b5e0c6e968b 100644 --- a/reference/opt/shaders/asm/frag/inf-nan-constant.asm.frag +++ b/reference/opt/shaders/asm/frag/inf-nan-constant.asm.frag @@ -6,6 +6,6 @@ layout(location = 0) out highp vec3 FragColor; void main() { - FragColor = vec3(uintBitsToFloat(0x7f800000u), uintBitsToFloat(0xff800000u), uintBitsToFloat(0x7fc00000u)); + FragColor = vec3(uintBitsToFloat(0x7f800000u /* inf */), uintBitsToFloat(0xff800000u /* -inf */), uintBitsToFloat(0x7fc00000u /* nan */)); } diff --git a/reference/opt/shaders/asm/frag/line-directive.line.asm.frag b/reference/opt/shaders/asm/frag/line-directive.line.asm.frag index 30be934fc6e..4682d79e808 100644 --- a/reference/opt/shaders/asm/frag/line-directive.line.asm.frag +++ b/reference/opt/shaders/asm/frag/line-directive.line.asm.frag @@ -7,14 +7,12 @@ layout(location = 0) in float vColor; #line 8 "test.frag" void main() { - float _80; #line 8 "test.frag" FragColor = 1.0; #line 9 "test.frag" FragColor = 2.0; #line 10 "test.frag" - _80 = vColor; - if (_80 < 0.0) + if (vColor < 0.0) { #line 12 "test.frag" FragColor = 3.0; @@ -24,16 +22,19 @@ void main() #line 16 "test.frag" FragColor = 4.0; } - for (int _126 = 0; float(_126) < (40.0 + _80); ) +#line 19 "test.frag" + for (int _127 = 0; float(_127) < (40.0 + vColor); ) { #line 21 "test.frag" FragColor += 0.20000000298023223876953125; #line 22 "test.frag" FragColor += 
0.300000011920928955078125; - _126 += (int(_80) + 5); +#line 19 "test.frag" + _127 += (int(vColor) + 5); continue; } - switch (int(_80)) +#line 25 "test.frag" + switch (int(vColor)) { case 0: { @@ -59,7 +60,8 @@ void main() } for (;;) { - FragColor += (10.0 + _80); +#line 42 "test.frag" + FragColor += (10.0 + vColor); #line 43 "test.frag" if (FragColor < 100.0) { @@ -69,5 +71,6 @@ void main() break; } } +#line 48 "test.frag" } diff --git a/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag b/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag index 8c35e62ecef..eb16828e67a 100644 --- a/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag +++ b/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag @@ -1,4 +1,18 @@ #version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH [[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif + +int _231; layout(binding = 0, std140) uniform Foo { @@ -10,65 +24,60 @@ layout(binding = 0, std140) uniform Foo layout(location = 0) in vec3 fragWorld; layout(location = 0) out int _entryPointOutput; -int _240; +mat4 spvWorkaroundRowMajor(mat4 wrap) { return wrap; } void main() { - uint _227; - int _236; - for (;;) + int _228; + do { - _227 = 0u; - bool _231; - int _237; + bool _225; + int _229; + uint _222 = 0u; + SPIRV_CROSS_UNROLL for (;;) { - if (_227 < _11.shadowCascadesNum) + if (_222 < _11.shadowCascadesNum) { - mat4 _228; - for (;;) + mat4 _223; + do { if (_11.test == 0) { - _228 = mat4(vec4(0.5, 0.0, 0.0, 0.0), vec4(0.0, 0.5, 0.0, 0.0), vec4(0.0, 0.0, 0.5, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); + _223 = mat4(vec4(0.5, 0.0, 0.0, 0.0), vec4(0.0, 0.5, 0.0, 0.0), 
vec4(0.0, 0.0, 0.5, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); break; } - _228 = mat4(vec4(1.0, 0.0, 0.0, 0.0), vec4(0.0, 1.0, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); + _223 = mat4(vec4(1.0, 0.0, 0.0, 0.0), vec4(0.0, 1.0, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); break; - } - vec4 _177 = (_228 * _11.lightVP[_227]) * vec4(fragWorld, 1.0); - float _179 = _177.z; - float _186 = _177.x; - float _188 = _177.y; - if ((((_179 >= 0.0) && (_179 <= 1.0)) && (max(_186, _188) <= 1.0)) && (min(_186, _188) >= 0.0)) + } while(false); + vec4 _170 = (_223 * spvWorkaroundRowMajor(_11.lightVP[_222])) * vec4(fragWorld, 1.0); + float _172 = _170.z; + float _179 = _170.x; + float _181 = _170.y; + if ((((_172 >= 0.0) && (_172 <= 1.0)) && (max(_179, _181) <= 1.0)) && (min(_179, _181) >= 0.0)) { - _237 = int(_227); - _231 = true; + _229 = int(_222); + _225 = true; break; } - else - { - _227++; - continue; - } - _227++; + _222++; continue; } else { - _237 = _240; - _231 = false; + _229 = _231; + _225 = false; break; } } - if (_231) + if (_225) { - _236 = _237; + _228 = _229; break; } - _236 = -1; + _228 = -1; break; - } - _entryPointOutput = _236; + } while(false); + _entryPointOutput = _228; } diff --git a/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag index c2dba928df2..97d3b74f022 100644 --- a/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag +++ b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag @@ -22,8 +22,10 @@ layout(location = 0) out vec4 _entryPointOutput; void main() { + vec2 _45 = vec2(0.0, _8.CB1.TextureSize.w); vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv); float _50 = _49.y; + float _53 = clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375); float _55; float _58; _55 = 0.0; @@ -31,10 +33,11 @@ void main() for (int _60 = -3; _60 <= 3; ) { float _64 = 
float(_60); - vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64)); - float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375)); - _55 += (_72.x * _78); - _58 += _78; + float _68 = exp(((-_64) * _64) * 0.2222220003604888916015625); + vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64)); + float _77 = float(abs(_72.y - _50) < _53); + _55 = fma(_72.x, _68 * _77, _55); + _58 = fma(_68, _77, _58); _60++; continue; } diff --git a/reference/opt/shaders/asm/frag/loop-merge-to-continue.asm.frag b/reference/opt/shaders/asm/frag/loop-merge-to-continue.asm.frag deleted file mode 100644 index faf32edcf42..00000000000 --- a/reference/opt/shaders/asm/frag/loop-merge-to-continue.asm.frag +++ /dev/null @@ -1,21 +0,0 @@ -#version 450 - -layout(location = 0) out vec4 FragColor; -layout(location = 0) in vec4 v0; - -void main() -{ - FragColor = vec4(1.0); - int _50; - _50 = 0; - for (; _50 < 4; _50++) - { - for (int _51 = 0; _51 < 4; ) - { - FragColor += vec4(v0[(_50 + _51) & 3]); - _51++; - continue; - } - } -} - diff --git a/reference/opt/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag b/reference/opt/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag new file mode 100644 index 00000000000..4734c89c9af --- /dev/null +++ b/reference/opt/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag @@ -0,0 +1,8 @@ +#version 320 es +precision mediump float; +precision highp int; + +void main() +{ +} + diff --git a/reference/opt/shaders/asm/frag/out-of-bounds-access.asm.frag b/reference/opt/shaders/asm/frag/out-of-bounds-access.asm.frag new file mode 100644 index 00000000000..4734c89c9af --- /dev/null +++ b/reference/opt/shaders/asm/frag/out-of-bounds-access.asm.frag @@ -0,0 +1,8 @@ +#version 320 es +precision mediump float; 
+precision highp int; + +void main() +{ +} + diff --git a/reference/opt/shaders/asm/frag/pack-and-unpack-uint2.asm.frag b/reference/opt/shaders/asm/frag/pack-and-unpack-uint2.asm.frag new file mode 100644 index 00000000000..9aa9a471e84 --- /dev/null +++ b/reference/opt/shaders/asm/frag/pack-and-unpack-uint2.asm.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(18.0, 52.0, 1.0, 1.0); +} + diff --git a/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag b/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag deleted file mode 100644 index 05c17c7a66e..00000000000 --- a/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag +++ /dev/null @@ -1,24 +0,0 @@ -#version 450 - -layout(location = 0) out vec4 FragColor; -layout(location = 0) in vec4 v0; - -void main() -{ - FragColor = vec4(1.0); - for (int _54 = 0; _54 < 4; _54++) - { - if (v0.x == 20.0) - { - FragColor += vec4(v0[_54 & 3]); - continue; - } - else - { - FragColor += vec4(v0[_54 & 1]); - continue; - } - continue; - } -} - diff --git a/reference/opt/shaders/asm/frag/storage-class-output-initializer.asm.frag b/reference/opt/shaders/asm/frag/storage-class-output-initializer.asm.frag index 229358757aa..a5faaefb309 100644 --- a/reference/opt/shaders/asm/frag/storage-class-output-initializer.asm.frag +++ b/reference/opt/shaders/asm/frag/storage-class-output-initializer.asm.frag @@ -2,10 +2,12 @@ layout(location = 0) out vec4 FragColors[2]; layout(location = 2) out vec4 FragColor; +const vec4 _3_init[2] = vec4[](vec4(1.0, 2.0, 3.0, 4.0), vec4(10.0)); +const vec4 _4_init = vec4(5.0); void main() { - FragColors = vec4[](vec4(1.0, 2.0, 3.0, 4.0), vec4(10.0)); - FragColor = vec4(5.0); + FragColors = _3_init; + FragColor = _4_init; } diff --git a/reference/opt/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag b/reference/opt/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag index 
b2473f4d037..7930ca3b4a0 100644 --- a/reference/opt/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag +++ b/reference/opt/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag @@ -8,12 +8,12 @@ struct Foo float var2; }; +Foo _22; + layout(binding = 0) uniform mediump sampler2D uSampler; layout(location = 0) out vec4 FragColor; -Foo _22; - void main() { FragColor = texture(uSampler, vec2(_22.var1, _22.var2)); diff --git a/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag b/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag deleted file mode 100644 index ea4a25995a0..00000000000 --- a/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag +++ /dev/null @@ -1,31 +0,0 @@ -#version 450 - -layout(location = 0) out vec4 FragColor; - -void main() -{ - FragColor = vec4(1.0); - for (int _52 = 0; _52 < 4; _52++) - { - switch (_52) - { - case 0: - { - FragColor.x += 1.0; - break; - } - case 1: - { - FragColor.y += 3.0; - break; - } - default: - { - FragColor.z += 3.0; - break; - } - } - continue; - } -} - diff --git a/reference/opt/shaders/asm/frag/switch-preserve-sign-extension.asm.frag b/reference/opt/shaders/asm/frag/switch-preserve-sign-extension.asm.frag new file mode 100644 index 00000000000..41b98085125 --- /dev/null +++ b/reference/opt/shaders/asm/frag/switch-preserve-sign-extension.asm.frag @@ -0,0 +1,9 @@ +#version 330 +#ifdef GL_ARB_shading_language_420pack +#extension GL_ARB_shading_language_420pack : require +#endif + +void main() +{ +} + diff --git a/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag b/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag index 270c779aa18..fcad3fbf0d8 100644 --- a/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag @@ -1,4 +1,16 @@ #version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define 
SPIRV_CROSS_BRANCH [[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif struct _28 { @@ -90,19 +102,17 @@ uniform sampler2D SPIRV_Cross_Combined_2; layout(location = 0) out vec4 _5; -_28 _74; - void main() { - vec2 _82 = gl_FragCoord.xy * _19._m23.xy; vec4 _88 = _7._m2 * _7._m0.xyxy; vec2 _95 = _88.xy; vec2 _96 = _88.zw; - vec2 _97 = clamp(_82 + (vec2(0.0, -2.0) * _7._m0.xy), _95, _96); + vec2 _97 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(0.0, -2.0) * _7._m0.xy), _95, _96); vec3 _109 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _97, 0.0).w * _7._m1, 0.0, 1.0); vec4 _113 = textureLod(SPIRV_Cross_Combined_1, _97, 0.0); float _114 = _113.y; vec3 _129; + SPIRV_CROSS_BRANCH if (_114 > 0.0) { _129 = _109 + (textureLod(SPIRV_Cross_Combined_2, _97, 0.0).xyz * clamp(_114 * _113.z, 0.0, 1.0)); @@ -111,12 +121,12 @@ void main() { _129 = _109; } - vec3 _130 = _129 * 0.5; - vec2 _144 = clamp(_82 + (vec2(-1.0) * _7._m0.xy), _95, _96); + vec2 _144 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(-1.0) * _7._m0.xy), _95, _96); vec3 _156 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _144, 0.0).w * _7._m1, 0.0, 1.0); vec4 _160 = textureLod(SPIRV_Cross_Combined_1, _144, 0.0); float _161 = _160.y; vec3 _176; + SPIRV_CROSS_BRANCH if (_161 > 0.0) { _176 = _156 + (textureLod(SPIRV_Cross_Combined_2, _144, 0.0).xyz * clamp(_161 * _160.z, 0.0, 1.0)); @@ -125,12 +135,12 @@ void main() { _176 = _156; } - vec3 _177 = _176 * 0.5; - vec2 _191 = clamp(_82 + (vec2(0.0, -1.0) * _7._m0.xy), _95, _96); + vec2 _191 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(0.0, -1.0) * _7._m0.xy), _95, _96); vec3 _203 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _191, 0.0).w * _7._m1, 0.0, 1.0); vec4 _207 = textureLod(SPIRV_Cross_Combined_1, _191, 0.0); float _208 = _207.y; vec3 _223; + SPIRV_CROSS_BRANCH if (_208 > 0.0) { 
_223 = _203 + (textureLod(SPIRV_Cross_Combined_2, _191, 0.0).xyz * clamp(_208 * _207.z, 0.0, 1.0)); @@ -139,12 +149,12 @@ void main() { _223 = _203; } - vec3 _224 = _223 * 0.75; - vec2 _238 = clamp(_82 + (vec2(1.0, -1.0) * _7._m0.xy), _95, _96); + vec2 _238 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(1.0, -1.0) * _7._m0.xy), _95, _96); vec3 _250 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _238, 0.0).w * _7._m1, 0.0, 1.0); vec4 _254 = textureLod(SPIRV_Cross_Combined_1, _238, 0.0); float _255 = _254.y; vec3 _270; + SPIRV_CROSS_BRANCH if (_255 > 0.0) { _270 = _250 + (textureLod(SPIRV_Cross_Combined_2, _238, 0.0).xyz * clamp(_255 * _254.z, 0.0, 1.0)); @@ -153,12 +163,12 @@ void main() { _270 = _250; } - vec3 _271 = _270 * 0.5; - vec2 _285 = clamp(_82 + (vec2(-2.0, 0.0) * _7._m0.xy), _95, _96); + vec2 _285 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(-2.0, 0.0) * _7._m0.xy), _95, _96); vec3 _297 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _285, 0.0).w * _7._m1, 0.0, 1.0); vec4 _301 = textureLod(SPIRV_Cross_Combined_1, _285, 0.0); float _302 = _301.y; vec3 _317; + SPIRV_CROSS_BRANCH if (_302 > 0.0) { _317 = _297 + (textureLod(SPIRV_Cross_Combined_2, _285, 0.0).xyz * clamp(_302 * _301.z, 0.0, 1.0)); @@ -167,12 +177,12 @@ void main() { _317 = _297; } - vec3 _318 = _317 * 0.5; - vec2 _332 = clamp(_82 + (vec2(-1.0, 0.0) * _7._m0.xy), _95, _96); + vec2 _332 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(-1.0, 0.0) * _7._m0.xy), _95, _96); vec3 _344 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _332, 0.0).w * _7._m1, 0.0, 1.0); vec4 _348 = textureLod(SPIRV_Cross_Combined_1, _332, 0.0); float _349 = _348.y; vec3 _364; + SPIRV_CROSS_BRANCH if (_349 > 0.0) { _364 = _344 + (textureLod(SPIRV_Cross_Combined_2, _332, 0.0).xyz * clamp(_349 * _348.z, 0.0, 1.0)); @@ -181,12 +191,12 @@ void main() { _364 = _344; } - vec3 _365 = _364 * 0.75; - vec2 _379 = clamp(_82, _95, _96); + vec2 _379 = clamp(gl_FragCoord.xy * _19._m23.xy, _95, _96); vec3 _391 = _11._m5 * 
clamp(textureLod(SPIRV_Cross_Combined, _379, 0.0).w * _7._m1, 0.0, 1.0); vec4 _395 = textureLod(SPIRV_Cross_Combined_1, _379, 0.0); float _396 = _395.y; vec3 _411; + SPIRV_CROSS_BRANCH if (_396 > 0.0) { _411 = _391 + (textureLod(SPIRV_Cross_Combined_2, _379, 0.0).xyz * clamp(_396 * _395.z, 0.0, 1.0)); @@ -195,12 +205,12 @@ void main() { _411 = _391; } - vec3 _412 = _411 * 1.0; - vec2 _426 = clamp(_82 + (vec2(1.0, 0.0) * _7._m0.xy), _95, _96); + vec2 _426 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(1.0, 0.0) * _7._m0.xy), _95, _96); vec3 _438 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _426, 0.0).w * _7._m1, 0.0, 1.0); vec4 _442 = textureLod(SPIRV_Cross_Combined_1, _426, 0.0); float _443 = _442.y; vec3 _458; + SPIRV_CROSS_BRANCH if (_443 > 0.0) { _458 = _438 + (textureLod(SPIRV_Cross_Combined_2, _426, 0.0).xyz * clamp(_443 * _442.z, 0.0, 1.0)); @@ -209,12 +219,12 @@ void main() { _458 = _438; } - vec3 _459 = _458 * 0.75; - vec2 _473 = clamp(_82 + (vec2(2.0, 0.0) * _7._m0.xy), _95, _96); + vec2 _473 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(2.0, 0.0) * _7._m0.xy), _95, _96); vec3 _485 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _473, 0.0).w * _7._m1, 0.0, 1.0); vec4 _489 = textureLod(SPIRV_Cross_Combined_1, _473, 0.0); float _490 = _489.y; vec3 _505; + SPIRV_CROSS_BRANCH if (_490 > 0.0) { _505 = _485 + (textureLod(SPIRV_Cross_Combined_2, _473, 0.0).xyz * clamp(_490 * _489.z, 0.0, 1.0)); @@ -223,12 +233,12 @@ void main() { _505 = _485; } - vec3 _506 = _505 * 0.5; - vec2 _520 = clamp(_82 + (vec2(-1.0, 1.0) * _7._m0.xy), _95, _96); + vec2 _520 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(-1.0, 1.0) * _7._m0.xy), _95, _96); vec3 _532 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _520, 0.0).w * _7._m1, 0.0, 1.0); vec4 _536 = textureLod(SPIRV_Cross_Combined_1, _520, 0.0); float _537 = _536.y; vec3 _552; + SPIRV_CROSS_BRANCH if (_537 > 0.0) { _552 = _532 + (textureLod(SPIRV_Cross_Combined_2, _520, 0.0).xyz * clamp(_537 * _536.z, 0.0, 1.0)); @@ 
-237,12 +247,12 @@ void main() { _552 = _532; } - vec3 _553 = _552 * 0.5; - vec2 _567 = clamp(_82 + (vec2(0.0, 1.0) * _7._m0.xy), _95, _96); + vec2 _567 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(0.0, 1.0) * _7._m0.xy), _95, _96); vec3 _579 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _567, 0.0).w * _7._m1, 0.0, 1.0); vec4 _583 = textureLod(SPIRV_Cross_Combined_1, _567, 0.0); float _584 = _583.y; vec3 _599; + SPIRV_CROSS_BRANCH if (_584 > 0.0) { _599 = _579 + (textureLod(SPIRV_Cross_Combined_2, _567, 0.0).xyz * clamp(_584 * _583.z, 0.0, 1.0)); @@ -251,12 +261,12 @@ void main() { _599 = _579; } - vec3 _600 = _599 * 0.75; - vec2 _614 = clamp(_82 + _7._m0.xy, _95, _96); + vec2 _614 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, _7._m0.xy), _95, _96); vec3 _626 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _614, 0.0).w * _7._m1, 0.0, 1.0); vec4 _630 = textureLod(SPIRV_Cross_Combined_1, _614, 0.0); float _631 = _630.y; vec3 _646; + SPIRV_CROSS_BRANCH if (_631 > 0.0) { _646 = _626 + (textureLod(SPIRV_Cross_Combined_2, _614, 0.0).xyz * clamp(_631 * _630.z, 0.0, 1.0)); @@ -265,12 +275,12 @@ void main() { _646 = _626; } - vec3 _647 = _646 * 0.5; - vec2 _661 = clamp(_82 + (vec2(0.0, 2.0) * _7._m0.xy), _95, _96); + vec2 _661 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(0.0, 2.0) * _7._m0.xy), _95, _96); vec3 _673 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _661, 0.0).w * _7._m1, 0.0, 1.0); vec4 _677 = textureLod(SPIRV_Cross_Combined_1, _661, 0.0); float _678 = _677.y; vec3 _693; + SPIRV_CROSS_BRANCH if (_678 > 0.0) { _693 = _673 + (textureLod(SPIRV_Cross_Combined_2, _661, 0.0).xyz * clamp(_678 * _677.z, 0.0, 1.0)); @@ -279,11 +289,9 @@ void main() { _693 = _673; } - vec3 _702 = ((((((((((((_130.xyz + _177).xyz + _224).xyz + _271).xyz + _318).xyz + _365).xyz + _412).xyz + _459).xyz + _506).xyz + _553).xyz + _600).xyz + _647).xyz + (_693 * 0.5)).xyz * vec3(0.125); - _28 _704 = _74; - _704._m0 = vec4(_702.x, _702.y, _702.z, vec4(0.0).w); - _28 _705 = _704; - 
_705._m0.w = 1.0; - _5 = _705._m0; + vec3 _702 = (((((((((((((_129 * 0.5).xyz + (_176 * 0.5)).xyz + (_223 * 0.75)).xyz + (_270 * 0.5)).xyz + (_317 * 0.5)).xyz + (_364 * 0.75)).xyz + (_411 * 1.0)).xyz + (_458 * 0.75)).xyz + (_505 * 0.5)).xyz + (_552 * 0.5)).xyz + (_599 * 0.75)).xyz + (_646 * 0.5)).xyz + (_693 * 0.5)).xyz * vec3(0.125); + _28 _750 = _28(vec4(_702.x, _702.y, _702.z, vec4(0.0).w)); + _750._m0.w = 1.0; + _5 = _750._m0; } diff --git a/reference/opt/shaders/asm/geom/unroll-glposition-load.asm.geom b/reference/opt/shaders/asm/geom/unroll-glposition-load.asm.geom index d1f8963fa10..678379dddee 100644 --- a/reference/opt/shaders/asm/geom/unroll-glposition-load.asm.geom +++ b/reference/opt/shaders/asm/geom/unroll-glposition-load.asm.geom @@ -4,17 +4,11 @@ layout(max_vertices = 3, triangle_strip) out; void main() { - vec4 _35_unrolled[3]; - for (int i = 0; i < int(3); i++) + for (int _74 = 0; _74 < 3; ) { - _35_unrolled[i] = gl_in[i].gl_Position; - } - vec4 param[3] = _35_unrolled; - for (int _73 = 0; _73 < 3; ) - { - gl_Position = param[_73]; + gl_Position = gl_in[_74].gl_Position; EmitVertex(); - _73++; + _74++; continue; } EndPrimitive(); diff --git a/reference/opt/shaders/asm/vert/empty-io.asm.vert b/reference/opt/shaders/asm/vert/empty-io.asm.vert index 3819a71dd28..52fd706565b 100644 --- a/reference/opt/shaders/asm/vert/empty-io.asm.vert +++ b/reference/opt/shaders/asm/vert/empty-io.asm.vert @@ -6,6 +6,7 @@ struct VSOutput }; layout(location = 0) in vec4 position; +layout(location = 0) out VSOutput _entryPointOutput; void main() { diff --git a/reference/opt/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert b/reference/opt/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert index b237783f6cb..3ac9732b9bb 100644 --- a/reference/opt/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert +++ b/reference/opt/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert @@ -1,4 +1,7 @@ #version 450 +#ifdef 
GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif struct InstanceData { @@ -12,7 +15,11 @@ layout(binding = 0, std430) readonly buffer gInstanceData } gInstanceData_1; layout(location = 0) in vec3 PosL; +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif layout(location = 0) out vec4 _entryPointOutput_Color; void main() diff --git a/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert b/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert index 2608c1d578f..3d9ad3b4865 100644 --- a/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert +++ b/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert @@ -10,21 +10,21 @@ const int _20 = (_7 + 2); #endif const uint _8 = SPIRV_CROSS_CONSTANT_ID_202; const uint _25 = (_8 % 5u); -const ivec4 _30 = ivec4(20, 30, _20, _20); -const ivec2 _32 = ivec2(_30.y, _30.x); -const int _33 = _30.y; +const int _30 = _7 - (-3) * (_7 / (-3)); +const ivec4 _32 = ivec4(20, 30, _20, _30); +const ivec2 _34 = ivec2(_32.y, _32.x); +const int _35 = _32.y; layout(location = 0) flat out int _4; void main() { - vec4 _63 = vec4(0.0); - _63.y = float(_20); - vec4 _66 = _63; - _66.z = float(_25); - vec4 _52 = _66 + vec4(_30); - vec2 _56 = _52.xy + vec2(_32); - gl_Position = vec4(_56.x, _56.y, _52.z, _52.w); - _4 = _33; + vec4 _65 = vec4(0.0); + _65.y = float(_20); + _65.z = float(_25); + vec4 _54 = _65 + vec4(_32); + vec2 _58 = _54.xy + vec2(_34); + gl_Position = vec4(_58.x, _58.y, _54.z, _54.w); + _4 = _35; } diff --git a/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk b/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk index 10da8f4b8e8..ed9d98e9dee 100644 --- a/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk +++ b/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk @@ 
-4,21 +4,25 @@ layout(constant_id = 201) const int _7 = -10; const int _20 = (_7 + 2); layout(constant_id = 202) const uint _8 = 100u; const uint _25 = (_8 % 5u); -const ivec4 _30 = ivec4(20, 30, _20, _20); -const ivec2 _32 = ivec2(_30.y, _30.x); -const int _33 = _30.y; +const int _30 = _7 - (-3) * (_7 / (-3)); +const ivec4 _32 = ivec4(20, 30, _20, _30); +const ivec2 _34 = ivec2(_32.y, _32.x); +const int _35 = _32.y; layout(location = 0) flat out int _4; void main() { - vec4 _63 = vec4(0.0); - _63.y = float(_20); - vec4 _66 = _63; - _66.z = float(_25); - vec4 _52 = _66 + vec4(_30); - vec2 _56 = _52.xy + vec2(_32); - gl_Position = vec4(_56.x, _56.y, _52.z, _52.w); - _4 = _33; + float _42 = float(_20); + vec4 _65 = vec4(0.0); + _65.y = _42; + float _47 = float(_25); + _65.z = _47; + vec4 _52 = vec4(_32); + vec4 _54 = _65 + _52; + vec2 _55 = vec2(_34); + vec2 _58 = _54.xy + _55; + gl_Position = vec4(_58.x, _58.y, _54.z, _54.w); + _4 = _35; } diff --git a/reference/opt/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert b/reference/opt/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert index c25e9bbe5b2..134e08d592c 100644 --- a/reference/opt/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert +++ b/reference/opt/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert @@ -1,6 +1,13 @@ #version 450 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif void main() { diff --git a/reference/opt/shaders/comp/bake_gradient.comp b/reference/opt/shaders/comp/bake_gradient.comp index 69634d5d848..7ef245e62ed 100644 --- a/reference/opt/shaders/comp/bake_gradient.comp +++ b/reference/opt/shaders/comp/bake_gradient.comp @@ -16,11 +16,23 @@ void main() { vec4 _59 = (vec2(gl_GlobalInvocationID.xy) * _46.uInvSize.xy).xyxy + (_46.uInvSize * 0.5); vec2 _67 = _59.xy; + mediump float _79 = 
textureLodOffset(uHeight, _67, 0.0, ivec2(-1, 0)).x; + float hp_copy_79 = _79; + mediump float _87 = textureLodOffset(uHeight, _67, 0.0, ivec2(1, 0)).x; + float hp_copy_87 = _87; + mediump float _94 = textureLodOffset(uHeight, _67, 0.0, ivec2(0, -1)).x; + float hp_copy_94 = _94; + mediump float _101 = textureLodOffset(uHeight, _67, 0.0, ivec2(0, 1)).x; + float hp_copy_101 = _101; vec2 _128 = _59.zw; vec2 _157 = ((textureLodOffset(uDisplacement, _128, 0.0, ivec2(1, 0)).xy - textureLodOffset(uDisplacement, _128, 0.0, ivec2(-1, 0)).xy) * 0.60000002384185791015625) * _46.uScale.z; vec2 _161 = ((textureLodOffset(uDisplacement, _128, 0.0, ivec2(0, 1)).xy - textureLodOffset(uDisplacement, _128, 0.0, ivec2(0, -1)).xy) * 0.60000002384185791015625) * _46.uScale.z; + mediump float _203 = _157.y * _161.x; + float hp_copy_203 = _203; + float _209 = -hp_copy_203; + mediump float mp_copy_209 = _209; ivec2 _172 = ivec2(gl_GlobalInvocationID.xy); imageStore(iHeightDisplacement, _172, vec4(textureLod(uHeight, _67, 0.0).x, 0.0, 0.0, 0.0)); - imageStore(iGradJacobian, _172, vec4((_46.uScale.xy * 0.5) * vec2(textureLodOffset(uHeight, _67, 0.0, ivec2(1, 0)).x - textureLodOffset(uHeight, _67, 0.0, ivec2(-1, 0)).x, textureLodOffset(uHeight, _67, 0.0, ivec2(0, 1)).x - textureLodOffset(uHeight, _67, 0.0, ivec2(0, -1)).x), ((1.0 + _157.x) * (1.0 + _161.y)) - (_157.y * _161.x), 0.0)); + imageStore(iGradJacobian, _172, vec4((_46.uScale.xy * 0.5) * vec2(hp_copy_87 - hp_copy_79, hp_copy_101 - hp_copy_94), (1.0 + _157.x) * (1.0 + _161.y) + mp_copy_209, 0.0)); } diff --git a/reference/opt/shaders/comp/barriers.comp b/reference/opt/shaders/comp/barriers.comp index a091497a49b..7dfde372adb 100644 --- a/reference/opt/shaders/comp/barriers.comp +++ b/reference/opt/shaders/comp/barriers.comp @@ -8,21 +8,15 @@ void main() memoryBarrierImage(); memoryBarrierBuffer(); groupMemoryBarrier(); - memoryBarrierShared(); barrier(); memoryBarrier(); - memoryBarrierShared(); barrier(); memoryBarrierImage(); - 
memoryBarrierShared(); barrier(); memoryBarrierBuffer(); - memoryBarrierShared(); barrier(); groupMemoryBarrier(); - memoryBarrierShared(); barrier(); - memoryBarrierShared(); barrier(); } diff --git a/reference/opt/shaders/comp/bitcast-16bit-1.invalid.comp b/reference/opt/shaders/comp/bitcast-16bit-1.invalid.comp deleted file mode 100644 index 501f97955fc..00000000000 --- a/reference/opt/shaders/comp/bitcast-16bit-1.invalid.comp +++ /dev/null @@ -1,34 +0,0 @@ -#version 450 -#if defined(GL_AMD_gpu_shader_half_float) -#extension GL_AMD_gpu_shader_half_float : require -#elif defined(GL_NV_gpu_shader5) -#extension GL_NV_gpu_shader5 : require -#else -#error No extension available for FP16. -#endif -#if defined(GL_AMD_gpu_shader_int16) -#extension GL_AMD_gpu_shader_int16 : require -#else -#error No extension available for Int16. -#endif -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -layout(binding = 0, std430) buffer SSBO0 -{ - i16vec4 inputs[]; -} _25; - -layout(binding = 1, std430) buffer SSBO1 -{ - ivec4 outputs[]; -} _39; - -void main() -{ - uint ident = gl_GlobalInvocationID.x; - f16vec2 a = int16BitsToFloat16(_25.inputs[ident].xy); - _39.outputs[ident].x = int(packFloat2x16(a + f16vec2(float16_t(1.0)))); - _39.outputs[ident].y = packInt2x16(_25.inputs[ident].zw); - _39.outputs[ident].z = int(packUint2x16(u16vec2(_25.inputs[ident].xy))); -} - diff --git a/reference/opt/shaders/comp/casts.comp b/reference/opt/shaders/comp/casts.comp index 11ef36287b4..12cf17885bc 100644 --- a/reference/opt/shaders/comp/casts.comp +++ b/reference/opt/shaders/comp/casts.comp @@ -13,6 +13,6 @@ layout(binding = 0, std430) buffer SSBO0 void main() { - _21.outputs[gl_GlobalInvocationID.x] = mix(ivec4(0), ivec4(1), notEqual((_27.inputs[gl_GlobalInvocationID.x] & ivec4(3)), ivec4(uvec4(0u)))); + _21.outputs[gl_GlobalInvocationID.x] = ivec4(notEqual((_27.inputs[gl_GlobalInvocationID.x] & ivec4(3)), ivec4(uvec4(0u)))); } diff --git a/reference/opt/shaders/comp/cfg.comp 
b/reference/opt/shaders/comp/cfg.comp index 0b7e0c16162..f6e02a85516 100644 --- a/reference/opt/shaders/comp/cfg.comp +++ b/reference/opt/shaders/comp/cfg.comp @@ -1,13 +1,13 @@ #version 310 es layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; +float _188; + layout(binding = 0, std430) buffer SSBO { float data; } _11; -float _183; - void main() { if (_11.data != 0.0) @@ -31,14 +31,14 @@ void main() break; } } - float _180; - _180 = _183; - for (int _179 = 0; _179 < 20; ) + float _185; + _185 = _188; + for (int _184 = 0; _184 < 20; ) { - _180 += 10.0; - _179++; + _185 += 10.0; + _184++; continue; } - _11.data = _180; + _11.data = _185; } diff --git a/reference/opt/shaders/comp/dowhile.comp b/reference/opt/shaders/comp/dowhile.comp index 4370ea3079a..d9a9f77cbf9 100644 --- a/reference/opt/shaders/comp/dowhile.comp +++ b/reference/opt/shaders/comp/dowhile.comp @@ -14,19 +14,19 @@ layout(binding = 1, std430) writeonly buffer SSBO2 void main() { - vec4 _57; - int _58; - _58 = 0; - _57 = _28.in_data[gl_GlobalInvocationID.x]; + vec4 _59; + int _60; + _60 = 0; + _59 = _28.in_data[gl_GlobalInvocationID.x]; vec4 _42; for (;;) { - _42 = _28.mvp * _57; - int _44 = _58 + 1; + _42 = _28.mvp * _59; + int _44 = _60 + 1; if (_44 < 16) { - _58 = _44; - _57 = _42; + _60 = _44; + _59 = _42; } else { diff --git a/reference/opt/shaders/comp/generate_height.comp b/reference/opt/shaders/comp/generate_height.comp index feb8d41c0a2..75ad1f9125a 100644 --- a/reference/opt/shaders/comp/generate_height.comp +++ b/reference/opt/shaders/comp/generate_height.comp @@ -18,39 +18,39 @@ layout(binding = 1, std430) writeonly buffer HeightmapFFT void main() { - uvec2 _264 = uvec2(64u, 1u) * gl_NumWorkGroups.xy; - uvec2 _269 = _264 - gl_GlobalInvocationID.xy; - bvec2 _271 = equal(gl_GlobalInvocationID.xy, uvec2(0u)); - uint _454; - if (_271.x) + uvec2 _265 = uvec2(64u, 1u) * gl_NumWorkGroups.xy; + uvec2 _270 = _265 - gl_GlobalInvocationID.xy; + bvec2 _272 = equal(gl_GlobalInvocationID.xy, 
uvec2(0u)); + uint _460; + if (_272.x) { - _454 = 0u; + _460 = 0u; } else { - _454 = _269.x; + _460 = _270.x; } - uint _455; - if (_271.y) + uint _461; + if (_272.y) { - _455 = 0u; + _461 = 0u; } else { - _455 = _269.y; + _461 = _270.y; } - uint _276 = _264.x; - uint _280 = (gl_GlobalInvocationID.y * _276) + gl_GlobalInvocationID.x; - uint _290 = (_455 * _276) + _454; - vec2 _297 = vec2(gl_GlobalInvocationID.xy); - vec2 _299 = vec2(_264); - float _309 = sqrt(9.81000041961669921875 * length(_166.uModTime.xy * mix(_297, _297 - _299, greaterThan(_297, _299 * 0.5)))) * _166.uModTime.z; - vec2 _316 = vec2(cos(_309), sin(_309)); - vec2 _387 = _316.xx; - vec2 _392 = _316.yy; - vec2 _395 = _392 * _137.distribution[_280].yx; - vec2 _421 = _392 * _137.distribution[_290].yx; - vec2 _429 = (_137.distribution[_290] * _387) + vec2(-_421.x, _421.y); - _225.heights[_280] = packHalf2x16(((_137.distribution[_280] * _387) + vec2(-_395.x, _395.y)) + vec2(_429.x, -_429.y)); + uint _277 = _265.x; + uint _281 = (gl_GlobalInvocationID.y * _277) + gl_GlobalInvocationID.x; + uint _291 = (_461 * _277) + _460; + vec2 _298 = vec2(gl_GlobalInvocationID.xy); + vec2 _300 = vec2(_265); + float _310 = sqrt(9.81000041961669921875 * length(_166.uModTime.xy * mix(_298, _298 - _300, greaterThan(_298, _300 * 0.5)))) * _166.uModTime.z; + vec2 _317 = vec2(cos(_310), sin(_310)); + vec2 _391 = _317.xx; + vec2 _396 = _317.yy; + vec2 _399 = _396 * _137.distribution[_281].yx; + vec2 _426 = _396 * _137.distribution[_291].yx; + vec2 _434 = _137.distribution[_291] * _391 + vec2(-_426.x, _426.y); + _225.heights[_281] = packHalf2x16((_137.distribution[_281] * _391 + vec2(-_399.x, _399.y)) + vec2(_434.x, -_434.y)); } diff --git a/reference/opt/shaders/comp/insert.comp b/reference/opt/shaders/comp/insert.comp index 5ff719449a2..97c55dd5aac 100644 --- a/reference/opt/shaders/comp/insert.comp +++ b/reference/opt/shaders/comp/insert.comp @@ -6,19 +6,9 @@ layout(binding = 0, std430) writeonly buffer SSBO vec4 out_data[]; 
} _27; -vec4 _52; - void main() { - vec4 _45 = _52; - _45.x = 10.0; - vec4 _47 = _45; - _47.y = 30.0; - vec4 _49 = _47; - _49.z = 70.0; - vec4 _51 = _49; - _51.w = 90.0; - _27.out_data[gl_GlobalInvocationID.x] = _51; + _27.out_data[gl_GlobalInvocationID.x] = vec4(10.0, 30.0, 70.0, 90.0); _27.out_data[gl_GlobalInvocationID.x].y = 20.0; } diff --git a/reference/opt/shaders/comp/shared.comp b/reference/opt/shaders/comp/shared.comp index f95cb2b8b9a..62cf4a4b209 100644 --- a/reference/opt/shaders/comp/shared.comp +++ b/reference/opt/shaders/comp/shared.comp @@ -16,7 +16,6 @@ shared float sShared[4]; void main() { sShared[gl_LocalInvocationIndex] = _22.in_data[gl_GlobalInvocationID.x]; - memoryBarrierShared(); barrier(); _44.out_data[gl_GlobalInvocationID.x] = sShared[3u - gl_LocalInvocationIndex]; } diff --git a/reference/opt/shaders/comp/struct-packing.comp b/reference/opt/shaders/comp/struct-packing.comp index cd1eda1b32b..f4b58342d48 100644 --- a/reference/opt/shaders/comp/struct-packing.comp +++ b/reference/opt/shaders/comp/struct-packing.comp @@ -43,48 +43,6 @@ struct Content S4 m3s[8]; }; -struct S0_1 -{ - vec2 a[1]; - float b; -}; - -struct S1_1 -{ - vec3 a; - float b; -}; - -struct S2_1 -{ - vec3 a[1]; - float b; -}; - -struct S3_1 -{ - vec2 a; - float b; -}; - -struct S4_1 -{ - vec2 c; -}; - -struct Content_1 -{ - S0_1 m0s[1]; - S1_1 m1s[1]; - S2_1 m2s[1]; - S0_1 m0; - S1_1 m1; - S2_1 m2; - S3_1 m3; - float m4; - S4_1 m3s[8]; -}; - layout(binding = 1, std430) restrict buffer SSBO1 { Content content; @@ -103,9 +61,9 @@ layout(binding = 1, std430) restrict buffer SSBO1 layout(binding = 0, std140) restrict buffer SSBO0 { - Content_1 content; - Content_1 content1[2]; - Content_1 content2; + Content content; + Content content1[2]; + Content content2; mat2 m0; mat2 m1; mat2x3 m2[4]; diff --git a/reference/opt/shaders/comp/torture-loop.comp b/reference/opt/shaders/comp/torture-loop.comp index 5943966c059..9ca2b9591f0 100644 --- 
a/reference/opt/shaders/comp/torture-loop.comp +++ b/reference/opt/shaders/comp/torture-loop.comp @@ -14,27 +14,27 @@ layout(binding = 1, std430) writeonly buffer SSBO2 void main() { - vec4 _99; - _99 = _24.in_data[gl_GlobalInvocationID.x]; - for (int _93 = 0; (_93 + 1) < 10; ) + vec4 _101; + _101 = _24.in_data[gl_GlobalInvocationID.x]; + for (int _95 = 0; (_95 + 1) < 10; ) { - _99 *= 2.0; - _93 += 2; + _101 *= 2.0; + _95 += 2; continue; } - vec4 _98; - _98 = _99; - vec4 _103; - for (uint _94 = 0u; _94 < 16u; _98 = _103, _94++) + vec4 _100; + _100 = _101; + vec4 _105; + for (uint _96 = 0u; _96 < 16u; _100 = _105, _96++) { - _103 = _98; - for (uint _100 = 0u; _100 < 30u; ) + _105 = _100; + for (uint _102 = 0u; _102 < 30u; ) { - _103 = _24.mvp * _103; - _100++; + _105 = _24.mvp * _105; + _102++; continue; } } - _89.out_data[gl_GlobalInvocationID.x] = _98; + _89.out_data[gl_GlobalInvocationID.x] = _100; } diff --git a/reference/opt/shaders/desktop-only/comp/enhanced-layouts.comp b/reference/opt/shaders/desktop-only/comp/enhanced-layouts.comp index 45b25064b6b..ba37ca237b8 100644 --- a/reference/opt/shaders/desktop-only/comp/enhanced-layouts.comp +++ b/reference/opt/shaders/desktop-only/comp/enhanced-layouts.comp @@ -8,13 +8,6 @@ struct Foo int c; }; -struct Foo_1 -{ - int a; - int b; - int c; -}; - layout(binding = 1, std140) buffer SSBO1 { layout(offset = 4) int a; @@ -27,7 +20,7 @@ layout(binding = 2, std430) buffer SSBO2 { layout(offset = 4) int a; layout(offset = 8) int b; - layout(offset = 16) Foo_1 foo; + layout(offset = 16) Foo foo; layout(offset = 48) int c[8]; } ssbo2; diff --git a/reference/opt/shaders/desktop-only/comp/int64.desktop.comp b/reference/opt/shaders/desktop-only/comp/int64.desktop.comp index 702456b303f..28afc2fbd7d 100644 --- a/reference/opt/shaders/desktop-only/comp/int64.desktop.comp +++ b/reference/opt/shaders/desktop-only/comp/int64.desktop.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension 
GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. +#endif layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; struct M0 diff --git a/reference/opt/shaders/desktop-only/frag/image-size.frag b/reference/opt/shaders/desktop-only/frag/image-size.frag new file mode 100644 index 00000000000..5bb060398ed --- /dev/null +++ b/reference/opt/shaders/desktop-only/frag/image-size.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(binding = 0, r32f) uniform readonly writeonly image2D uImage1; +layout(binding = 1, r32f) uniform readonly writeonly image2D uImage2; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(vec2(imageSize(uImage1)), vec2(imageSize(uImage2))); +} + diff --git a/reference/opt/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag b/reference/opt/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag new file mode 100644 index 00000000000..1d9062064a8 --- /dev/null +++ b/reference/opt/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(binding = 0, r32f) uniform image2D uImage1; +layout(binding = 1, r32f) uniform image2D uImage2; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(vec2(imageSize(uImage1)), vec2(imageSize(uImage2))); +} + diff --git a/reference/opt/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag b/reference/opt/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag index d5e45bda431..334a6b19446 100644 --- a/reference/opt/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag +++ b/reference/opt/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag @@ -13,14 +13,15 @@ layout(location = 0) in vec3 vClip3; void main() { - vec4 _20 = vClip4; - _20.y = vClip4.w; - FragColor = textureProj(uShadow1D, vec4(_20.x, 0.0, vClip4.z, _20.y)); - vec4 _30 = vClip4; - _30.z = 
vClip4.w; - FragColor = textureProj(uShadow2D, vec4(_30.xy, vClip4.z, _30.z)); + vec4 _17 = vClip4; + vec4 _20 = _17; + _20.y = _17.w; + FragColor = textureProj(uShadow1D, vec4(_20.x, 0.0, _17.z, _20.y)); + vec4 _30 = _17; + _30.z = _17.w; + FragColor = textureProj(uShadow2D, vec4(_30.xy, _17.z, _30.z)); FragColor = textureProj(uSampler1D, vClip2).x; FragColor = textureProj(uSampler2D, vClip3).x; - FragColor = textureProj(uSampler3D, vClip4).x; + FragColor = textureProj(uSampler3D, _17).x; } diff --git a/reference/opt/shaders/desktop-only/tesc/basic.desktop.sso.tesc b/reference/opt/shaders/desktop-only/tesc/basic.desktop.sso.tesc index 5e958256af5..c51699db6db 100644 --- a/reference/opt/shaders/desktop-only/tesc/basic.desktop.sso.tesc +++ b/reference/opt/shaders/desktop-only/tesc/basic.desktop.sso.tesc @@ -4,7 +4,7 @@ layout(vertices = 1) out; in gl_PerVertex { vec4 gl_Position; -} gl_in[gl_MaxPatchVertices]; +} gl_in[]; out gl_PerVertex { diff --git a/reference/opt/shaders/desktop-only/tese/triangle.desktop.sso.tese b/reference/opt/shaders/desktop-only/tese/triangle.desktop.sso.tese index 31027dae80f..c9bacd464e4 100644 --- a/reference/opt/shaders/desktop-only/tese/triangle.desktop.sso.tese +++ b/reference/opt/shaders/desktop-only/tese/triangle.desktop.sso.tese @@ -4,7 +4,7 @@ layout(triangles, cw, fractional_even_spacing) in; in gl_PerVertex { vec4 gl_Position; -} gl_in[gl_MaxPatchVertices]; +} gl_in[]; out gl_PerVertex { diff --git a/reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert b/reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert new file mode 100644 index 00000000000..2b3c5ce0516 --- /dev/null +++ b/reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert @@ -0,0 +1,24 @@ +#version 450 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif + +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseVertex 
gl_BaseVertexARB +#else +uniform int SPIRV_Cross_BaseVertex; +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else +uniform int SPIRV_Cross_BaseInstance; +#endif +#ifndef GL_ARB_shader_draw_parameters +#error GL_ARB_shader_draw_parameters is not supported. +#endif + +void main() +{ + gl_Position = vec4(float(SPIRV_Cross_BaseVertex), float(SPIRV_Cross_BaseInstance), float(gl_DrawIDARB), 1.0); +} + diff --git a/reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vert b/reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert.vk similarity index 100% rename from reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vert rename to reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert.vk diff --git a/reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert b/reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert new file mode 100644 index 00000000000..bc16d0431aa --- /dev/null +++ b/reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert @@ -0,0 +1,24 @@ +#version 460 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif + +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseVertex gl_BaseVertexARB +#else +uniform int SPIRV_Cross_BaseVertex; +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else +uniform int SPIRV_Cross_BaseInstance; +#endif +#ifndef GL_ARB_shader_draw_parameters +#error GL_ARB_shader_draw_parameters is not supported. 
+#endif + +void main() +{ + gl_Position = vec4(float(SPIRV_Cross_BaseVertex), float(SPIRV_Cross_BaseInstance), float(gl_DrawIDARB), 1.0); +} + diff --git a/reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vert b/reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert.vk similarity index 100% rename from reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vert rename to reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert.vk diff --git a/reference/opt/shaders/frag/16bit-constants.frag b/reference/opt/shaders/frag/16bit-constants.frag deleted file mode 100644 index 57d8256138b..00000000000 --- a/reference/opt/shaders/frag/16bit-constants.frag +++ /dev/null @@ -1,25 +0,0 @@ -#version 450 -#if defined(GL_AMD_gpu_shader_half_float) -#extension GL_AMD_gpu_shader_half_float : require -#elif defined(GL_NV_gpu_shader5) -#extension GL_NV_gpu_shader5 : require -#else -#error No extension available for FP16. -#endif -#if defined(GL_AMD_gpu_shader_int16) -#extension GL_AMD_gpu_shader_int16 : require -#else -#error No extension available for Int16. 
-#endif - -layout(location = 0) out float16_t foo; -layout(location = 1) out int16_t bar; -layout(location = 2) out uint16_t baz; - -void main() -{ - foo = float16_t(1.0); - bar = 2s; - baz = 3us; -} - diff --git a/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag b/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag new file mode 100644 index 00000000000..8eaea64e630 --- /dev/null +++ b/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag @@ -0,0 +1,29 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 1, std140) uniform Count +{ + float count; +} _44; + +layout(binding = 0) uniform mediump sampler2D tex; + +layout(location = 0) in highp vec4 vertex; +layout(location = 0) out vec4 fragColor; + +void main() +{ + highp float _24 = 1.0 / float(textureSize(tex, 0).x); + highp float _34 = dFdx(vertex.x); + float _62; + _62 = 0.0; + for (float _61 = 0.0; _61 < _44.count; ) + { + _62 = _24 * _34 + _62; + _61 += 1.0; + continue; + } + fragColor = vec4(_62); +} + diff --git a/reference/opt/shaders/frag/barycentric-khr.frag b/reference/opt/shaders/frag/barycentric-khr.frag new file mode 100644 index 00000000000..71a44c38575 --- /dev/null +++ b/reference/opt/shaders/frag/barycentric-khr.frag @@ -0,0 +1,13 @@ +#version 450 +#extension GL_EXT_fragment_shader_barycentric : require + +layout(location = 0) out vec2 value; +layout(location = 0) pervertexEXT in vec2 vUV[3]; +layout(location = 3) pervertexEXT in vec2 vUV2[3]; + +void main() +{ + value = ((vUV[0] * gl_BaryCoordEXT.x) + (vUV[1] * gl_BaryCoordEXT.y)) + (vUV[2] * gl_BaryCoordEXT.z); + value += (((vUV2[0] * gl_BaryCoordNoPerspEXT.x) + (vUV2[1] * gl_BaryCoordNoPerspEXT.y)) + (vUV2[2] * gl_BaryCoordNoPerspEXT.z)); +} + diff --git a/reference/opt/shaders/frag/barycentric-nv.frag b/reference/opt/shaders/frag/barycentric-nv.frag index cc3b4de27c9..b3b57e2f880 100644 --- a/reference/opt/shaders/frag/barycentric-nv.frag +++ 
b/reference/opt/shaders/frag/barycentric-nv.frag @@ -1,19 +1,13 @@ #version 450 #extension GL_NV_fragment_shader_barycentric : require -layout(binding = 0, std430) readonly buffer Vertices -{ - vec2 uvs[]; -} _19; - layout(location = 0) out vec2 value; +layout(location = 0) pervertexNV in vec2 vUV[3]; +layout(location = 1) pervertexNV in vec2 vUV2[3]; void main() { - int _23 = 3 * gl_PrimitiveID; - int _32 = _23 + 1; - int _39 = _23 + 2; - value = ((_19.uvs[_23] * gl_BaryCoordNV.x) + (_19.uvs[_32] * gl_BaryCoordNV.y)) + (_19.uvs[_39] * gl_BaryCoordNV.z); - value += (((_19.uvs[_23] * gl_BaryCoordNoPerspNV.x) + (_19.uvs[_32] * gl_BaryCoordNoPerspNV.y)) + (_19.uvs[_39] * gl_BaryCoordNoPerspNV.z)); + value = ((vUV[0] * gl_BaryCoordNV.x) + (vUV[1] * gl_BaryCoordNV.y)) + (vUV[2] * gl_BaryCoordNV.z); + value += (((vUV2[0] * gl_BaryCoordNoPerspNV.x) + (vUV2[1] * gl_BaryCoordNoPerspNV.y)) + (vUV2[2] * gl_BaryCoordNoPerspNV.z)); } diff --git a/reference/opt/shaders/frag/constant-array.frag b/reference/opt/shaders/frag/constant-array.frag index 914888aaf6f..a7a064a163c 100644 --- a/reference/opt/shaders/frag/constant-array.frag +++ b/reference/opt/shaders/frag/constant-array.frag @@ -17,6 +17,10 @@ layout(location = 0) flat in mediump int index; void main() { - FragColor = ((_37[index] + _55[index][index + 1]) + vec4(30.0)) + vec4(_75[index].a + _75[index].b); + highp float _106 = _75[index].a; + float mp_copy_106 = _106; + highp float _107 = _75[index].b; + float mp_copy_107 = _107; + FragColor = ((_37[index] + _55[index][index + 1]) + vec4(30.0)) + vec4(mp_copy_106 + mp_copy_107); } diff --git a/reference/opt/shaders/frag/for-loop-init.frag b/reference/opt/shaders/frag/for-loop-init.frag index 3aee71c7a14..6c2dfb50943 100644 --- a/reference/opt/shaders/frag/for-loop-init.frag +++ b/reference/opt/shaders/frag/for-loop-init.frag @@ -6,62 +6,60 @@ layout(location = 0) out mediump int FragColor; void main() { - mediump int _145; - for (;;) + do { FragColor = 16; - _145 = 0; - 
for (; _145 < 25; ) + for (mediump int _143 = 0; _143 < 25; ) { FragColor += 10; - _145++; + _143++; continue; } - for (mediump int _146 = 1; _146 < 30; ) + for (mediump int _144 = 1; _144 < 30; ) { FragColor += 11; - _146++; + _144++; continue; } - mediump int _147; - _147 = 0; - for (; _147 < 20; ) + mediump int _145; + _145 = 0; + for (; _145 < 20; ) { FragColor += 12; - _147++; + _145++; continue; } - mediump int _62 = _147 + 3; + mediump int _62 = _145 + 3; FragColor += _62; if (_62 == 40) { - for (mediump int _151 = 0; _151 < 40; ) + for (mediump int _149 = 0; _149 < 40; ) { FragColor += 13; - _151++; + _149++; continue; } break; } FragColor += _62; - mediump ivec2 _148; - _148 = ivec2(0); - for (; _148.x < 10; ) + mediump ivec2 _146; + _146 = ivec2(0); + for (; _146.x < 10; ) { - FragColor += _148.y; - mediump ivec2 _144 = _148; - _144.x = _148.x + 4; - _148 = _144; + FragColor += _146.y; + mediump ivec2 _142 = _146; + _142.x = _146.x + 4; + _146 = _142; continue; } - for (mediump int _150 = _62; _150 < 40; ) + for (mediump int _148 = _62; _148 < 40; ) { - FragColor += _150; - _150++; + FragColor += _148; + _148++; continue; } FragColor += _62; break; - } + } while(false); } diff --git a/reference/opt/shaders/frag/frexp-modf.frag b/reference/opt/shaders/frag/frexp-modf.frag index 25f3360aaa2..134878e0340 100644 --- a/reference/opt/shaders/frag/frexp-modf.frag +++ b/reference/opt/shaders/frag/frexp-modf.frag @@ -22,12 +22,14 @@ void main() { ResType _22; _22._m0 = frexp(v0 + 1.0, _22._m1); + highp float _24 = _22._m0; + float mp_copy_24 = _24; ResType_1 _35; _35._m0 = frexp(v1, _35._m1); float r0; float _41 = modf(v0, r0); vec2 r1; vec2 _45 = modf(v1, r1); - FragColor = ((((_22._m0 + _35._m0.x) + _35._m0.y) + _41) + _45.x) + _45.y; + FragColor = ((((mp_copy_24 + _35._m0.x) + _35._m0.y) + _41) + _45.x) + _45.y; } diff --git a/reference/opt/shaders/frag/ground.frag b/reference/opt/shaders/frag/ground.frag index f59a402fe37..d28f93efff3 100644 --- 
a/reference/opt/shaders/frag/ground.frag +++ b/reference/opt/shaders/frag/ground.frag @@ -26,10 +26,13 @@ void main() vec3 _68 = normalize((texture(TexNormalmap, TexCoord).xyz * 2.0) - vec3(1.0)); float _113 = smoothstep(0.0, 0.1500000059604644775390625, (_101.g_CamPos.y + EyeVec.y) * 0.004999999888241291046142578125); float _125 = smoothstep(0.699999988079071044921875, 0.75, _68.y); + highp float hp_copy_125 = _125; vec3 _130 = mix(vec3(0.100000001490116119384765625), mix(vec3(0.100000001490116119384765625, 0.300000011920928955078125, 0.100000001490116119384765625), vec3(0.800000011920928955078125), vec3(_113)), vec3(_125)); + highp float _172 = -hp_copy_125; + float mp_copy_172 = _172; LightingOut = vec4(0.0); NormalOut = vec4((_68 * 0.5) + vec3(0.5), 0.0); - SpecularOut = vec4(1.0 - (_125 * _113), 0.0, 0.0, 0.0); + SpecularOut = vec4(mp_copy_172 * _113 + 1.0, 0.0, 0.0, 0.0); AlbedoOut = vec4(_130 * _130, 1.0); } diff --git a/reference/opt/shaders/frag/helper-invocation.frag b/reference/opt/shaders/frag/helper-invocation.frag index 759a21bdc5a..0c44f72ad6e 100644 --- a/reference/opt/shaders/frag/helper-invocation.frag +++ b/reference/opt/shaders/frag/helper-invocation.frag @@ -9,15 +9,15 @@ layout(location = 0) out vec4 FragColor; void main() { - vec4 _51; + vec4 _52; if (!gl_HelperInvocation) { - _51 = textureLod(uSampler, vUV, 0.0); + _52 = textureLod(uSampler, vUV, 0.0); } else { - _51 = vec4(1.0); + _52 = vec4(1.0); } - FragColor = _51; + FragColor = _52; } diff --git a/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag b/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag index 91d7e37cdd2..050218b13b3 100644 --- a/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag +++ b/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag @@ -9,20 +9,20 @@ layout(location = 1) flat in mediump int vB; void main() { FragColor = vec4(0.0); + mediump int _49; mediump int 
_58; - for (mediump int _57 = 0, _60 = 0; _57 < vA; FragColor += vec4(1.0), _60 = _58, _57 += (_58 + 10)) + for (mediump int _57 = 0, _60 = 0; _57 < vA; _60 = _58, _57 += _49) { if ((vA + _57) == 20) { _58 = 50; - continue; } else { _58 = ((vB + _57) == 40) ? 60 : _60; - continue; } - continue; + _49 = _58 + 10; + FragColor += vec4(1.0); } } diff --git a/reference/opt/shaders/frag/image-load-store-uint-coord.asm.frag b/reference/opt/shaders/frag/image-load-store-uint-coord.asm.frag index 5dfb4d0028c..f25b4b738ac 100644 --- a/reference/opt/shaders/frag/image-load-store-uint-coord.asm.frag +++ b/reference/opt/shaders/frag/image-load-store-uint-coord.asm.frag @@ -10,8 +10,8 @@ layout(location = 0) out vec4 _entryPointOutput; void main() { imageStore(RWIm, ivec2(uvec2(10u)), vec4(10.0, 0.5, 8.0, 2.0)); - vec4 _69 = imageLoad(RWIm, ivec2(uvec2(30u))); - imageStore(RWBuf, int(80u), _69); - _entryPointOutput = (_69 + texelFetch(ROIm, ivec2(uvec2(50u, 60u)), 0)) + texelFetch(ROBuf, int(80u)); + vec4 _70 = imageLoad(RWIm, ivec2(uvec2(30u))); + imageStore(RWBuf, int(80u), _70); + _entryPointOutput = (_70 + texelFetch(ROIm, ivec2(uvec2(50u, 60u)), 0)) + texelFetch(ROBuf, int(80u)); } diff --git a/reference/opt/shaders/frag/loop-dominator-and-switch-default.frag b/reference/opt/shaders/frag/loop-dominator-and-switch-default.frag index a9457f22d2b..ee64d8335a3 100644 --- a/reference/opt/shaders/frag/loop-dominator-and-switch-default.frag +++ b/reference/opt/shaders/frag/loop-dominator-and-switch-default.frag @@ -2,10 +2,10 @@ precision mediump float; precision highp int; -layout(location = 0) out vec4 fragColor; - vec4 _80; +layout(location = 0) out vec4 fragColor; + void main() { mediump int _18 = int(_80.x); @@ -33,26 +33,15 @@ void main() } default: { - mediump int _84; vec4 _88; _88 = _82; - _84 = 0; - mediump int _50; - for (;;) + for (mediump int _84 = 0; _84 < _18; ) { - _50 = _84 + 1; - if (_84 < _18) - { - vec4 _72 = _88; - _72.y = _88.y + 0.5; - _88 = _72; - _84 = 
_50; - continue; - } - else - { - break; - } + vec4 _72 = _88; + _72.y = _88.y + 0.5; + _88 = _72; + _84++; + continue; } _89 = _88; continue; @@ -61,7 +50,6 @@ void main() vec4 _79 = _83; _79.y = _83.y + 0.5; _89 = _79; - continue; } fragColor = _82; } diff --git a/reference/opt/shaders/frag/modf-pointer-function-analysis.frag b/reference/opt/shaders/frag/modf-pointer-function-analysis.frag new file mode 100644 index 00000000000..07160bbdece --- /dev/null +++ b/reference/opt/shaders/frag/modf-pointer-function-analysis.frag @@ -0,0 +1,18 @@ +#version 450 + +layout(location = 0) in vec4 v; +layout(location = 0) out vec4 vo0; +layout(location = 1) out vec4 vo1; + +void main() +{ + vec4 param; + vec4 _59 = modf(v, param); + vo0 = _59; + vo1 = param; + vec4 param_1 = param; + float _65 = modf(v.x, param_1.x); + vo0.x += _65; + vo1.x += param_1.x; +} + diff --git a/reference/opt/shaders/frag/pixel-interlock-ordered.frag b/reference/opt/shaders/frag/pixel-interlock-ordered.frag new file mode 100644 index 00000000000..915b56511f2 --- /dev/null +++ b/reference/opt/shaders/frag/pixel-interlock-ordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/opt/shaders/frag/pixel-interlock-unordered.frag b/reference/opt/shaders/frag/pixel-interlock-unordered.frag new file mode 100644 index 00000000000..13962daf19d --- /dev/null +++ b/reference/opt/shaders/frag/pixel-interlock-unordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_unordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/opt/shaders/frag/post-depth-coverage-es.frag b/reference/opt/shaders/frag/post-depth-coverage-es.frag new file mode 100644 index 00000000000..d086560e5d2 --- /dev/null +++ b/reference/opt/shaders/frag/post-depth-coverage-es.frag @@ -0,0 +1,14 @@ +#version 310 es +#extension GL_EXT_post_depth_coverage : require +#extension GL_OES_sample_variables : require +precision mediump float; +precision highp int; +layout(early_fragment_tests, post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(float(gl_SampleMaskIn[0])); +} + diff --git a/reference/opt/shaders/frag/post-depth-coverage.frag b/reference/opt/shaders/frag/post-depth-coverage.frag new file mode 100644 index 00000000000..caca9c03cb5 --- /dev/null +++ b/reference/opt/shaders/frag/post-depth-coverage.frag @@ -0,0 +1,15 @@ +#version 450 +#if defined(GL_ARB_post_depth_coverge) +#extension GL_ARB_post_depth_coverage : require +#else +#extension GL_EXT_post_depth_coverage : require +#endif +layout(early_fragment_tests, post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(float(gl_SampleMaskIn[0])); +} + diff --git a/reference/opt/shaders/frag/round-even.frag b/reference/opt/shaders/frag/round-even.frag new file mode 100644 index 00000000000..ab6f37adc14 --- /dev/null +++ b/reference/opt/shaders/frag/round-even.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vA; 
+layout(location = 1) in float vB; + +void main() +{ + FragColor = roundEven(vA); + FragColor *= roundEven(vB); +} + diff --git a/reference/opt/shaders/frag/round.frag b/reference/opt/shaders/frag/round.frag new file mode 100644 index 00000000000..0f1fc0db0f3 --- /dev/null +++ b/reference/opt/shaders/frag/round.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vA; +layout(location = 1) in float vB; + +void main() +{ + FragColor = round(vA); + FragColor *= round(vB); +} + diff --git a/reference/opt/shaders/frag/sample-interlock-ordered.frag b/reference/opt/shaders/frag/sample-interlock-ordered.frag new file mode 100644 index 00000000000..9d5f90e4aaf --- /dev/null +++ b/reference/opt/shaders/frag/sample-interlock-ordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(sample_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0])); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/opt/shaders/frag/sample-interlock-unordered.frag b/reference/opt/shaders/frag/sample-interlock-unordered.frag new file mode 100644 index 00000000000..441198814e0 --- /dev/null +++ b/reference/opt/shaders/frag/sample-interlock-unordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(sample_interlock_unordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/opt/shaders/frag/selection-block-dominator.frag b/reference/opt/shaders/frag/selection-block-dominator.frag index f737f489172..50a5a371c64 100644 --- a/reference/opt/shaders/frag/selection-block-dominator.frag +++ b/reference/opt/shaders/frag/selection-block-dominator.frag @@ -5,7 +5,7 @@ layout(location = 0) out vec4 FragColor; void main() { - for (;;) + do { if (vIndex != 1) { @@ -14,6 +14,6 @@ void main() } FragColor = vec4(10.0); break; - } + } while(false); } diff --git a/reference/opt/shaders/frag/struct-type-unrelated-alias.frag b/reference/opt/shaders/frag/struct-type-unrelated-alias.frag new file mode 100644 index 00000000000..d6fa667f3b4 --- /dev/null +++ b/reference/opt/shaders/frag/struct-type-unrelated-alias.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = 30.0; +} + diff --git a/reference/opt/shaders/frag/switch-unreachable-break.frag b/reference/opt/shaders/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..111a4d4be32 --- /dev/null +++ b/reference/opt/shaders/frag/switch-unreachable-break.frag @@ -0,0 +1,37 @@ +#version 450 + +layout(binding = 0, std140) uniform UBO +{ + int cond; + int cond2; +} _13; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + bool _49; + switch (_13.cond) + { + case 1: + { + if (_13.cond2 < 50) + { + _49 = false; + break; + } + else + { + discard; + } + break; // unreachable workaround + } + default: + { + _49 = true; + break; + } + } + 
FragColor = mix(vec4(20.0), vec4(10.0), bvec4(_49)); +} + diff --git a/reference/opt/shaders/frag/swizzle.frag b/reference/opt/shaders/frag/swizzle.frag index a229e5b0d5e..51f5b198957 100644 --- a/reference/opt/shaders/frag/swizzle.frag +++ b/reference/opt/shaders/frag/swizzle.frag @@ -10,9 +10,11 @@ layout(location = 1) in vec3 vNormal; void main() { - FragColor = vec4(texture(samp, vUV).xyz, 1.0); - FragColor = vec4(texture(samp, vUV).xz, 1.0, 4.0); - FragColor = vec4(texture(samp, vUV).xx, texture(samp, vUV + vec2(0.100000001490116119384765625)).yy); + vec4 _19 = texture(samp, vUV); + float _23 = _19.x; + FragColor = vec4(_23, _19.yz, 1.0); + FragColor = vec4(_23, _19.z, 1.0, 4.0); + FragColor = vec4(_23, _23, texture(samp, vUV + vec2(0.100000001490116119384765625)).yy); FragColor = vec4(vNormal, 1.0); FragColor = vec4(vNormal + vec3(1.7999999523162841796875), 1.0); FragColor = vec4(vUV, vUV + vec2(1.7999999523162841796875)); diff --git a/reference/opt/shaders/frag/texel-fetch-offset.frag b/reference/opt/shaders/frag/texel-fetch-offset.frag index 416f764d43f..520b4ee88b7 100644 --- a/reference/opt/shaders/frag/texel-fetch-offset.frag +++ b/reference/opt/shaders/frag/texel-fetch-offset.frag @@ -8,7 +8,7 @@ layout(location = 0) out vec4 FragColor; void main() { - mediump ivec2 _22 = ivec2(gl_FragCoord.xy); + ivec2 _22 = ivec2(gl_FragCoord.xy); FragColor = texelFetchOffset(uTexture, _22, 0, ivec2(1)); FragColor += texelFetchOffset(uTexture, _22, 0, ivec2(-1, 1)); } diff --git a/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag b/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag new file mode 100644 index 00000000000..90b000f94f0 --- /dev/null +++ b/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag @@ -0,0 +1,46 @@ +#version 450 + +struct RowMajor +{ + mat4 B; +}; + +struct NestedRowMajor +{ + RowMajor rm; +}; + +layout(binding = 2, std140) uniform UBO3 +{ + layout(row_major) NestedRowMajor rm2; +} _17; + +layout(binding = 1, 
std140) uniform UBO2 +{ + layout(row_major) RowMajor rm; +} _35; + +layout(binding = 0, std140) uniform UBO +{ + layout(row_major) mat4 A; + mat4 C; +} _42; + +layout(binding = 3, std140) uniform UBONoWorkaround +{ + mat4 D; +} _56; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 Clip; + +NestedRowMajor spvWorkaroundRowMajor(NestedRowMajor wrap) { return wrap; } +mat4 spvWorkaroundRowMajor(mat4 wrap) { return wrap; } + +void main() +{ + FragColor = (((spvWorkaroundRowMajor(_17.rm2).rm.B * spvWorkaroundRowMajor(_35.rm.B)) * spvWorkaroundRowMajor(_42.A)) * spvWorkaroundRowMajor(_42.C)) * Clip; + FragColor += (_56.D * Clip); + FragColor = fma(_42.A[1], Clip, FragColor); +} + diff --git a/reference/opt/shaders/frag/ubo_layout.frag b/reference/opt/shaders/frag/ubo_layout.frag index 4b66e1396a7..bc0b01c065f 100644 --- a/reference/opt/shaders/frag/ubo_layout.frag +++ b/reference/opt/shaders/frag/ubo_layout.frag @@ -7,11 +7,6 @@ struct Str mat4 foo; }; -struct Str_1 -{ - mat4 foo; -}; - layout(binding = 0, std140) uniform UBO1 { layout(row_major) Str foo; @@ -19,7 +14,7 @@ layout(binding = 0, std140) uniform UBO1 layout(binding = 1, std140) uniform UBO2 { - Str_1 foo; + Str foo; } ubo0; layout(location = 0) out vec4 FragColor; diff --git a/reference/opt/shaders/geom/geometry-passthrough.geom b/reference/opt/shaders/geom/geometry-passthrough.geom new file mode 100644 index 00000000000..afbd662324f --- /dev/null +++ b/reference/opt/shaders/geom/geometry-passthrough.geom @@ -0,0 +1,22 @@ +#version 450 +#extension GL_NV_geometry_shader_passthrough : require +layout(triangles) in; + +layout(passthrough, location = 0) in VertexBlock +{ + int a; + int b; +} v1[3]; + +layout(location = 2) in VertexBlock2 +{ + int a; + layout(passthrough) int b; +} v2[3]; + + +void main() +{ + gl_Layer = (gl_InvocationID + v1[0].a) + v2[1].b; +} + diff --git a/reference/opt/shaders/geom/multi-stream.geom b/reference/opt/shaders/geom/multi-stream.geom new file mode 100644 
index 00000000000..548164d7804 --- /dev/null +++ b/reference/opt/shaders/geom/multi-stream.geom @@ -0,0 +1,14 @@ +#version 450 +layout(triangles) in; +layout(max_vertices = 2, points) out; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + EmitStreamVertex(0); + EndStreamPrimitive(0); + gl_Position = gl_in[0].gl_Position + vec4(2.0); + EmitStreamVertex(1); + EndStreamPrimitive(1); +} + diff --git a/reference/opt/shaders/geom/transform-feedback-streams.geom b/reference/opt/shaders/geom/transform-feedback-streams.geom new file mode 100644 index 00000000000..4d238b4adff --- /dev/null +++ b/reference/opt/shaders/geom/transform-feedback-streams.geom @@ -0,0 +1,26 @@ +#version 450 +layout(points) in; +layout(max_vertices = 2, points) out; + +layout(xfb_buffer = 1, xfb_stride = 20, stream = 1) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(location = 0, xfb_buffer = 2, xfb_stride = 32, xfb_offset = 16, stream = 1) out vec4 vFoo; +layout(xfb_buffer = 3, xfb_stride = 16, stream = 2) out VertOut +{ + layout(location = 1, xfb_offset = 0) vec4 vBar; +} _23; + + +void main() +{ + gl_Position = vec4(1.0); + vFoo = vec4(3.0); + EmitStreamVertex(1); + _23.vBar = vec4(5.0); + EmitStreamVertex(2); +} + diff --git a/reference/opt/shaders/legacy/fragment/explicit-lod.legacy.vert b/reference/opt/shaders/legacy/fragment/explicit-lod.legacy.vert new file mode 100644 index 00000000000..b73faa47ab5 --- /dev/null +++ b/reference/opt/shaders/legacy/fragment/explicit-lod.legacy.vert @@ -0,0 +1,11 @@ +#version 100 + +uniform mediump sampler2D tex; + +varying mediump vec4 FragColor; + +void main() +{ + FragColor = texture2DLod(tex, vec2(0.4000000059604644775390625, 0.60000002384185791015625), 3.0); +} + diff --git a/reference/opt/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag b/reference/opt/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag new file mode 100644 index 00000000000..10ce5a513f4 --- /dev/null 
+++ b/reference/opt/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag @@ -0,0 +1,36 @@ +#version 100 +precision mediump float; +precision highp int; + +struct Foo +{ + highp vec4 a; + highp vec4 b; +}; + +struct Bar +{ + highp vec4 a; + highp vec4 b; +}; + +struct Baz +{ + Foo foo; + Bar bar; +}; + +varying highp vec4 baz_foo_a; +varying highp vec4 baz_foo_b; +varying highp vec4 baz_bar_a; +varying highp vec4 baz_bar_b; +varying highp vec4 _33_a_a; +varying highp vec4 _33_a_b; +varying highp vec4 _33_b_a; +varying highp vec4 _33_b_b; + +void main() +{ + gl_FragData[0] = (((_33_a_a + _33_b_b) + baz_foo_b) + baz_foo_a) + baz_bar_b; +} + diff --git a/reference/opt/shaders/legacy/fragment/round.legacy.frag b/reference/opt/shaders/legacy/fragment/round.legacy.frag new file mode 100644 index 00000000000..9033bc3c56c --- /dev/null +++ b/reference/opt/shaders/legacy/fragment/round.legacy.frag @@ -0,0 +1,13 @@ +#version 100 +precision mediump float; +precision highp int; + +varying highp vec4 vA; +varying highp float vB; + +void main() +{ + gl_FragData[0] = floor(vA + vec4(0.5)); + gl_FragData[0] *= floor(vB + float(0.5)); +} + diff --git a/reference/opt/shaders/legacy/fragment/switch.legacy.frag b/reference/opt/shaders/legacy/fragment/switch.legacy.frag new file mode 100644 index 00000000000..169f591a74e --- /dev/null +++ b/reference/opt/shaders/legacy/fragment/switch.legacy.frag @@ -0,0 +1,77 @@ +#version 100 +precision mediump float; +precision highp int; + +varying highp float vIndexF; + +void main() +{ + int _13 = int(vIndexF); + highp vec4 _65; + highp vec4 _66; + highp vec4 _68; + for (int spvDummy25 = 0; spvDummy25 < 1; spvDummy25++) + { + if (_13 == 2) + { + _68 = vec4(0.0, 2.0, 3.0, 4.0); + break; + } + else if ((_13 == 4) || (_13 == 5)) + { + _68 = vec4(1.0, 2.0, 3.0, 4.0); + break; + } + else if ((_13 == 8) || (_13 == 9)) + { + _68 = vec4(40.0, 20.0, 30.0, 40.0); + break; + } + else if (_13 == 10) + { + _65 = vec4(10.0); + highp vec4 _45 = _65 + 
vec4(1.0); + _66 = _45; + highp vec4 _48 = _66 + vec4(2.0); + _68 = _48; + break; + } + else if (_13 == 11) + { + _65 = vec4(0.0); + highp vec4 _45 = _65 + vec4(1.0); + _66 = _45; + highp vec4 _48 = _66 + vec4(2.0); + _68 = _48; + break; + } + else if (_13 == 12) + { + _66 = vec4(0.0); + highp vec4 _48 = _66 + vec4(2.0); + _68 = _48; + break; + } + else + { + _68 = vec4(10.0, 20.0, 30.0, 40.0); + break; + } + } + highp vec4 _70; + for (int spvDummy146 = 0; spvDummy146 < 1; spvDummy146++) + { + if ((_13 == 10) || (_13 == 20)) + { + _70 = vec4(40.0); + break; + } + else + { + _70 = vec4(20.0); + break; + } + } + gl_FragData[0] = _68 + _70; +} + diff --git a/reference/opt/shaders/legacy/vert/implicit-lod.legacy.vert b/reference/opt/shaders/legacy/vert/implicit-lod.legacy.vert index 6e441074482..2d2050498e0 100644 --- a/reference/opt/shaders/legacy/vert/implicit-lod.legacy.vert +++ b/reference/opt/shaders/legacy/vert/implicit-lod.legacy.vert @@ -4,6 +4,6 @@ uniform mediump sampler2D tex; void main() { - gl_Position = texture2D(tex, vec2(0.4000000059604644775390625, 0.60000002384185791015625)); + gl_Position = texture2DLod(tex, vec2(0.4000000059604644775390625, 0.60000002384185791015625), 0.0); } diff --git a/reference/opt/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert b/reference/opt/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert new file mode 100644 index 00000000000..837a11a843d --- /dev/null +++ b/reference/opt/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert @@ -0,0 +1,17 @@ +#version 100 + +struct Foo +{ + float a[4]; +}; + +varying float foo_a[4]; + +void main() +{ + gl_Position = vec4(1.0); + for (int _46 = 0; _46 < 4; foo_a[_46] = float(_46 + 2), _46++) + { + } +} + diff --git a/reference/opt/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert b/reference/opt/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert new file mode 100644 index 00000000000..cf807c41f7f --- /dev/null +++ 
b/reference/opt/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert @@ -0,0 +1,49 @@ +#version 100 + +struct Foo +{ + vec4 a; + vec4 b; +}; + +struct Bar +{ + vec4 a; + vec4 b; +}; + +struct Baz +{ + Foo foo; + Bar bar; +}; + +varying vec4 _12_a_a; +varying vec4 _12_a_b; +varying vec4 _12_b_a; +varying vec4 _12_b_b; +varying vec4 baz_foo_a; +varying vec4 baz_foo_b; +varying vec4 baz_bar_a; +varying vec4 baz_bar_b; + +void main() +{ + _12_a_a = vec4(10.0); + _12_a_b = vec4(20.0); + _12_b_a = vec4(30.0); + _12_b_b = vec4(40.0); + _12_a_a = Foo(vec4(50.0), vec4(60.0)).a; + _12_a_b = Foo(vec4(50.0), vec4(60.0)).b; + _12_b_a = Bar(vec4(50.0), vec4(60.0)).a; + _12_b_b = Bar(vec4(50.0), vec4(60.0)).b; + baz_foo_a = Foo(vec4(100.0), vec4(200.0)).a; + baz_foo_b = Foo(vec4(100.0), vec4(200.0)).b; + baz_bar_a = Bar(vec4(300.0), vec4(400.0)).a; + baz_bar_b = Bar(vec4(300.0), vec4(400.0)).b; + baz_foo_a = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).foo.a; + baz_foo_b = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).foo.b; + baz_bar_a = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).bar.a; + baz_bar_b = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).bar.b; +} + diff --git a/reference/opt/shaders/legacy/vert/struct-varying.legacy.vert b/reference/opt/shaders/legacy/vert/struct-varying.legacy.vert index 01a3d73535e..66136d27ae7 100644 --- a/reference/opt/shaders/legacy/vert/struct-varying.legacy.vert +++ b/reference/opt/shaders/legacy/vert/struct-varying.legacy.vert @@ -11,18 +11,13 @@ varying vec2 vout_b; void main() { - { - Output vout = Output(vec4(0.5), vec2(0.25)); - vout_a = vout.a; - vout_b = vout.b; - } - { - Output vout = Output(vec4(0.5), vec2(0.25)); - vout_a = vout.a; - vout_b = vout.b; - } - vout_a = Output(vout_a, vout_b).a; - vout_b = Output(vout_a, vout_b).b; + vout_a = Output(vec4(0.5), vec2(0.25)).a; + vout_b = Output(vec4(0.5), vec2(0.25)).b; + vout_a 
= Output(vec4(0.5), vec2(0.25)).a; + vout_b = Output(vec4(0.5), vec2(0.25)).b; + Output _22 = Output(vout_a, vout_b); + vout_a = _22.a; + vout_b = _22.b; vout_a.x = 1.0; vout_b.y = 1.0; } diff --git a/reference/opt/shaders/legacy/vert/switch-nested.legacy.vert b/reference/opt/shaders/legacy/vert/switch-nested.legacy.vert new file mode 100644 index 00000000000..dd987e8f1b4 --- /dev/null +++ b/reference/opt/shaders/legacy/vert/switch-nested.legacy.vert @@ -0,0 +1,45 @@ +#version 100 + +struct UBO +{ + int func_arg; + int inner_func_arg; +}; + +uniform UBO _34; + +void main() +{ + vec4 _102; + for (int spvDummy30 = 0; spvDummy30 < 1; spvDummy30++) + { + if (_34.func_arg != 0) + { + vec4 _101; + for (int spvDummy45 = 0; spvDummy45 < 1; spvDummy45++) + { + if (_34.inner_func_arg != 0) + { + _101 = vec4(1.0); + break; + } + else + { + _101 = vec4(0.0); + break; + } + break; // unreachable workaround + } + _102 = _101; + break; + } + else + { + _102 = vec4(0.0); + break; + } + break; // unreachable workaround + } + gl_Position = _102; +} + diff --git a/reference/opt/shaders/legacy/vert/transpose.legacy.vert b/reference/opt/shaders/legacy/vert/transpose.legacy.vert index 0d30c0e243b..d725bfbb092 100644 --- a/reference/opt/shaders/legacy/vert/transpose.legacy.vert +++ b/reference/opt/shaders/legacy/vert/transpose.legacy.vert @@ -11,8 +11,20 @@ uniform Buffer _13; attribute vec4 Position; +highp mat4 spvWorkaroundRowMajor(highp mat4 wrap) { return wrap; } +mediump mat4 spvWorkaroundRowMajorMP(mediump mat4 wrap) { return wrap; } + +mat4 spvTranspose(mat4 m) +{ + return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]); +} + void main() { - gl_Position = (((_13.M * (Position * _13.MVPRowMajor)) + (_13.M * (_13.MVPColMajor * Position))) + (_13.M * (_13.MVPRowMajor * Position))) + (_13.M * (Position * _13.MVPColMajor)); + mat4 _55 = _13.MVPRowMajor; + mat4 _61 = 
spvWorkaroundRowMajor(_13.MVPColMajor); + mat4 _80 = spvTranspose(_13.MVPRowMajor) * 2.0; + mat4 _87 = spvTranspose(_61) * 2.0; + gl_Position = (((((((((((spvWorkaroundRowMajor(_13.M) * (Position * _13.MVPRowMajor)) + (spvWorkaroundRowMajor(_13.M) * (spvWorkaroundRowMajor(_13.MVPColMajor) * Position))) + (spvWorkaroundRowMajor(_13.M) * (_13.MVPRowMajor * Position))) + (spvWorkaroundRowMajor(_13.M) * (Position * spvWorkaroundRowMajor(_13.MVPColMajor)))) + (_55 * Position)) + (Position * _61)) + (Position * _55)) + (_61 * Position)) + (_80 * Position)) + (_87 * Position)) + (Position * _80)) + (Position * _87); } diff --git a/reference/opt/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk b/reference/opt/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..81f3c96ec18 --- /dev/null +++ b/reference/opt/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,66 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, lines) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + vec3 _29 = vec3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(_31, _29.yz, 1.0); + 
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(_31, _29.yz, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0u, 1u) + uvec2(gl_LocalInvocationIndex); + int _128 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _128; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _128 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _128 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _128 + 3; + } +} + diff --git a/reference/opt/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk b/reference/opt/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..bacc7fdfdc5 --- /dev/null +++ b/reference/opt/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,66 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, points) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 
vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + vec3 _29 = vec3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(_31, _29.yz, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(_31, _29.yz, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex; + int _124 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _124; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _124 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _124 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _124 + 3; + } +} + diff --git a/reference/opt/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk b/reference/opt/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..87fd2c2b7b6 --- /dev/null +++ 
b/reference/opt/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,66 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, triangles) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + vec3 _29 = vec3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(_31, _29.yz, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(_31, _29.yz, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0u, 1u, 2u) + uvec3(gl_LocalInvocationIndex); + int _127 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _127; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _127 
+ 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _127 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _127 + 3; + } +} + diff --git a/reference/opt/shaders/tesc/water_tess.tesc b/reference/opt/shaders/tesc/water_tess.tesc index eb3e84d53d1..d3d9c8b3e06 100644 --- a/reference/opt/shaders/tesc/water_tess.tesc +++ b/reference/opt/shaders/tesc/water_tess.tesc @@ -18,23 +18,23 @@ layout(location = 0) in vec2 vPatchPosBase[]; void main() { - vec2 _430 = (vPatchPosBase[0] - vec2(10.0)) * _41.uScale.xy; - vec2 _440 = ((vPatchPosBase[0] + _41.uPatchSize) + vec2(10.0)) * _41.uScale.xy; - vec3 _445 = vec3(_430.x, -10.0, _430.y); - vec3 _450 = vec3(_440.x, 10.0, _440.y); - vec4 _466 = vec4((_445 + _450) * 0.5, 1.0); - vec3 _513 = vec3(length(_450 - _445) * (-0.5)); - bool _515 = any(lessThanEqual(vec3(dot(_41.uFrustum[0], _466), dot(_41.uFrustum[1], _466), dot(_41.uFrustum[2], _466)), _513)); - bool _525; - if (!_515) + vec2 _431 = (vPatchPosBase[0] - vec2(10.0)) * _41.uScale.xy; + vec2 _441 = ((vPatchPosBase[0] + _41.uPatchSize) + vec2(10.0)) * _41.uScale.xy; + vec3 _446 = vec3(_431.x, -10.0, _431.y); + vec3 _451 = vec3(_441.x, 10.0, _441.y); + vec4 _467 = vec4((_446 + _451) * 0.5, 1.0); + vec3 _514 = vec3(length(_451 - _446) * (-0.5)); + bool _516 = any(lessThanEqual(vec3(dot(_41.uFrustum[0], _467), dot(_41.uFrustum[1], _467), dot(_41.uFrustum[2], _467)), _514)); + bool _526; + if (!_516) { - _525 = any(lessThanEqual(vec3(dot(_41.uFrustum[3], _466), dot(_41.uFrustum[4], _466), dot(_41.uFrustum[5], _466)), _513)); + _526 = any(lessThanEqual(vec3(dot(_41.uFrustum[3], _467), dot(_41.uFrustum[4], _467), dot(_41.uFrustum[5], _467)), _514)); } else { - _525 = _515; + _526 = _516; } - if (!(!_525)) + if (!(!_526)) { gl_TessLevelOuter[0] = -1.0; gl_TessLevelOuter[1] = -1.0; @@ -46,34 +46,34 @@ void main() else { 
vOutPatchPosBase = vPatchPosBase[0]; - vec2 _678 = (vPatchPosBase[0] + (vec2(-0.5) * _41.uPatchSize)) * _41.uScale.xy; - vec2 _706 = (vPatchPosBase[0] + (vec2(0.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _725 = clamp(log2((length(_41.uCamPos - vec3(_706.x, 0.0, _706.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _734 = (vPatchPosBase[0] + (vec2(1.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; - vec2 _762 = (vPatchPosBase[0] + (vec2(-0.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _781 = clamp(log2((length(_41.uCamPos - vec3(_762.x, 0.0, _762.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _790 = (vPatchPosBase[0] + (vec2(0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _809 = clamp(log2((length(_41.uCamPos - vec3(_790.x, 0.0, _790.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _818 = (vPatchPosBase[0] + (vec2(1.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _837 = clamp(log2((length(_41.uCamPos - vec3(_818.x, 0.0, _818.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _846 = (vPatchPosBase[0] + (vec2(-0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; - vec2 _874 = (vPatchPosBase[0] + (vec2(0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; - float _893 = clamp(log2((length(_41.uCamPos - vec3(_874.x, 0.0, _874.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _902 = (vPatchPosBase[0] + (vec2(1.5) * _41.uPatchSize)) * _41.uScale.xy; - float _612 = dot(vec4(_781, _809, clamp(log2((length(_41.uCamPos - vec3(_846.x, 0.0, _846.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _893), vec4(0.25)); - float _618 = dot(vec4(clamp(log2((length(_41.uCamPos - vec3(_678.x, 0.0, _678.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), 
_725, _781, _809), vec4(0.25)); - float _624 = dot(vec4(_725, clamp(log2((length(_41.uCamPos - vec3(_734.x, 0.0, _734.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _809, _837), vec4(0.25)); - float _630 = dot(vec4(_809, _837, _893, clamp(log2((length(_41.uCamPos - vec3(_902.x, 0.0, _902.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x)), vec4(0.25)); - vec4 _631 = vec4(_612, _618, _624, _630); - vPatchLods = _631; - vec4 _928 = exp2(-min(_631, _631.yzwx)) * _41.uMaxTessLevel.y; - gl_TessLevelOuter[0] = _928.x; - gl_TessLevelOuter[1] = _928.y; - gl_TessLevelOuter[2] = _928.z; - gl_TessLevelOuter[3] = _928.w; - float _935 = _41.uMaxTessLevel.y * exp2(-min(min(min(_612, _618), min(_624, _630)), _809)); - gl_TessLevelInner[0] = _935; - gl_TessLevelInner[1] = _935; + vec2 _681 = (vec2(-0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + vec2 _710 = (vec2(0.5, -0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + float _729 = clamp(log2((length(_41.uCamPos - vec3(_710.x, 0.0, _710.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + vec2 _739 = (vec2(1.5, -0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + vec2 _768 = (vec2(-0.5, 0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + float _787 = clamp(log2((length(_41.uCamPos - vec3(_768.x, 0.0, _768.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + vec2 _797 = (vec2(0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + float _816 = clamp(log2((length(_41.uCamPos - vec3(_797.x, 0.0, _797.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + vec2 _826 = (vec2(1.5, 0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + float _845 = clamp(log2((length(_41.uCamPos - vec3(_826.x, 0.0, _826.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, 
_41.uMaxTessLevel.x); + vec2 _855 = (vec2(-0.5, 1.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + vec2 _884 = (vec2(0.5, 1.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + float _903 = clamp(log2((length(_41.uCamPos - vec3(_884.x, 0.0, _884.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + vec2 _913 = (vec2(1.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + float _614 = dot(vec4(_787, _816, clamp(log2((length(_41.uCamPos - vec3(_855.x, 0.0, _855.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _903), vec4(0.25)); + float _620 = dot(vec4(clamp(log2((length(_41.uCamPos - vec3(_681.x, 0.0, _681.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _729, _787, _816), vec4(0.25)); + float _626 = dot(vec4(_729, clamp(log2((length(_41.uCamPos - vec3(_739.x, 0.0, _739.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _816, _845), vec4(0.25)); + float _632 = dot(vec4(_816, _845, _903, clamp(log2((length(_41.uCamPos - vec3(_913.x, 0.0, _913.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x)), vec4(0.25)); + vec4 _633 = vec4(_614, _620, _626, _632); + vPatchLods = _633; + vec4 _940 = exp2(-min(_633, _633.yzwx)) * _41.uMaxTessLevel.y; + gl_TessLevelOuter[0] = _940.x; + gl_TessLevelOuter[1] = _940.y; + gl_TessLevelOuter[2] = _940.z; + gl_TessLevelOuter[3] = _940.w; + float _948 = _41.uMaxTessLevel.y * exp2(-min(min(min(_614, _620), min(_626, _632)), _816)); + gl_TessLevelInner[0] = _948; + gl_TessLevelInner[1] = _948; } } diff --git a/reference/opt/shaders/tese/load-array-of-array.tese b/reference/opt/shaders/tese/load-array-of-array.tese new file mode 100644 index 00000000000..e4b426d0ad6 --- /dev/null +++ b/reference/opt/shaders/tese/load-array-of-array.tese @@ -0,0 +1,10 @@ +#version 450 +layout(quads, ccw, equal_spacing) in; + 
+layout(location = 0) in vec4 vTexCoord[][1]; + +void main() +{ + gl_Position = (vTexCoord[0u][0] + vTexCoord[2u][0]) + vTexCoord[3u][0]; +} + diff --git a/reference/opt/shaders/tese/patch-input-array.tese b/reference/opt/shaders/tese/patch-input-array.tese new file mode 100644 index 00000000000..413d8b391fb --- /dev/null +++ b/reference/opt/shaders/tese/patch-input-array.tese @@ -0,0 +1,10 @@ +#version 450 +layout(quads, ccw, equal_spacing) in; + +layout(location = 0) patch in float P[4]; + +void main() +{ + gl_Position = vec4(P[0], P[1], P[2], P[3]); +} + diff --git a/reference/opt/shaders/tese/water_tess.tese b/reference/opt/shaders/tese/water_tess.tese index a2aa1044763..c862cfbdc0e 100644 --- a/reference/opt/shaders/tese/water_tess.tese +++ b/reference/opt/shaders/tese/water_tess.tese @@ -21,16 +21,16 @@ layout(location = 0) out vec3 vWorld; void main() { - vec2 _201 = vOutPatchPosBase + (gl_TessCoord.xy * _31.uPatchSize); - vec2 _214 = mix(vPatchLods.yx, vPatchLods.zw, vec2(gl_TessCoord.x)); - float _221 = mix(_214.x, _214.y, gl_TessCoord.y); - mediump float _223 = floor(_221); - vec2 _125 = _201 * _31.uInvHeightmapSize; - vec2 _141 = _31.uInvHeightmapSize * exp2(_223); - vGradNormalTex = vec4(_125 + (_31.uInvHeightmapSize * 0.5), _125 * _31.uScale.zw); - mediump vec3 _253 = mix(textureLod(uHeightmapDisplacement, _125 + (_141 * 0.5), _223).xyz, textureLod(uHeightmapDisplacement, _125 + (_141 * 1.0), _223 + 1.0).xyz, vec3(_221 - _223)); - vec2 _171 = (_201 * _31.uScale.xy) + _253.yz; - vWorld = vec3(_171.x, _253.x, _171.y); + vec2 _202 = gl_TessCoord.xy * _31.uPatchSize + vOutPatchPosBase; + vec2 _216 = mix(vPatchLods.yx, vPatchLods.zw, vec2(gl_TessCoord.x)); + float _223 = mix(_216.x, _216.y, gl_TessCoord.y); + mediump float mp_copy_223 = _223; + mediump float _225 = floor(mp_copy_223); + vec2 _141 = _31.uInvHeightmapSize * exp2(_225); + vGradNormalTex = vec4(_202 * _31.uInvHeightmapSize + (_31.uInvHeightmapSize * 0.5), (_202 * _31.uInvHeightmapSize) * 
_31.uScale.zw); + mediump vec3 _256 = mix(textureLod(uHeightmapDisplacement, _202 * _31.uInvHeightmapSize + (_141 * 0.5), _225).xyz, textureLod(uHeightmapDisplacement, _202 * _31.uInvHeightmapSize + (_141 * 1.0), _225 + 1.0).xyz, vec3(mp_copy_223 - _225)); + vec2 _171 = _202 * _31.uScale.xy + _256.yz; + vWorld = vec3(_171.x, _256.x, _171.y); gl_Position = _31.uMVP * vec4(vWorld, 1.0); } diff --git a/reference/opt/shaders/vert/ground.vert b/reference/opt/shaders/vert/ground.vert index c82c1037b3b..5840c3d5a98 100644 --- a/reference/opt/shaders/vert/ground.vert +++ b/reference/opt/shaders/vert/ground.vert @@ -1,4 +1,7 @@ #version 310 es +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif struct PatchData { @@ -44,44 +47,52 @@ layout(binding = 1) uniform mediump sampler2D TexLOD; layout(binding = 0) uniform mediump sampler2D TexHeightmap; layout(location = 1) in vec4 LODWeights; +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif layout(location = 0) in vec2 Position; layout(location = 1) out vec3 EyeVec; layout(location = 0) out vec2 TexCoord; void main() { - float _300 = all(equal(LODWeights, vec4(0.0))) ? _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.w : dot(LODWeights, _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].LODs); - float _302 = floor(_300); - uint _307 = uint(_302); - uvec2 _309 = uvec2(Position); - uvec2 _316 = (uvec2(1u) << uvec2(_307, _307 + 1u)) - uvec2(1u); - uint _382; - if (_309.x < 32u) + float _301 = all(equal(LODWeights, vec4(0.0))) ? 
_53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.w : dot(LODWeights, _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].LODs); + float _303 = floor(_301); + uint _308 = uint(_303); + uvec2 _310 = uvec2(Position); + uvec2 _317 = (uvec2(1u) << uvec2(_308, _308 + 1u)) - uvec2(1u); + uint _384; + if (_310.x < 32u) { - _382 = _316.x; + _384 = _317.x; } else { - _382 = 0u; + _384 = 0u; } - uint _383; - if (_309.y < 32u) + uint _385; + if (_310.y < 32u) { - _383 = _316.y; + _385 = _317.y; } else { - _383 = 0u; + _385 = 0u; } - vec4 _344 = vec4((_309 + uvec2(_382, _383)).xyxy & (~_316).xxyy); - vec2 _173 = ((_53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _156.InvGroundSize_PatchScale.zw) + mix(_344.xy, _344.zw, vec2(_300 - _302))) * _156.InvGroundSize_PatchScale.xy; - mediump float _360 = textureLod(TexLOD, _173, 0.0).x * 7.96875; - float _362 = floor(_360); - vec2 _185 = _156.InvGroundSize_PatchScale.xy * exp2(_362); - vec3 _230 = (vec3(_173.x, mix(textureLod(TexHeightmap, _173 + (_185 * 0.5), _362).x, textureLod(TexHeightmap, _173 + (_185 * 1.0), _362 + 1.0).x, _360 - _362), _173.y) * _156.GroundScale.xyz) + _156.GroundPosition.xyz; + vec4 _345 = vec4((_310 + uvec2(_384, _385)).xyxy & (~_317).xxyy); + vec2 _167 = _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _156.InvGroundSize_PatchScale.zw + mix(_345.xy, _345.zw, vec2(_301 - _303)); + vec2 _173 = _167 * _156.InvGroundSize_PatchScale.xy; + mediump vec4 _360 = textureLod(TexLOD, _173, 0.0); + mediump float _361 = _360.x; + mediump float _362 = _361 * 7.96875; + float hp_copy_362 = _362; + float _364 = floor(hp_copy_362); + vec2 _185 = _156.InvGroundSize_PatchScale.xy * exp2(_364); + vec3 _230 = vec3(_173.x, mix(textureLod(TexHeightmap, _167 * _156.InvGroundSize_PatchScale.xy + (_185 * 0.5), _364).x, textureLod(TexHeightmap, _167 * _156.InvGroundSize_PatchScale.xy + (_185 * 1.0), _364 + 1.0).x, _361 * 7.96875 + (-_364)), _173.y) * _156.GroundScale.xyz + 
_156.GroundPosition.xyz; EyeVec = _230 - _236.g_CamPos.xyz; - TexCoord = _173 + (_156.InvGroundSize_PatchScale.xy * 0.5); + TexCoord = _167 * _156.InvGroundSize_PatchScale.xy + (_156.InvGroundSize_PatchScale.xy * 0.5); gl_Position = (((_236.g_ViewProj_Row0 * _230.x) + (_236.g_ViewProj_Row1 * _230.y)) + (_236.g_ViewProj_Row2 * _230.z)) + _236.g_ViewProj_Row3; } diff --git a/reference/opt/shaders/vert/invariant.vert b/reference/opt/shaders/vert/invariant.vert index 648ea2947c9..31e0c2d46f6 100644 --- a/reference/opt/shaders/vert/invariant.vert +++ b/reference/opt/shaders/vert/invariant.vert @@ -9,8 +9,7 @@ layout(location = 0) invariant out vec4 vColor; void main() { - vec4 _20 = vInput1 * vInput2; - vec4 _21 = vInput0 + _20; + vec4 _21 = vInput1 * vInput2 + vInput0; gl_Position = _21; vec4 _27 = vInput0 - vInput1; vec4 _29 = _27 * vInput2; diff --git a/reference/opt/shaders/vert/no-contraction.vert b/reference/opt/shaders/vert/no-contraction.vert new file mode 100644 index 00000000000..9f9969cd74d --- /dev/null +++ b/reference/opt/shaders/vert/no-contraction.vert @@ -0,0 +1,18 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in vec4 vB; +layout(location = 2) in vec4 vC; + +void main() +{ + precise vec4 _15 = vA * vB; + precise vec4 _19 = vA + vB; + precise vec4 _23 = vA - vB; + precise vec4 _30 = _15 + vC; + precise vec4 _34 = _15 + _19; + precise vec4 _36 = _34 + _23; + precise vec4 _38 = _36 + _30; + gl_Position = _38; +} + diff --git a/reference/opt/shaders/vert/ocean.vert b/reference/opt/shaders/vert/ocean.vert index 8f82c316d88..489e82959e0 100644 --- a/reference/opt/shaders/vert/ocean.vert +++ b/reference/opt/shaders/vert/ocean.vert @@ -1,4 +1,7 @@ #version 310 es +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif struct PatchData { @@ -45,75 +48,72 @@ layout(binding = 1) uniform mediump sampler2D TexLOD; layout(binding = 0) uniform mediump sampler2D TexDisplacement; layout(location = 1) 
in vec4 LODWeights; +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif layout(location = 0) in vec4 Position; layout(location = 0) out vec3 EyeVec; layout(location = 1) out vec4 TexCoord; -uvec4 _474; - void main() { - float _350 = all(equal(LODWeights, vec4(0.0))) ? _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.w : dot(LODWeights, _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].LODs); - float _352 = floor(_350); - uint _357 = uint(_352); - uvec4 _359 = uvec4(Position); - uvec2 _366 = (uvec2(1u) << uvec2(_357, _357 + 1u)) - uvec2(1u); - bool _369 = _359.x < 32u; - uint _465; - if (_369) + float _351 = all(equal(LODWeights, vec4(0.0))) ? _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.w : dot(LODWeights, _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].LODs); + float _353 = floor(_351); + uint _358 = uint(_353); + uvec4 _360 = uvec4(Position); + uvec2 _367 = (uvec2(1u) << uvec2(_358, _358 + 1u)) - uvec2(1u); + bool _370 = _360.x < 32u; + uint _467; + if (_370) { - _465 = _366.x; + _467 = _367.x; } else { - _465 = 0u; + _467 = 0u; } - uvec4 _443 = _474; - _443.x = _465; - bool _379 = _359.y < 32u; - uint _468; - if (_379) + bool _380 = _360.y < 32u; + uint _470; + if (_380) { - _468 = _366.x; + _470 = _367.x; } else { - _468 = 0u; + _470 = 0u; } - uvec4 _447 = _443; - _447.y = _468; - uint _470; - if (_369) + uint _472; + if (_370) { - _470 = _366.y; + _472 = _367.y; } else { - _470 = 0u; + _472 = 0u; } - uvec4 _451 = _447; - _451.z = _470; - uint _472; - if (_379) + uint _474; + if (_380) { - _472 = _366.y; + _474 = _367.y; } else { - _472 = 0u; + _474 = 0u; } - uvec4 _455 = _451; - _455.w = _472; - vec4 _415 = vec4((_359.xyxy + _455) & (~_366).xxyy); - vec2 _197 = ((_53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _180.InvOceanSize_PatchScale.zw) + mix(_415.xy, _415.zw, vec2(_350 - _352))) * 
_180.InvOceanSize_PatchScale.xy; + vec4 _416 = vec4((_360.xyxy + uvec4(_467, _470, _472, _474)) & (~_367).xxyy); + vec2 _197 = (_53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _180.InvOceanSize_PatchScale.zw + mix(_416.xy, _416.zw, vec2(_351 - _353))) * _180.InvOceanSize_PatchScale.xy; vec2 _204 = _197 * _180.NormalTexCoordScale.zw; - mediump float _431 = textureLod(TexLOD, _197, 0.0).x * 7.96875; - float _433 = floor(_431); - vec2 _220 = (_180.InvOceanSize_PatchScale.xy * exp2(_433)) * _180.NormalTexCoordScale.zw; - vec3 _267 = ((vec3(_197.x, 0.0, _197.y) + mix(textureLod(TexDisplacement, _204 + (_220 * 0.5), _433).yxz, textureLod(TexDisplacement, _204 + (_220 * 1.0), _433 + 1.0).yxz, vec3(_431 - _433))) * _180.OceanScale.xyz) + _180.OceanPosition.xyz; + mediump vec4 _431 = textureLod(TexLOD, _197, 0.0); + mediump float _432 = _431.x; + mediump float _433 = _432 * 7.96875; + float hp_copy_433 = _433; + float _435 = floor(hp_copy_433); + vec2 _220 = (_180.InvOceanSize_PatchScale.xy * exp2(_435)) * _180.NormalTexCoordScale.zw; + vec3 _267 = (vec3(_197.x, 0.0, _197.y) + mix(textureLod(TexDisplacement, _197 * _180.NormalTexCoordScale.zw + (_220 * 0.5), _435).yxz, textureLod(TexDisplacement, _197 * _180.NormalTexCoordScale.zw + (_220 * 1.0), _435 + 1.0).yxz, vec3(_432 * 7.96875 + (-_435)))) * _180.OceanScale.xyz + _180.OceanPosition.xyz; EyeVec = _267 - _273.g_CamPos.xyz; - TexCoord = vec4(_204, _204 * _180.NormalTexCoordScale.xy) + ((_180.InvOceanSize_PatchScale.xyxy * 0.5) * _180.NormalTexCoordScale.zwzw); + TexCoord = (_180.InvOceanSize_PatchScale.xyxy * 0.5) * _180.NormalTexCoordScale.zwzw + vec4(_204, _204 * _180.NormalTexCoordScale.xy); gl_Position = (((_273.g_ViewProj_Row0 * _267.x) + (_273.g_ViewProj_Row1 * _267.y)) + (_273.g_ViewProj_Row2 * _267.z)) + _273.g_ViewProj_Row3; } diff --git a/reference/opt/shaders/vert/read-from-row-major-array.vert b/reference/opt/shaders/vert/read-from-row-major-array.vert index 25fc9495d23..d5d9681d0d5 
100644 --- a/reference/opt/shaders/vert/read-from-row-major-array.vert +++ b/reference/opt/shaders/vert/read-from-row-major-array.vert @@ -8,9 +8,24 @@ layout(binding = 0, std140) uniform Block layout(location = 0) in vec4 a_position; layout(location = 0) out mediump float v_vtxResult; +highp mat2x3 spvWorkaroundRowMajor(highp mat2x3 wrap) { return wrap; } +mediump mat2x3 spvWorkaroundRowMajorMP(mediump mat2x3 wrap) { return wrap; } + void main() { gl_Position = a_position; - v_vtxResult = ((float(abs(_104.var[0][0][0].x - 2.0) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][0].y - 6.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][0].z - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(_104.var[0][0][1].x) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][1].y - 5.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][1].z - 5.0) < 0.0500000007450580596923828125)); + float _172 = float(abs(spvWorkaroundRowMajor(_104.var[0][0])[0].x - 2.0) < 0.0500000007450580596923828125); + mediump float mp_copy_172 = _172; + float _180 = float(abs(spvWorkaroundRowMajor(_104.var[0][0])[0].y - 6.0) < 0.0500000007450580596923828125); + mediump float mp_copy_180 = _180; + float _188 = float(abs(spvWorkaroundRowMajor(_104.var[0][0])[0].z - (-6.0)) < 0.0500000007450580596923828125); + mediump float mp_copy_188 = _188; + float _221 = float(abs(spvWorkaroundRowMajor(_104.var[0][0])[1].x) < 0.0500000007450580596923828125); + mediump float mp_copy_221 = _221; + float _229 = float(abs(spvWorkaroundRowMajor(_104.var[0][0])[1].y - 5.0) < 0.0500000007450580596923828125); + mediump float mp_copy_229 = _229; + float _237 = float(abs(spvWorkaroundRowMajor(_104.var[0][0])[1].z - 5.0) < 0.0500000007450580596923828125); + mediump float mp_copy_237 = _237; + v_vtxResult = ((mp_copy_172 * mp_copy_180) * mp_copy_188) * ((mp_copy_221 * mp_copy_229) * mp_copy_237); } diff --git a/reference/opt/shaders/vert/row-major-workaround.vert 
b/reference/opt/shaders/vert/row-major-workaround.vert new file mode 100644 index 00000000000..4fe6885d101 --- /dev/null +++ b/reference/opt/shaders/vert/row-major-workaround.vert @@ -0,0 +1,30 @@ +#version 310 es + +layout(binding = 0, std140) uniform Buffer +{ + layout(row_major) mat4 HP; + layout(row_major) mediump mat4 MP; +} _21; + +layout(binding = 1, std140) uniform Buffer2 +{ + layout(row_major) mediump mat4 MP2; +} _39; + +layout(location = 0) out vec4 H; +layout(location = 0) in vec4 Hin; +layout(location = 1) out mediump vec4 M; +layout(location = 1) in mediump vec4 Min; +layout(location = 2) out mediump vec4 M2; + +highp mat4 spvWorkaroundRowMajor(highp mat4 wrap) { return wrap; } +mediump mat4 spvWorkaroundRowMajorMP(mediump mat4 wrap) { return wrap; } + +void main() +{ + gl_Position = vec4(1.0); + H = spvWorkaroundRowMajor(_21.HP) * Hin; + M = spvWorkaroundRowMajor(_21.MP) * Min; + M2 = spvWorkaroundRowMajorMP(_39.MP2) * Min; +} + diff --git a/reference/opt/shaders/vert/texture_buffer.vert b/reference/opt/shaders/vert/texture_buffer.vert index e9442ce1196..217804dfce9 100644 --- a/reference/opt/shaders/vert/texture_buffer.vert +++ b/reference/opt/shaders/vert/texture_buffer.vert @@ -1,5 +1,5 @@ #version 310 es -#extension GL_OES_texture_buffer : require +#extension GL_EXT_texture_buffer : require layout(binding = 4) uniform highp samplerBuffer uSamp; layout(binding = 5, rgba32f) uniform readonly highp imageBuffer uSampo; diff --git a/reference/opt/shaders/vert/transform-feedback-decorations.vert b/reference/opt/shaders/vert/transform-feedback-decorations.vert new file mode 100644 index 00000000000..23e7cf3c19d --- /dev/null +++ b/reference/opt/shaders/vert/transform-feedback-decorations.vert @@ -0,0 +1,22 @@ +#version 450 + +layout(xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(location = 0, xfb_buffer = 2, xfb_stride = 32, xfb_offset = 16) out vec4 vFoo; 
+layout(xfb_buffer = 3, xfb_stride = 16) out VertOut +{ + layout(location = 1, xfb_offset = 0) vec4 vBar; +} _22; + + +void main() +{ + gl_Position = vec4(1.0); + vFoo = vec4(3.0); + _22.vBar = vec4(5.0); +} + diff --git a/reference/opt/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk index 82ebb960856..771d0496447 100644 --- a/reference/opt/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk +++ b/reference/opt/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk @@ -3,7 +3,7 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer Block; -layout(buffer_reference, std430) buffer Block +layout(buffer_reference, buffer_reference_align = 4, std430) buffer Block { float v; }; diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp.vk new file mode 100644 index 00000000000..f5907d3e2c0 --- /dev/null +++ b/reference/opt/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp.vk @@ -0,0 +1,28 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer Bar; +layout(buffer_reference) buffer Foo; +layout(buffer_reference, buffer_reference_align = 8, std430) buffer Bar +{ + uint a; + uint b; + Foo foo; +}; + +layout(buffer_reference, std430) buffer Foo +{ + uint v; +}; + +layout(push_constant, std430) uniform Push +{ + Bar bar; +} _13; + +void main() +{ + uint _24 = atomicAdd(_13.bar.b, 1u); +} + diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp.vk new file mode 100644 index 00000000000..1808beecbba --- /dev/null +++ 
b/reference/opt/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp.vk @@ -0,0 +1,29 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer Bar; +layout(buffer_reference) buffer Foo; +layout(buffer_reference, buffer_reference_align = 8, std430) buffer Bar +{ + uint a; + uint b; + Foo foo; +}; + +layout(buffer_reference, std430) buffer Foo +{ + uint v; +}; + +layout(push_constant, std430) uniform Push +{ + Bar bar; +} _15; + +void main() +{ + uint v = _15.bar.b; + uint _31 = atomicAdd(_15.bar.a, _15.bar.b); +} + diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp.vk new file mode 100644 index 00000000000..20a4f1b4239 --- /dev/null +++ b/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp.vk @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer PtrInt; +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PtrInt +{ + int value; +}; + +layout(set = 0, binding = 0, std430) buffer Buf +{ + uvec2 ptr; + PtrInt ptrint; +} _13; + +void main() +{ + _13.ptr = uvec2(_13.ptrint); +} + diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp.vk new file mode 100644 index 00000000000..5cf6e2df36d --- /dev/null +++ b/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp.vk @@ -0,0 +1,21 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +layout(local_size_x = 1, 
local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer PtrInt; +layout(buffer_reference, buffer_reference_align = 16, std430) buffer PtrInt +{ + int value; +}; + +layout(set = 0, binding = 0, std430) buffer Buf +{ + uvec2 ptr; +} _10; + +void main() +{ + PtrInt(_10.ptr).value = 10; +} + diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk index 5752f81b268..8923d21d780 100644 --- a/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk +++ b/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk @@ -4,12 +4,12 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer PtrUint; layout(buffer_reference) buffer PtrInt; -layout(buffer_reference, std430) buffer PtrUint +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PtrUint { uint value; }; -layout(buffer_reference, std430) buffer PtrInt +layout(buffer_reference, buffer_reference_align = 16, std430) buffer PtrInt { int value; }; diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp.vk new file mode 100644 index 00000000000..b7e88062a04 --- /dev/null +++ b/reference/opt/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer RO; +layout(buffer_reference) buffer RW; +layout(buffer_reference) buffer WO; +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer RO +{ + vec4 v[]; +}; + +layout(buffer_reference, buffer_reference_align = 16, std430) restrict buffer RW +{ + vec4 v[]; +}; + +layout(buffer_reference, buffer_reference_align = 16, std430) 
coherent writeonly buffer WO +{ + vec4 v[]; +}; + +layout(push_constant, std430) uniform Registers +{ + RO ro; + RW rw; + WO wo; +} registers; + +void main() +{ + registers.rw.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; + registers.wo.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; +} + diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk index dfcaac83618..c3855cf634a 100644 --- a/reference/opt/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk +++ b/reference/opt/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk @@ -1,10 +1,14 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#else +#error No extension available for 64-bit integers. +#endif #extension GL_EXT_buffer_reference : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer Node; -layout(buffer_reference, std430) buffer Node +layout(buffer_reference, buffer_reference_align = 16, std430) buffer Node { layout(offset = 0) int value; layout(offset = 16) Node next; @@ -20,26 +24,30 @@ layout(set = 0, binding = 0, std430) restrict buffer LinkedList void main() { Node _45; + Node _114; if (gl_WorkGroupID.x < 4u) { _45 = _50.head1; + _114 = _50.head1; } else { _45 = _50.head2; + _114 = _50.head2; } - restrict Node n = _45; - Node param = n.next; + restrict Node n = _114; + Node param = _114.next; Node param_1 = _50.head1; Node param_2 = _50.head2; - param.value = param_1.value + param_2.value; + _114.next.value = _50.head1.value + _50.head2.value; Node param_4 = _50.head1; - Node param_3 = param_4; - n = param_3; + Node param_3 = _50.head1; + n = _50.head1; int v = _50.head2.value; - n.value = 20; - n.value = v * 10; - uint64_t uptr = uint64_t(_50.head2.next); - Node unode = Node(uptr); + _50.head1.value = 20; + _50.head1.value = _50.head2.value * 10; + 
uint64_t _98 = uint64_t(_50.head2.next); + uint64_t uptr = _98; + Node unode = Node(_98); } diff --git a/reference/opt/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp.vk new file mode 100644 index 00000000000..b7004746220 --- /dev/null +++ b/reference/opt/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp.vk @@ -0,0 +1,23 @@ +#version 450 +#extension GL_EXT_shader_atomic_float : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std430) buffer SSBO +{ + float v; +} _18; + +layout(set = 0, binding = 0, r32f) uniform image2D uImage; + +shared float shared_v; + +void main() +{ + float _15 = atomicAdd(shared_v, 2.0); + float value = _15; + float _24 = atomicAdd(_18.v, _15); + float _39 = imageAtomicAdd(uImage, ivec2(gl_GlobalInvocationID.xy), _15); + float _45 = imageAtomicExchange(uImage, ivec2(gl_GlobalInvocationID.xy), _15); + value = _45; +} + diff --git a/reference/opt/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp.vk b/reference/opt/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp.vk new file mode 100644 index 00000000000..fbe5e3d9640 --- /dev/null +++ b/reference/opt/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp.vk @@ -0,0 +1,72 @@ +#version 460 +#extension GL_EXT_ray_query : require +#extension GL_EXT_ray_flags_primitive_culling : require +#extension GL_EXT_ray_tracing : require +layout(primitive_culling); +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std140) uniform Params +{ + uint ray_flags; + uint cull_mask; + vec3 origin; + float tmin; + vec3 dir; + float tmax; + float thit; + uvec2 bda; +} _19; + +layout(set = 0, binding = 0) uniform accelerationStructureEXT AS; + +rayQueryEXT q; +rayQueryEXT q2[2]; + +void main() +{ + rayQueryInitializeEXT(q, AS, _19.ray_flags, _19.cull_mask, _19.origin, _19.tmin, _19.dir, _19.tmax); + rayQueryInitializeEXT(q2[1], 
accelerationStructureEXT(_19.bda), _19.ray_flags, _19.cull_mask, _19.origin, _19.tmin, _19.dir, _19.tmax); + bool _67 = rayQueryProceedEXT(q); + bool res = _67; + rayQueryTerminateEXT(q2[0]); + rayQueryGenerateIntersectionEXT(q, _19.thit); + rayQueryConfirmIntersectionEXT(q2[1]); + float _75 = rayQueryGetRayTMinEXT(q); + float fval = _75; + uint _79 = rayQueryGetRayFlagsEXT(q2[0]); + uint type = _79; + vec3 _82 = rayQueryGetWorldRayDirectionEXT(q); + vec3 fvals = _82; + vec3 _83 = rayQueryGetWorldRayOriginEXT(q); + fvals = _83; + uint _86 = rayQueryGetIntersectionTypeEXT(q2[1], bool(1)); + type = _86; + bool _88 = rayQueryGetIntersectionCandidateAABBOpaqueEXT(q2[1]); + res = _88; + float _91 = rayQueryGetIntersectionTEXT(q2[1], bool(0)); + fval = _91; + int _94 = rayQueryGetIntersectionInstanceCustomIndexEXT(q, bool(1)); + int ival = _94; + int _96 = rayQueryGetIntersectionInstanceIdEXT(q2[0], bool(0)); + ival = _96; + uint _97 = rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(q, bool(1)); + type = _97; + int _99 = rayQueryGetIntersectionGeometryIndexEXT(q2[1], bool(0)); + ival = _99; + int _100 = rayQueryGetIntersectionPrimitiveIndexEXT(q, bool(1)); + ival = _100; + vec2 _103 = rayQueryGetIntersectionBarycentricsEXT(q2[0], bool(0)); + fvals.x = _103.x; + fvals.y = _103.y; + bool _110 = rayQueryGetIntersectionFrontFaceEXT(q, bool(1)); + res = _110; + vec3 _111 = rayQueryGetIntersectionObjectRayDirectionEXT(q, bool(0)); + fvals = _111; + vec3 _113 = rayQueryGetIntersectionObjectRayOriginEXT(q2[0], bool(1)); + fvals = _113; + mat4x3 _117 = rayQueryGetIntersectionObjectToWorldEXT(q, bool(0)); + mat4x3 matrices = _117; + mat4x3 _119 = rayQueryGetIntersectionWorldToObjectEXT(q2[1], bool(1)); + matrices = _119; +} + diff --git a/reference/opt/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk b/reference/opt/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk deleted file mode 100644 index d67e0beeb65..00000000000 --- 
a/reference/opt/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk +++ /dev/null @@ -1,147 +0,0 @@ -#version 310 es -#extension GL_EXT_scalar_block_layout : require -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -struct S0 -{ - vec2 a[1]; - float b; -}; - -struct S1 -{ - vec3 a; - float b; -}; - -struct S2 -{ - vec3 a[1]; - float b; -}; - -struct S3 -{ - vec2 a; - float b; -}; - -struct S4 -{ - vec2 c; -}; - -struct Content -{ - S0 m0s[1]; - S1 m1s[1]; - S2 m2s[1]; - S0 m0; - S1 m1; - S2 m2; - S3 m3; - float m4; - S4 m3s[8]; -}; - -struct S0_1 -{ - vec2 a[1]; - float b; -}; - -struct S1_1 -{ - vec3 a; - float b; -}; - -struct S2_1 -{ - vec3 a[1]; - float b; -}; - -struct S3_1 -{ - vec2 a; - float b; -}; - -struct S4_1 -{ - vec2 c; -}; - -struct Content_1 -{ - S0_1 m0s[1]; - S1_1 m1s[1]; - S2_1 m2s[1]; - S0_1 m0; - S1_1 m1; - S2_1 m2; - S3_1 m3; - float m4; - S4_1 m3s[8]; -}; - -layout(set = 0, binding = 1, scalar) restrict buffer SSBO1 -{ - Content content; - Content content1[2]; - Content content2; - mat2 m0; - mat2 m1; - mat2x3 m2[4]; - mat3x2 m3; - layout(row_major) mat2 m4; - layout(row_major) mat2 m5[9]; - layout(row_major) mat2x3 m6[4][2]; - layout(row_major) mat3x2 m7; - float array[]; -} ssbo_430; - -layout(set = 0, binding = 0, std140) restrict buffer SSBO0 -{ - Content_1 content; - Content_1 content1[2]; - Content_1 content2; - mat2 m0; - mat2 m1; - mat2x3 m2[4]; - mat3x2 m3; - layout(row_major) mat2 m4; - layout(row_major) mat2 m5[9]; - layout(row_major) mat2x3 m6[4][2]; - layout(row_major) mat3x2 m7; - float array[]; -} ssbo_140; - -void main() -{ - ssbo_430.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0]; - ssbo_430.content.m0s[0].b = ssbo_140.content.m0s[0].b; - ssbo_430.content.m1s[0].a = ssbo_140.content.m1s[0].a; - ssbo_430.content.m1s[0].b = ssbo_140.content.m1s[0].b; - ssbo_430.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0]; - ssbo_430.content.m2s[0].b = ssbo_140.content.m2s[0].b; - 
ssbo_430.content.m0.a[0] = ssbo_140.content.m0.a[0]; - ssbo_430.content.m0.b = ssbo_140.content.m0.b; - ssbo_430.content.m1.a = ssbo_140.content.m1.a; - ssbo_430.content.m1.b = ssbo_140.content.m1.b; - ssbo_430.content.m2.a[0] = ssbo_140.content.m2.a[0]; - ssbo_430.content.m2.b = ssbo_140.content.m2.b; - ssbo_430.content.m3.a = ssbo_140.content.m3.a; - ssbo_430.content.m3.b = ssbo_140.content.m3.b; - ssbo_430.content.m4 = ssbo_140.content.m4; - ssbo_430.content.m3s[0].c = ssbo_140.content.m3s[0].c; - ssbo_430.content.m3s[1].c = ssbo_140.content.m3s[1].c; - ssbo_430.content.m3s[2].c = ssbo_140.content.m3s[2].c; - ssbo_430.content.m3s[3].c = ssbo_140.content.m3s[3].c; - ssbo_430.content.m3s[4].c = ssbo_140.content.m3s[4].c; - ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c; - ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c; - ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c; -} - diff --git a/reference/opt/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk b/reference/opt/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk deleted file mode 100644 index 6d288574f74..00000000000 --- a/reference/opt/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk +++ /dev/null @@ -1,110 +0,0 @@ -#version 450 -#extension GL_KHR_shader_subgroup_basic : require -#extension GL_KHR_shader_subgroup_ballot : require -#extension GL_KHR_shader_subgroup_shuffle : require -#extension GL_KHR_shader_subgroup_shuffle_relative : require -#extension GL_KHR_shader_subgroup_vote : require -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_KHR_shader_subgroup_clustered : require -#extension GL_KHR_shader_subgroup_quad : require -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -layout(set = 0, binding = 0, std430) buffer SSBO -{ - float FragColor; -} _9; - -void main() -{ - _9.FragColor = float(gl_NumSubgroups); - _9.FragColor = float(gl_SubgroupID); - _9.FragColor = float(gl_SubgroupSize); - _9.FragColor = 
float(gl_SubgroupInvocationID); - subgroupMemoryBarrier(); - subgroupBarrier(); - subgroupMemoryBarrier(); - subgroupMemoryBarrierBuffer(); - subgroupMemoryBarrierShared(); - subgroupMemoryBarrierImage(); - bool elected = subgroupElect(); - _9.FragColor = vec4(gl_SubgroupEqMask).x; - _9.FragColor = vec4(gl_SubgroupGeMask).x; - _9.FragColor = vec4(gl_SubgroupGtMask).x; - _9.FragColor = vec4(gl_SubgroupLeMask).x; - _9.FragColor = vec4(gl_SubgroupLtMask).x; - vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); - vec3 first = subgroupBroadcastFirst(vec3(20.0)); - uvec4 ballot_value = subgroupBallot(true); - bool inverse_ballot_value = subgroupInverseBallot(ballot_value); - bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); - uint bit_count = subgroupBallotBitCount(ballot_value); - uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); - uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); - uint lsb = subgroupBallotFindLSB(ballot_value); - uint msb = subgroupBallotFindMSB(ballot_value); - uint shuffled = subgroupShuffle(10u, 8u); - uint shuffled_xor = subgroupShuffleXor(30u, 8u); - uint shuffled_up = subgroupShuffleUp(20u, 4u); - uint shuffled_down = subgroupShuffleDown(20u, 4u); - bool has_all = subgroupAll(true); - bool has_any = subgroupAny(true); - bool has_equal = subgroupAllEqual(true); - vec4 added = subgroupAdd(vec4(20.0)); - ivec4 iadded = subgroupAdd(ivec4(20)); - vec4 multiplied = subgroupMul(vec4(20.0)); - ivec4 imultiplied = subgroupMul(ivec4(20)); - vec4 lo = subgroupMin(vec4(20.0)); - vec4 hi = subgroupMax(vec4(20.0)); - ivec4 slo = subgroupMin(ivec4(20)); - ivec4 shi = subgroupMax(ivec4(20)); - uvec4 ulo = subgroupMin(uvec4(20u)); - uvec4 uhi = subgroupMax(uvec4(20u)); - uvec4 anded = subgroupAnd(ballot_value); - uvec4 ored = subgroupOr(ballot_value); - uvec4 xored = subgroupXor(ballot_value); - added = subgroupInclusiveAdd(added); - iadded = subgroupInclusiveAdd(iadded); - multiplied = 
subgroupInclusiveMul(multiplied); - imultiplied = subgroupInclusiveMul(imultiplied); - lo = subgroupInclusiveMin(lo); - hi = subgroupInclusiveMax(hi); - slo = subgroupInclusiveMin(slo); - shi = subgroupInclusiveMax(shi); - ulo = subgroupInclusiveMin(ulo); - uhi = subgroupInclusiveMax(uhi); - anded = subgroupInclusiveAnd(anded); - ored = subgroupInclusiveOr(ored); - xored = subgroupInclusiveXor(ored); - added = subgroupExclusiveAdd(lo); - added = subgroupExclusiveAdd(multiplied); - multiplied = subgroupExclusiveMul(multiplied); - iadded = subgroupExclusiveAdd(imultiplied); - imultiplied = subgroupExclusiveMul(imultiplied); - lo = subgroupExclusiveMin(lo); - hi = subgroupExclusiveMax(hi); - ulo = subgroupExclusiveMin(ulo); - uhi = subgroupExclusiveMax(uhi); - slo = subgroupExclusiveMin(slo); - shi = subgroupExclusiveMax(shi); - anded = subgroupExclusiveAnd(anded); - ored = subgroupExclusiveOr(ored); - xored = subgroupExclusiveXor(ored); - added = subgroupClusteredAdd(added, 4u); - multiplied = subgroupClusteredMul(multiplied, 4u); - iadded = subgroupClusteredAdd(iadded, 4u); - imultiplied = subgroupClusteredMul(imultiplied, 4u); - lo = subgroupClusteredMin(lo, 4u); - hi = subgroupClusteredMax(hi, 4u); - ulo = subgroupClusteredMin(ulo, 4u); - uhi = subgroupClusteredMax(uhi, 4u); - slo = subgroupClusteredMin(slo, 4u); - shi = subgroupClusteredMax(shi, 4u); - anded = subgroupClusteredAnd(anded, 4u); - ored = subgroupClusteredOr(ored, 4u); - xored = subgroupClusteredXor(xored, 4u); - vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); - vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); - vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); - vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); -} - diff --git a/reference/opt/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag.vk b/reference/opt/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag.vk new file mode 100644 index 00000000000..153164920f0 --- 
/dev/null +++ b/reference/opt/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag.vk @@ -0,0 +1,15 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out vec4 FragColor; + +void main() +{ + bool _15 = helperInvocationEXT(); + demote; + if (!_15) + { + FragColor = vec4(1.0, 0.0, 0.0, 1.0); + } +} + diff --git a/reference/opt/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag.vk b/reference/opt/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag.vk new file mode 100644 index 00000000000..688a5800d12 --- /dev/null +++ b/reference/opt/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag.vk @@ -0,0 +1,9 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void main() +{ + demote; + bool _9 = helperInvocationEXT(); +} + diff --git a/reference/opt/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk b/reference/opt/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk index 294f908d140..891ed232e8b 100644 --- a/reference/opt/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk +++ b/reference/opt/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk @@ -1,19 +1,24 @@ #version 450 #extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_samplerless_texture_functions : require layout(set = 0, binding = 2, std140) uniform UBO { vec4 v[64]; } ubos[]; -layout(set = 0, binding = 3, std430) readonly buffer SSBO +layout(set = 0, binding = 3, std430) buffer SSBO { + uint counter; vec4 v[]; } ssbos[]; layout(set = 0, binding = 0) uniform texture2D uSamplers[]; layout(set = 0, binding = 1) uniform sampler uSamps[]; layout(set = 0, binding = 4) uniform sampler2D uCombinedSamplers[]; +layout(set = 0, binding = 0) uniform texture2DMS uSamplersMS[]; +layout(set = 0, binding = 5, r32f) uniform image2D uImages[]; +layout(set = 0, binding = 5, r32ui) uniform uimage2D uImagesU32[]; layout(location = 0) flat in int vIndex; layout(location = 0) out vec4 
FragColor; @@ -23,9 +28,37 @@ void main() { int _22 = vIndex + 10; int _32 = vIndex + 40; - FragColor = texture(sampler2D(uSamplers[nonuniformEXT(_22)], uSamps[nonuniformEXT(_32)]), vUV); - FragColor = texture(uCombinedSamplers[nonuniformEXT(_22)], vUV); - FragColor += ubos[nonuniformEXT(vIndex + 20)].v[_32]; - FragColor += ssbos[nonuniformEXT(vIndex + 50)].v[vIndex + 60]; + FragColor = texture(nonuniformEXT(sampler2D(uSamplers[_22], uSamps[_32])), vUV); + int _49 = _22; + FragColor = texture(uCombinedSamplers[nonuniformEXT(_49)], vUV); + int _65 = vIndex + 20; + int _69 = _32; + FragColor += ubos[nonuniformEXT(_65)].v[_69]; + int _83 = vIndex + 50; + int _88 = vIndex + 60; + FragColor += ssbos[nonuniformEXT(_83)].v[_88]; + int _100 = vIndex + 70; + ssbos[nonuniformEXT(_88)].v[_100] = vec4(20.0); + ivec2 _111 = ivec2(vUV); + FragColor = texelFetch(uSamplers[nonuniformEXT(_49)], _111, 0); + int _116 = vIndex + 100; + uint _122 = atomicAdd(ssbos[_116].counter, 100u); + vec4 _147 = FragColor; + vec2 _149 = _147.xy + (textureQueryLod(nonuniformEXT(sampler2D(uSamplers[_22], uSamps[_32])), vUV) + textureQueryLod(uCombinedSamplers[nonuniformEXT(_49)], vUV)); + FragColor.x = _149.x; + FragColor.y = _149.y; + FragColor.x += float(textureQueryLevels(uSamplers[nonuniformEXT(_65)])); + FragColor.y += float(textureSamples(uSamplersMS[nonuniformEXT(_65)])); + vec4 _189 = FragColor; + vec2 _191 = _189.xy + vec2(textureSize(uSamplers[nonuniformEXT(_65)], 0)); + FragColor.x = _191.x; + FragColor.y = _191.y; + FragColor += imageLoad(uImages[nonuniformEXT(_83)], _111); + vec4 _218 = FragColor; + vec2 _220 = _218.xy + vec2(imageSize(uImages[nonuniformEXT(_65)])); + FragColor.x = _220.x; + FragColor.y = _220.y; + imageStore(uImages[nonuniformEXT(_88)], _111, vec4(50.0)); + uint _248 = imageAtomicAdd(uImagesU32[nonuniformEXT(_100)], _111, 40u); } diff --git a/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag 
b/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag index df2994efb92..f77b448cdc8 100644 --- a/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag +++ b/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag @@ -13,7 +13,11 @@ layout(location = 0) out vec4 FragColor; void main() { - vec2 _95 = (vTex + (vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[1], 0)))) + (vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[2], 1))); + highp vec2 _76 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[1], 0)); + vec2 mp_copy_76 = _76; + highp vec2 _86 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[2], 1)); + vec2 mp_copy_86 = _86; + vec2 _95 = (vTex + mp_copy_76) + mp_copy_86; FragColor = ((((texture(SPIRV_Cross_CombineduTextureuSampler[2], _95) + texture(SPIRV_Cross_CombineduTextureuSampler[1], _95)) + texture(SPIRV_Cross_CombineduTextureuSampler[1], _95)) + texture(SPIRV_Cross_CombineduTextureArrayuSampler[3], vTex3)) + texture(SPIRV_Cross_CombineduTextureCubeuSampler[1], vTex3)) + texture(SPIRV_Cross_CombineduTexture3DuSampler[2], vTex3); } diff --git a/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk b/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk index d275a0f4086..7a0c428d193 100644 --- a/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk +++ b/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk @@ -14,7 +14,11 @@ layout(location = 0) out vec4 FragColor; void main() { - vec2 _95 = (vTex + (vec2(1.0) / vec2(textureSize(sampler2D(uTexture[1], uSampler), 0)))) + (vec2(1.0) / vec2(textureSize(sampler2D(uTexture[2], uSampler), 1))); + highp vec2 _76 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture[1], uSampler), 0)); + vec2 mp_copy_76 = _76; + highp vec2 _86 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture[2], uSampler), 1)); + vec2 
mp_copy_86 = _86; + vec2 _95 = (vTex + mp_copy_76) + mp_copy_86; FragColor = ((((texture(sampler2D(uTexture[2], uSampler), _95) + texture(sampler2D(uTexture[1], uSampler), _95)) + texture(sampler2D(uTexture[1], uSampler), _95)) + texture(sampler2DArray(uTextureArray[3], uSampler), vTex3)) + texture(samplerCube(uTextureCube[1], uSampler), vTex3)) + texture(sampler3D(uTexture3D[2], uSampler), vTex3); } diff --git a/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag b/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag index aad1e43662b..c664bd55b17 100644 --- a/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag +++ b/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag @@ -13,7 +13,11 @@ layout(location = 0) out vec4 FragColor; void main() { - vec2 _73 = (vTex + (vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 0)))) + (vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 1))); + highp vec2 _54 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 0)); + vec2 mp_copy_54 = _54; + highp vec2 _64 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 1)); + vec2 mp_copy_64 = _64; + vec2 _73 = (vTex + mp_copy_54) + mp_copy_64; FragColor = (((texture(SPIRV_Cross_CombineduTextureuSampler, _73) + texture(SPIRV_Cross_CombineduTextureuSampler, _73)) + texture(SPIRV_Cross_CombineduTextureArrayuSampler, vTex3)) + texture(SPIRV_Cross_CombineduTextureCubeuSampler, vTex3)) + texture(SPIRV_Cross_CombineduTexture3DuSampler, vTex3); } diff --git a/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk b/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk index b79374aba98..9fcd3252758 100644 --- a/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk +++ b/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk @@ -14,7 +14,11 @@ layout(location = 0) out vec4 FragColor; void main() { - vec2 _73 = (vTex 
+ (vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 0)))) + (vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 1))); + highp vec2 _54 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 0)); + vec2 mp_copy_54 = _54; + highp vec2 _64 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 1)); + vec2 mp_copy_64 = _64; + vec2 _73 = (vTex + mp_copy_54) + mp_copy_64; FragColor = (((texture(sampler2D(uTexture, uSampler), _73) + texture(sampler2D(uTexture, uSampler), _73)) + texture(sampler2DArray(uTextureArray, uSampler), vTex3)) + texture(samplerCube(uTextureCube, uSampler), vTex3)) + texture(sampler3D(uTexture3D, uSampler), vTex3); } diff --git a/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk b/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk index d09930f3ad7..512bc915e6a 100644 --- a/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk +++ b/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk @@ -1,6 +1,4 @@ #version 450 -#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require -#extension GL_EXT_shader_16bit_storage : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require #extension GL_EXT_shader_8bit_storage : require @@ -28,42 +26,18 @@ layout(location = 1) out uvec4 FragColorUint; void main() { - int16_t _196 = 10s; - int _197 = 20; - i8vec2 _198 = unpack8(_196); - i8vec4 _199 = unpack8(_197); - _196 = pack16(_198); - _197 = pack32(_199); - ssbo.i8[0] = _199.x; - ssbo.i8[1] = _199.y; - ssbo.i8[2] = _199.z; - ssbo.i8[3] = _199.w; - uint16_t _220 = 10us; - uint _221 = 20u; - u8vec2 _222 = unpack8(_220); - u8vec4 _223 = unpack8(_221); - _220 = pack16(_222); - _221 = pack32(_223); - ssbo.u8[0] = _223.x; - ssbo.u8[1] = _223.y; - ssbo.u8[2] = _223.z; - ssbo.u8[3] = _223.w; - i8vec4 _246 = i8vec4(vColor); - i8vec4 _244 = _246; - _244 += i8vec4(registers.i8); - _244 += i8vec4(-40); - _244 += 
i8vec4(-50); - _244 += i8vec4(int8_t(10), int8_t(20), int8_t(30), int8_t(40)); - _244 += i8vec4(ssbo.i8[4]); - _244 += i8vec4(ubo.i8); - FragColorInt = ivec4(_244); - u8vec4 _271 = u8vec4(_246); - _271 += u8vec4(registers.u8); - _271 += u8vec4(216); - _271 += u8vec4(206); - _271 += u8vec4(uint8_t(10), uint8_t(20), uint8_t(30), uint8_t(40)); - _271 += u8vec4(ssbo.u8[4]); - _271 += u8vec4(ubo.u8); - FragColorUint = uvec4(_271); + i8vec4 _204 = unpack8(20); + ssbo.i8[0] = _204.x; + ssbo.i8[1] = _204.y; + ssbo.i8[2] = _204.z; + ssbo.i8[3] = _204.w; + u8vec4 _229 = unpack8(20u); + ssbo.u8[0] = _229.x; + ssbo.u8[1] = _229.y; + ssbo.u8[2] = _229.z; + ssbo.u8[3] = _229.w; + i8vec4 _249 = i8vec4(vColor); + FragColorInt = ivec4((((((_249 + i8vec4(registers.i8)) + i8vec4(-40)) + i8vec4(-50)) + i8vec4(int8_t(10), int8_t(20), int8_t(30), int8_t(40))) + i8vec4(ssbo.i8[4])) + i8vec4(ubo.i8)); + FragColorUint = uvec4((((((u8vec4(_249) + u8vec4(registers.u8)) + u8vec4(216)) + u8vec4(206)) + u8vec4(uint8_t(10), uint8_t(20), uint8_t(30), uint8_t(40))) + u8vec4(ssbo.u8[4])) + u8vec4(ubo.u8)); } diff --git a/reference/opt/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk b/reference/opt/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk index 34bfea02604..04c4062a6c9 100644 --- a/reference/opt/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk +++ b/reference/opt/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk @@ -8,6 +8,7 @@ layout(location = 0) out float FragColor; void main() { - FragColor = float(f); + float _17 = float(f); + FragColor = _17; } diff --git a/reference/opt/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit.vk b/reference/opt/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit.vk new file mode 100644 index 00000000000..2f7fbc1d953 --- /dev/null +++ b/reference/opt/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT 
float payload; + +void main() +{ + if (payload > 0.0) + { + ignoreIntersectionEXT; + } + else + { + terminateRayEXT; + } +} + diff --git a/reference/opt/shaders/vulkan/rahit/terminators.nocompat.vk.rahit.vk b/reference/opt/shaders/vulkan/rahit/terminators.nocompat.vk.rahit.vk new file mode 100644 index 00000000000..9b9e34b3250 --- /dev/null +++ b/reference/opt/shaders/vulkan/rahit/terminators.nocompat.vk.rahit.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +rayPayloadInNV float payload; + +void main() +{ + if (payload > 0.0) + { + ignoreIntersectionNV(); + } + else + { + terminateRayNV(); + } +} + diff --git a/reference/opt/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall.vk b/reference/opt/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall.vk new file mode 100644 index 00000000000..5adfac164fa --- /dev/null +++ b/reference/opt/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) callableDataInEXT float c; + +void main() +{ + executeCallableEXT(10u, 0); +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..b6c1876d313 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,24 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo2 hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit.vk new file mode 100644 index 
00000000000..614a04d95e7 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit.vk @@ -0,0 +1,24 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo2 hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..b6c1876d313 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,24 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo2 hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..614a04d95e7 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit.vk @@ -0,0 +1,24 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo2 hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..eeccd3bb092 
--- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,11 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec2 payload; +hitAttributeEXT vec2 hit; + +void main() +{ + payload = hit; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..908d96344f3 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit.vk @@ -0,0 +1,11 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec2 payload; +hitAttributeNV vec2 hit; + +void main() +{ + payload = hit; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a51e6b088f3 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo hit; + +void main() +{ + payload = hit; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..133bdfc1d90 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo hit; + +void main() +{ + payload = hit; +} + diff --git 
a/reference/opt/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e4e0103ddb5 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_HitKindEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..64f79a8dce0 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_HitKindNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e94e3323c98 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTmaxEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..9004a00c40e --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_HitTNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit.vk 
b/reference/opt/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a013baa11d5 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_IncomingRayFlagsEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..d17ab8ce76c --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_IncomingRayFlagsNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e28af5d2527 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = uint(gl_InstanceCustomIndexEXT); +} + diff --git a/reference/opt/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..531a1fc2845 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = uint(gl_InstanceCustomIndexNV); +} + diff --git 
a/reference/opt/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..0413e0d234a --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = uint(gl_InstanceID); +} + diff --git a/reference/opt/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..ff551db7c9d --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = uint(gl_InstanceID); +} + diff --git a/reference/opt/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..237d4790e55 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectRayDirectionEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..01afa0e067a --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectRayDirectionNV; +} + 
diff --git a/reference/opt/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..5739ac09ff5 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectRayOriginEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a49e17a1738 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectRayOriginNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..7922e1efbf4 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectToWorldEXT * vec4(payload, 1.0); +} + diff --git a/reference/opt/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..fc2c5ed0c2c --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + 
payload = gl_ObjectToWorldNV * vec4(payload, 1.0); +} + diff --git a/reference/opt/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..0bde78724c7 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Payload +{ + vec4 a; +}; + +layout(location = 0) rayPayloadInEXT Payload payload; + +void main() +{ + payload.a = vec4(10.0); +} + diff --git a/reference/opt/shaders/vulkan/rchit/payloads.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/payloads.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..6d865f7a195 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/payloads.nocompat.vk.rchit.vk @@ -0,0 +1,15 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Payload +{ + vec4 a; +}; + +layout(location = 0) rayPayloadInNV Payload payload; + +void main() +{ + payload.a = vec4(10.0); +} + diff --git a/reference/opt/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..667c015e8d6 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = uint(gl_PrimitiveID); +} + diff --git a/reference/opt/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..d3b0ef19429 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; 
+ +void main() +{ + payload = uint(gl_PrimitiveID); +} + diff --git a/reference/opt/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e94e3323c98 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTmaxEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..769c96ad6b7 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_RayTmaxNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..04b89549508 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTminEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..2709899a13a --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_RayTminNV; +} + diff --git 
a/reference/opt/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..05af948b379 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = 1.0 + float(gl_InstanceID); +} + diff --git a/reference/opt/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk index 547b9cd51a5..103fd66b801 100644 --- a/reference/opt/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk +++ b/reference/opt/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk @@ -5,6 +5,6 @@ layout(location = 0) rayPayloadInNV float payload; void main() { - payload = 1.0; + payload = 1.0 + float(gl_InstanceID); } diff --git a/reference/opt/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..68ba2bafa54 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldRayDirectionEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..4acf03e0649 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = 
gl_WorldRayDirectionNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a5c6766e055 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldRayOriginEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..70241f23620 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldRayOriginNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..309ca4c6f17 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldToObjectEXT * vec4(payload, 1.0); +} + diff --git a/reference/opt/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..0b93e38acd1 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + 
+void main() +{ + payload = gl_WorldToObjectNV * vec4(payload, 1.0); +} + diff --git a/reference/opt/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen.vk b/reference/opt/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen.vk new file mode 100644 index 00000000000..335f476dc50 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen.vk @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(push_constant, std430) uniform Registers +{ + uvec2 ptr; +} _19; + +layout(location = 0) rayPayloadEXT vec4 payload; + +void main() +{ + traceRayEXT(accelerationStructureEXT(_19.ptr), 1u, 255u, 0u, 0u, 0u, vec3(0.0), 0.0, vec3(0.0, 0.0, -1.0), 100.0, 0); +} + diff --git a/reference/opt/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen.vk new file mode 100644 index 00000000000..2cb00f26f1d --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen.vk @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT vec4 payload; +layout(location = 1) callableDataEXT float blend; +layout(set = 0, binding = 1, rgba32f) uniform writeonly image2D image; + +void main() +{ + traceRayEXT(as, 1u, 255u, 0u, 0u, 0u, vec3(0.0), 0.0, vec3(0.0, 0.0, -1.0), 100.0, 0); + executeCallableEXT(0u, 1); + imageStore(image, ivec2(gl_LaunchIDEXT.xy), payload + vec4(blend)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..1614c49626e --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) 
uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchIDEXT.xy), vec4(1.0)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..f907e6fd606 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchIDNV.xy), vec4(1.0)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..240e93daa48 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchSizeEXT.xy) - ivec2(1), vec4(1.0)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..08992c63194 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchSizeNV.xy) - ivec2(1), vec4(1.0)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..434eadf2166 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,31 @@ 
+#version 460 +#extension GL_EXT_ray_tracing : require + +struct Payload +{ + float a; + float b; +}; + +struct Block +{ + float a; + float b; + Payload c; + Payload d; +}; + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT Payload payload2; +layout(location = 1) rayPayloadEXT float payload1; +layout(location = 2) rayPayloadEXT Block _71; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +void main() +{ + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(1.0, 0.0, 0.0), 0.0, vec3(0.0, 1.0, 0.0), 1000.0, 1); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(1.0, 0.0, 0.0), 0.0, vec3(0.0, 1.0, 0.0), 1000.0, 0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(1.0, 0.0, 0.0), 0.0, vec3(0.0, 1.0, 0.0), 1000.0, 2); + imageStore(image, ivec2(gl_LaunchIDEXT.xy), (vec4(payload1) + (vec4(payload2.a) + vec4(payload2.b))) + vec4(((((_71.a + _71.b) + _71.c.a) + _71.c.b) + _71.d.a) + _71.d.b)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/payloads.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/payloads.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..8212fa6484b --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/payloads.nocompat.vk.rgen.vk @@ -0,0 +1,31 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Payload +{ + float a; + float b; +}; + +struct Block +{ + float a; + float b; + Payload c; + Payload d; +}; + +layout(set = 0, binding = 1) uniform accelerationStructureNV as; +layout(location = 1) rayPayloadNV Payload payload2; +layout(location = 0) rayPayloadNV float payload1; +layout(location = 2) rayPayloadNV Block _71; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +void main() +{ + traceNV(as, 0u, 255u, 0u, 1u, 0u, vec3(1.0, 0.0, 0.0), 0.0, vec3(0.0, 1.0, 0.0), 1000.0, 0); + traceNV(as, 0u, 255u, 0u, 1u, 0u, vec3(1.0, 0.0, 0.0), 0.0, vec3(0.0, 1.0, 0.0), 1000.0, 1); + traceNV(as, 0u, 255u, 0u, 1u, 0u, vec3(1.0, 0.0, 0.0), 0.0, vec3(0.0, 1.0, 
0.0), 1000.0, 2); + imageStore(image, ivec2(gl_LaunchIDNV.xy), (vec4(payload1) + (vec4(payload2.a) + vec4(payload2.b))) + vec4(((((_71.a + _71.b) + _71.c.a) + _71.c.b) + _71.d.a) + _71.d.b)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..25b8f2877a5 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,13 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +void main() +{ + vec2 _57 = vec2(gl_LaunchIDEXT.xy); + vec2 _61 = vec2(gl_LaunchSizeEXT.xy); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(_57.x / _61.x, _57.y / _61.y, 1.0), 0.0, vec3(0.0, 0.0, -1.0), 1000.0, 0); +} + diff --git a/reference/opt/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..d8814465958 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +void main() +{ + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0), 0.0, vec3(0.0, 0.0, -1.0), 1000.0, 0); + vec4 _68 = vec4(0.0, 0.0, 0.0, 1.0); + _68.y = payload; + imageStore(image, ivec2(gl_LaunchIDEXT.xy), _68); +} + diff --git a/reference/opt/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen.vk 
b/reference/opt/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..3056e8ad281 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(shaderRecordEXT, std430) buffer sbt +{ + vec3 direction; + float tmax; +} _20; + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +void main() +{ + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(0.0), 0.0, _20.direction, _20.tmax, 0); +} + diff --git a/reference/opt/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint.vk b/reference/opt/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint.vk new file mode 100644 index 00000000000..f9eb7335d83 --- /dev/null +++ b/reference/opt/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint.vk @@ -0,0 +1,8 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +void main() +{ + bool _16 = reportIntersectionEXT(0.5, 10u); +} + diff --git a/reference/opt/shaders/vulkan/rint/report-intersection.nocompat.vk.rint.vk b/reference/opt/shaders/vulkan/rint/report-intersection.nocompat.vk.rint.vk new file mode 100644 index 00000000000..56873aff06f --- /dev/null +++ b/reference/opt/shaders/vulkan/rint/report-intersection.nocompat.vk.rint.vk @@ -0,0 +1,8 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +void main() +{ + bool _16 = reportIntersectionNV(0.5, 10u); +} + diff --git a/reference/opt/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss.vk b/reference/opt/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss.vk new file mode 100644 index 00000000000..c055a268144 --- /dev/null +++ b/reference/opt/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float 
payload; + +void main() +{ + payload = 0.0; +} + diff --git a/reference/opt/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss.vk b/reference/opt/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss.vk new file mode 100644 index 00000000000..7e791266163 --- /dev/null +++ b/reference/opt/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss.vk @@ -0,0 +1,11 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadInEXT float p; + +void main() +{ + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0), 0.0, vec3(0.0, 0.0, -1.0), 1000.0, 0); +} + diff --git a/reference/opt/shaders/vulkan/vert/device-group.nocompat.vk.vert.vk b/reference/opt/shaders/vulkan/vert/device-group.nocompat.vk.vert.vk new file mode 100644 index 00000000000..9cadcdb6dce --- /dev/null +++ b/reference/opt/shaders/vulkan/vert/device-group.nocompat.vk.vert.vk @@ -0,0 +1,8 @@ +#version 450 +#extension GL_EXT_device_group : require + +void main() +{ + gl_Position = vec4(float(gl_DeviceIndex)); +} + diff --git a/reference/opt/shaders/vulkan/vert/small-storage.vk.vert b/reference/opt/shaders/vulkan/vert/small-storage.vk.vert index b3aafc8d8c1..2c4beb71e5f 100644 --- a/reference/opt/shaders/vulkan/vert/small-storage.vk.vert +++ b/reference/opt/shaders/vulkan/vert/small-storage.vk.vert @@ -1,10 +1,20 @@ #version 450 -#if defined(GL_AMD_gpu_shader_int16) +#if defined(GL_EXT_shader_explicit_arithmetic_types_int16) +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#elif defined(GL_AMD_gpu_shader_int16) #extension GL_AMD_gpu_shader_int16 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require #else #error No extension available for Int16. 
#endif +#if defined(GL_EXT_shader_explicit_arithmetic_types_int8) #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for Int8. +#endif #if defined(GL_AMD_gpu_shader_half_float) #extension GL_AMD_gpu_shader_half_float : require #elif defined(GL_NV_gpu_shader5) diff --git a/reference/opt/shaders/vulkan/vert/vulkan-vertex.vk.vert b/reference/opt/shaders/vulkan/vert/vulkan-vertex.vk.vert index 60ba1882f82..d939aa625c5 100644 --- a/reference/opt/shaders/vulkan/vert/vulkan-vertex.vk.vert +++ b/reference/opt/shaders/vulkan/vert/vulkan-vertex.vk.vert @@ -1,6 +1,13 @@ #version 310 es +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif void main() { diff --git a/reference/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp new file mode 100644 index 00000000000..986cc6289f5 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp @@ -0,0 +1,75 @@ +struct Baz +{ + float c; +}; + +struct Bar +{ + float d[2][4]; + Baz baz[2]; +}; + +struct Foo +{ + column_major float2x2 a; + float2 b; + Bar c[5]; +}; + +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _10 : register(u0); + +void comp_main() +{ + Foo _64; + _64.a = asfloat(uint2x2(_10.Load(0), _10.Load(8), _10.Load(4), _10.Load(12))); + _64.b = asfloat(_10.Load2(16)); + [unroll] + for (int _0ident = 0; _0ident < 5; _0ident++) + { + [unroll] + for (int _1ident = 0; _1ident < 2; _1ident++) + { + [unroll] + for (int _2ident = 0; _2ident < 4; _2ident++) + { + _64.c[_0ident].d[_1ident][_2ident] = asfloat(_10.Load(_2ident * 4 + _1ident * 16 + 
_0ident * 40 + 24)); + } + } + [unroll] + for (int _3ident = 0; _3ident < 2; _3ident++) + { + _64.c[_0ident].baz[_3ident].c = asfloat(_10.Load(_3ident * 4 + _0ident * 40 + 56)); + } + } + _10.Store(224, asuint(_64.a[0].x)); + _10.Store(228, asuint(_64.a[1].x)); + _10.Store(232, asuint(_64.a[0].y)); + _10.Store(236, asuint(_64.a[1].y)); + _10.Store2(240, asuint(_64.b)); + [unroll] + for (int _4ident = 0; _4ident < 5; _4ident++) + { + [unroll] + for (int _5ident = 0; _5ident < 2; _5ident++) + { + [unroll] + for (int _6ident = 0; _6ident < 4; _6ident++) + { + _10.Store(_6ident * 4 + _5ident * 16 + _4ident * 40 + 248, asuint(_64.c[_4ident].d[_5ident][_6ident])); + } + } + [unroll] + for (int _7ident = 0; _7ident < 2; _7ident++) + { + _10.Store(_7ident * 4 + _4ident * 40 + 280, asuint(_64.c[_4ident].baz[_7ident].c)); + } + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 00000000000..b75157162d9 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,22 @@ +struct T +{ + float c; +}; + +static const T _18 = { 40.0f }; + +RWByteAddressBuffer _7 : register(u0); +RWByteAddressBuffer _10 : register(u1); + +void comp_main() +{ + T v = _18; + _7.Store(40, asuint(v.c)); + _10.Store(480, asuint(v.c)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 00000000000..4f6a3e34c2c --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,18 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _5 : register(u0); + +void comp_main() +{ + uint _20; + 
_5.InterlockedAdd(4, 0, _20); + uint c = _20; + uint _23; + _5.InterlockedExchange(0, c, _23); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 00000000000..ebc431b3edc --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,105 @@ +RWByteAddressBuffer _3 : register(u0); + +uint spvBitfieldInsert(uint Base, uint Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint2 spvBitfieldInsert(uint2 Base, uint2 Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint3 spvBitfieldInsert(uint3 Base, uint3 Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint4 spvBitfieldInsert(uint4 Base, uint4 Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint spvBitfieldUExtract(uint Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +uint2 spvBitfieldUExtract(uint2 Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +uint3 spvBitfieldUExtract(uint3 Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +uint4 spvBitfieldUExtract(uint4 Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 
0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +int spvBitfieldSExtract(int Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +int2 spvBitfieldSExtract(int2 Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int2 Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +int3 spvBitfieldSExtract(int3 Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int3 Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +int4 spvBitfieldSExtract(int4 Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int4 Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +void comp_main() +{ + int4 _19 = int4(_3.Load4(0)); + uint4 _20 = _3.Load4(16); + _3.Store4(0, uint4(countbits(_19))); + _3.Store4(16, uint4(countbits(_19))); + _3.Store4(0, uint4(int4(countbits(_20)))); + _3.Store4(16, countbits(_20)); + _3.Store4(0, uint4(reversebits(_19))); + _3.Store4(16, reversebits(_20)); + _3.Store4(0, uint4(spvBitfieldSExtract(_19, 1, 11u))); + _3.Store4(16, spvBitfieldSExtract(_20, 11u, 1)); + _3.Store4(0, uint4(spvBitfieldUExtract(_19, 1, 11u))); + _3.Store4(16, spvBitfieldUExtract(_20, 11u, 1)); + _3.Store4(0, uint4(int4(spvBitfieldInsert(_19, _19.wzyx, 1, 11u)))); + _3.Store4(16, spvBitfieldInsert(_20, _20.wzyx, 11u, 1)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/bitscan.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 00000000000..f8a5fb6fa34 --- /dev/null +++ 
b/reference/shaders-hlsl-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,25 @@ +RWByteAddressBuffer _4 : register(u0); + +void comp_main() +{ + uint4 _19 = _4.Load4(0); + int4 _20 = int4(_4.Load4(16)); + _4.Store4(0, firstbitlow(_19)); + _4.Store4(16, uint4(int4(firstbitlow(_19)))); + _4.Store4(0, uint4(firstbitlow(_20))); + _4.Store4(16, uint4(firstbitlow(_20))); + _4.Store4(0, firstbithigh(_19)); + _4.Store4(16, uint4(int4(firstbithigh(_19)))); + _4.Store4(0, firstbithigh(uint4(_20))); + _4.Store4(16, uint4(int4(firstbithigh(uint4(_20))))); + _4.Store4(0, uint4(firstbithigh(int4(_19)))); + _4.Store4(16, uint4(firstbithigh(int4(_19)))); + _4.Store4(0, uint4(firstbithigh(_20))); + _4.Store4(16, uint4(firstbithigh(_20))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/atomic-increment.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/buffer-atomic-nonuniform.asm.sm51.nonuniformresource.comp similarity index 53% rename from reference/opt/shaders-hlsl/comp/atomic-increment.asm.comp rename to reference/shaders-hlsl-no-opt/asm/comp/buffer-atomic-nonuniform.asm.sm51.nonuniformresource.comp index f2338f22518..0b6d93e48f8 100644 --- a/reference/opt/shaders-hlsl/comp/atomic-increment.asm.comp +++ b/reference/shaders-hlsl-no-opt/asm/comp/buffer-atomic-nonuniform.asm.sm51.nonuniformresource.comp @@ -1,5 +1,4 @@ -RWByteAddressBuffer u0_counter : register(u1); -RWBuffer u0 : register(u0); +RWByteAddressBuffer ssbos[] : register(u0, space0); static uint3 gl_GlobalInvocationID; struct SPIRV_Cross_Input @@ -9,12 +8,12 @@ struct SPIRV_Cross_Input void comp_main() { - uint _29; - u0_counter.InterlockedAdd(0, 1, _29); - u0[uint(asint(asfloat(_29))) + 0u] = uint(int(gl_GlobalInvocationID.x)).x; + uint _24 = gl_GlobalInvocationID.z; + uint _25; + ssbos[NonUniformResourceIndex(_24)].InterlockedAdd(0, 1u, _25); } -[numthreads(4, 1, 1)] +[numthreads(1, 1, 1)] void main(SPIRV_Cross_Input stage_input) { gl_GlobalInvocationID = 
stage_input.gl_GlobalInvocationID; diff --git a/reference/shaders-hlsl-no-opt/asm/comp/constant-composite-undef.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/constant-composite-undef.asm.comp index 4851d21e16b..a9eab1ccafc 100644 --- a/reference/shaders-hlsl-no-opt/asm/comp/constant-composite-undef.asm.comp +++ b/reference/shaders-hlsl-no-opt/asm/comp/constant-composite-undef.asm.comp @@ -1,6 +1,6 @@ -RWByteAddressBuffer block : register(u0); +static float _15; -float _15; +RWByteAddressBuffer block : register(u0); void comp_main() { diff --git a/reference/shaders-hlsl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 00000000000..d3dc5337530 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,27 @@ +static const uint3 gl_WorkGroupSize = uint3(4u, 4u, 1u); + +static const int indexable[4] = { 0, 1, 2, 3 }; +static const int indexable_1[4] = { 4, 5, 6, 7 }; + +RWByteAddressBuffer _6 : register(u0); + +static uint3 gl_LocalInvocationID; +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_LocalInvocationID : SV_GroupThreadID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + _6.Store(gl_GlobalInvocationID.x * 4 + 0, uint(indexable[gl_LocalInvocationID.x] + indexable_1[gl_LocalInvocationID.y])); +} + +[numthreads(4, 4, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_LocalInvocationID = stage_input.gl_LocalInvocationID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 00000000000..a53efc4f7fe --- /dev/null +++ 
b/reference/shaders-hlsl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,11 @@ +static const uint3 gl_WorkGroupSize = uint3(64u, 1u, 1u); + +void comp_main() +{ +} + +[numthreads(64, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.fxconly.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.fxconly.asm.comp new file mode 100644 index 00000000000..b1232635eac --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.fxconly.asm.comp @@ -0,0 +1,31 @@ +struct _8 +{ + float _m0; + float _m1; +}; + +struct _15 +{ + float _m0; + int _m1; +}; + +RWByteAddressBuffer _4 : register(u0); + +void comp_main() +{ + _8 _23; + _23._m0 = modf(20.0f, _23._m1); + _15 _24; + _24._m0 = frexp(40.0f, _24._m1); + _4.Store(0, asuint(_23._m0)); + _4.Store(0, asuint(_23._m1)); + _4.Store(0, asuint(_24._m0)); + _4.Store(4, uint(_24._m1)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/atomic-decrement.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/image-atomic-nonuniform.asm.sm51.nonuniformresource.comp similarity index 53% rename from reference/opt/shaders-hlsl/comp/atomic-decrement.asm.comp rename to reference/shaders-hlsl-no-opt/asm/comp/image-atomic-nonuniform.asm.sm51.nonuniformresource.comp index b86b5327e7a..07f87ca9e65 100644 --- a/reference/opt/shaders-hlsl/comp/atomic-decrement.asm.comp +++ b/reference/shaders-hlsl-no-opt/asm/comp/image-atomic-nonuniform.asm.sm51.nonuniformresource.comp @@ -1,5 +1,4 @@ -RWByteAddressBuffer u0_counter : register(u1); -RWBuffer u0 : register(u0); +RWTexture2D uImage[] : register(u0, space0); static uint3 gl_GlobalInvocationID; struct SPIRV_Cross_Input @@ -9,12 +8,12 @@ struct SPIRV_Cross_Input void comp_main() { - uint _29; - u0_counter.InterlockedAdd(0, -1, _29); - u0[uint(asint(asfloat(_29))) + 0u] = 
uint(int(gl_GlobalInvocationID.x)).x; + uint _26 = gl_GlobalInvocationID.z; + uint _31; + InterlockedAdd(uImage[NonUniformResourceIndex(_26)][int2(gl_GlobalInvocationID.xy)], 1u, _31); } -[numthreads(4, 1, 1)] +[numthreads(1, 1, 1)] void main(SPIRV_Cross_Input stage_input) { gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; diff --git a/reference/shaders-hlsl-no-opt/asm/comp/local-size-id-override.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/local-size-id-override.asm.comp new file mode 100644 index 00000000000..dbc881f9982 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/local-size-id-override.asm.comp @@ -0,0 +1,37 @@ +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 11u +#endif +static const uint _10 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 12u +#endif +static const uint _11 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 13u +#endif +static const uint _4 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 14u +#endif +static const uint _5 = SPIRV_CROSS_CONSTANT_ID_4; +static const uint3 gl_WorkGroupSize = uint3(3u, _10, _11); + +RWByteAddressBuffer _8 : register(u0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + _8.Store4(gl_GlobalInvocationID.x * 16 + 0, asuint(asfloat(_8.Load4(gl_GlobalInvocationID.x * 16 + 0)) + 2.0f.xxxx)); +} + +[numthreads(3, SPIRV_CROSS_CONSTANT_ID_1, SPIRV_CROSS_CONSTANT_ID_2)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/local-size-id.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/local-size-id.asm.comp new file mode 100644 index 00000000000..157f9e99218 --- /dev/null +++ 
b/reference/shaders-hlsl-no-opt/asm/comp/local-size-id.asm.comp @@ -0,0 +1,38 @@ +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 11 +#endif +static const int _10 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 12 +#endif +static const int _11 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 13 +#endif +static const int _4 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 14 +#endif +static const int _5 = SPIRV_CROSS_CONSTANT_ID_4; +static const uint _29 = (uint(_4) + 3u); +static const uint3 _30 = uint3(_29, _5, 2u); + +RWByteAddressBuffer _8 : register(u0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + _8.Store4(gl_GlobalInvocationID.x * 16 + 0, asuint(((((asfloat(_8.Load4(gl_GlobalInvocationID.x * 16 + 0)) + 2.0f.xxxx) + float3(_30).xyzz) * float(_4)) * float(_5)) * float(int(2u)))); +} + +[numthreads(SPIRV_CROSS_CONSTANT_ID_3, SPIRV_CROSS_CONSTANT_ID_4, 2)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/num-workgroups.spv14.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/num-workgroups.spv14.asm.comp new file mode 100644 index 00000000000..e771d77bb8b --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/num-workgroups.spv14.asm.comp @@ -0,0 +1,24 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _3 : register(u1); +cbuffer UBO : register(b0) +{ + uint3 _5_w : packoffset(c0); +}; + +cbuffer SPIRV_Cross_NumWorkgroups +{ + uint3 SPIRV_Cross_NumWorkgroups_1_count : packoffset(c0); +}; + + +void comp_main() +{ + _3.Store3(0, SPIRV_Cross_NumWorkgroups_1_count + _5_w); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} 
diff --git a/reference/shaders-hlsl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp new file mode 100644 index 00000000000..423beee63fe --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp @@ -0,0 +1,51 @@ +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0 +#endif +static const int A = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1 +#endif +static const int A_1 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 2 +#endif +static const int A_2 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 3 +#endif +static const int A_3 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 4 +#endif +static const int A_4 = SPIRV_CROSS_CONSTANT_ID_4; +#ifndef SPIRV_CROSS_CONSTANT_ID_5 +#define SPIRV_CROSS_CONSTANT_ID_5 5 +#endif +static const int A_5 = SPIRV_CROSS_CONSTANT_ID_5; +static const int A_6 = (A - A_1); +static const int A_7 = (A_6 - A_2); +static const int A_8 = (A_7 - A_3); +static const int A_9 = (A_8 - A_4); +static const int A_10 = (A_9 - A_5); +static const int A_11 = (A_10 + A_5); +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _5 : register(u0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + _5.Store(gl_GlobalInvocationID.x * 4 + 0, uint(A_11)); +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/storage-buffer-basic.nofxc.asm.comp similarity index 100% rename from 
reference/opt/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp rename to reference/shaders-hlsl-no-opt/asm/comp/storage-buffer-basic.nofxc.asm.comp diff --git a/reference/shaders-hlsl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag new file mode 100644 index 00000000000..128a8c52f95 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag @@ -0,0 +1,89 @@ +struct anon_aa +{ + int foo; +}; + +struct anon_ab +{ + int foo; +}; + +struct anon_a +{ + anon_aa _aa; + anon_ab ab; +}; + +struct anon_ba +{ + int foo; +}; + +struct anon_bb +{ + int foo; +}; + +struct anon_b +{ + anon_ba _ba; + anon_bb bb; +}; + +struct VertexData +{ + anon_a _a; + anon_b b; +}; + +struct anon_ca +{ + int foo; +}; + +struct anon_c +{ + anon_ca _ca; +}; + +struct anon_da +{ + int foo; +}; + +struct anon_d +{ + anon_da da; +}; + +struct anon_e +{ + int a; +}; + +cbuffer UBO : register(b0) +{ + anon_c _16_c : packoffset(c0); + anon_d _16_d : packoffset(c1); +}; + +RWByteAddressBuffer _19 : register(u1); + +static VertexData _3; + +struct SPIRV_Cross_Input +{ + anon_a VertexData__a : TEXCOORD0; + anon_b VertexData_b : TEXCOORD2; +}; + +void frag_main() +{ +} + +void main(SPIRV_Cross_Input stage_input) +{ + _3._a = stage_input.VertexData__a; + _3.b = stage_input.VertexData_b; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/composite-insert-inheritance.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/composite-insert-inheritance.asm.frag new file mode 100644 index 00000000000..8c61e61b5fb --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/composite-insert-inheritance.asm.frag @@ -0,0 +1,84 @@ +static float4 _32; + +static const float4 _34[2] = { 0.0f.xxxx, 0.0f.xxxx }; + +static float4 vInput; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 vInput : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 
FragColor : SV_Target0; +}; + +void frag_main() +{ + float4 _37 = vInput; + float4 _38 = _37; + _38.x = 1.0f; + _38.y = 2.0f; + _38.z = 3.0f; + _38.w = 4.0f; + FragColor = _38; + float4 _6 = _37; + _6.x = 1.0f; + _6.y = 2.0f; + _6.z = 3.0f; + _6.w = 4.0f; + FragColor = _6; + float4 _42 = _37; + _42.x = 1.0f; + _42.y = 2.0f; + _42.z = 3.0f; + _42.w = 4.0f; + FragColor = _42; + float4 _44 = _37; + _44.x = 1.0f; + float4 _45 = _44; + _45.y = 2.0f; + float4 _46 = _45; + _46.z = 3.0f; + float4 _47 = _46; + _47.w = 4.0f; + FragColor = _47 + _44; + FragColor = _47 + _45; + float4 _49; + _49.x = 1.0f; + _49.y = 2.0f; + _49.z = 3.0f; + _49.w = 4.0f; + FragColor = _49; + float4 _53 = 0.0f.xxxx; + _53.x = 1.0f; + FragColor = _53; + float4 _54[2] = _34; + _54[1].z = 1.0f; + _54[0].w = 2.0f; + FragColor = _54[0]; + FragColor = _54[1]; + float4x4 _58 = float4x4(0.0f.xxxx, 0.0f.xxxx, 0.0f.xxxx, 0.0f.xxxx); + _58[1].z = 1.0f; + _58[2].w = 2.0f; + FragColor = _58[0]; + FragColor = _58[1]; + FragColor = _58[2]; + FragColor = _58[3]; + float4 PHI; + PHI = _46; + float4 _65 = PHI; + _65.w = 4.0f; + FragColor = _65; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vInput = stage_input.vInput; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/empty-struct-in-struct.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/empty-struct-in-struct.asm.frag new file mode 100644 index 00000000000..e7ffd8d6069 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/empty-struct-in-struct.asm.frag @@ -0,0 +1,37 @@ +struct EmptyStructTest +{ + int empty_struct_member; +}; + +struct EmptyStruct2Test +{ + EmptyStructTest _m0; +}; + +static const EmptyStructTest _30 = { 0 }; +static const EmptyStruct2Test _20 = { { 0 } }; + +float GetValue(EmptyStruct2Test self) +{ + return 0.0f; +} + +float GetValue_1(EmptyStruct2Test self) +{ + return 0.0f; +} + +void frag_main() +{ + 
EmptyStructTest _25 = { 0 }; + EmptyStruct2Test _26 = { _25 }; + EmptyStruct2Test emptyStruct; + float value = GetValue(emptyStruct); + value = GetValue_1(_26); + value = GetValue_1(_20); +} + +void main() +{ + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-bracket-handling-2.nonuniformresource.sm51.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-bracket-handling-2.nonuniformresource.sm51.asm.frag new file mode 100644 index 00000000000..fd758b17d74 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-bracket-handling-2.nonuniformresource.sm51.asm.frag @@ -0,0 +1,37 @@ +ByteAddressBuffer _8 : register(t0, space2); +Texture2D uSamplers[] : register(t0, space0); +SamplerState _uSamplers_sampler[] : register(s0, space0); +Texture2D uSampler : register(t1, space1); +SamplerState _uSampler_sampler : register(s1, space1); + +static float4 gl_FragCoord; +static float4 FragColor; +static float2 vUV; + +struct SPIRV_Cross_Input +{ + float2 vUV : TEXCOORD0; + float4 gl_FragCoord : SV_Position; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = uSamplers[NonUniformResourceIndex(_8.Load(40))].SampleLevel(_uSamplers_sampler[NonUniformResourceIndex(_8.Load(40))], vUV, 0.0f); + FragColor += uSampler.SampleLevel(_uSampler_sampler, vUV, float(_8.Load(int(gl_FragCoord.y) * 4 + 0))); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + vUV = stage_input.vUV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag index 44cc8ab221e..0356cf58d9c 100644 --- 
a/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag +++ b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag @@ -1,9 +1,9 @@ -struct UBO_1_1 +struct UBO_1 { float4 v[64]; }; -ConstantBuffer ubos[] : register(b0, space2); +ConstantBuffer ubos[] : register(b0, space2); ByteAddressBuffer ssbos[] : register(t0, space3); Texture2D uSamplers[] : register(t0, space0); SamplerState uSamps[] : register(s0, space1); diff --git a/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-ssbo.sm51.nonuniformresource.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-ssbo.sm51.nonuniformresource.asm.frag new file mode 100644 index 00000000000..a692cdcf408 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-ssbo.sm51.nonuniformresource.asm.frag @@ -0,0 +1,39 @@ +RWByteAddressBuffer ssbos[] : register(u3, space0); + +static int vIndex; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + nointerpolation int vIndex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + int i = vIndex; + int _42 = i + 60; + int _45 = i + 70; + ssbos[NonUniformResourceIndex(_42)].Store4(_45 * 16 + 16, asuint(20.0f.xxxx)); + int _48 = i + 100; + uint _49; + ssbos[NonUniformResourceIndex(_48)].InterlockedAdd(0, 100u, _49); + int _51 = i; + uint _52; + ssbos[NonUniformResourceIndex(_51)].GetDimensions(_52); + _52 = (_52 - 16) / 16; + FragColor.z += float(int(_52)); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vIndex = stage_input.vIndex; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag new file mode 100644 index 00000000000..dcbe5d134f1 --- /dev/null +++ 
b/reference/shaders-hlsl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag @@ -0,0 +1,17 @@ +static float gl_FragDepth = 0.5f; +struct SPIRV_Cross_Output +{ + float gl_FragDepth : SV_Depth; +}; + +void frag_main() +{ +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_FragDepth = gl_FragDepth; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/phi.zero-initialize.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/phi.zero-initialize.asm.frag new file mode 100644 index 00000000000..2ce5fd41cf6 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/phi.zero-initialize.asm.frag @@ -0,0 +1,46 @@ +static int uninit_int = 0; +static int4 uninit_vector = int4(0, 0, 0, 0); +static float4x4 uninit_matrix = float4x4(0.0f.xxxx, 0.0f.xxxx, 0.0f.xxxx, 0.0f.xxxx); + +struct Foo +{ + int a; +}; + +static Foo uninit_foo = { 0 }; + +static float4 vColor; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 vColor : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + int _39 = 0; + if (vColor.x > 10.0f) + { + _39 = 10; + } + else + { + _39 = 20; + } + FragColor = vColor; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vColor = stage_input.vColor; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag new file mode 100644 index 00000000000..8a47b91bf9a --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag @@ -0,0 +1,33 @@ +RasterizerOrderedByteAddressBuffer _7 : register(u1, space0); +RWByteAddressBuffer _9 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : 
SV_Position; +}; + +void callee2() +{ + int _31 = int(gl_FragCoord.x); + _7.Store(_31 * 4 + 0, _7.Load(_31 * 4 + 0) + 1u); +} + +void callee() +{ + int _39 = int(gl_FragCoord.x); + _9.Store(_39 * 4 + 0, _9.Load(_39 * 4 + 0) + 1u); + callee2(); +} + +void frag_main() +{ + callee(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag new file mode 100644 index 00000000000..01bbe7ddc27 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag @@ -0,0 +1,43 @@ +RasterizerOrderedByteAddressBuffer _7 : register(u1, space0); +RWByteAddressBuffer _13 : register(u2, space0); +RasterizerOrderedByteAddressBuffer _9 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _44 = int(gl_FragCoord.x); + _7.Store(_44 * 4 + 0, _7.Load(_44 * 4 + 0) + 1u); +} + +void callee() +{ + int _52 = int(gl_FragCoord.x); + _9.Store(_52 * 4 + 0, _9.Load(_52 * 4 + 0) + 1u); + callee2(); + if (true) + { + } +} + +void _35() +{ + _13.Store(int(gl_FragCoord.x) * 4 + 0, 4u); +} + +void frag_main() +{ + callee(); + _35(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag new file mode 100644 index 00000000000..c1fb6ebbc23 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag @@ -0,0 +1,43 @@ 
+RasterizerOrderedByteAddressBuffer _7 : register(u1, space0); +RasterizerOrderedByteAddressBuffer _9 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _37 = int(gl_FragCoord.x); + _7.Store(_37 * 4 + 0, _7.Load(_37 * 4 + 0) + 1u); +} + +void callee() +{ + int _45 = int(gl_FragCoord.x); + _9.Store(_45 * 4 + 0, _9.Load(_45 * 4 + 0) + 1u); + callee2(); +} + +void _29() +{ +} + +void _31() +{ +} + +void frag_main() +{ + callee(); + _29(); + _31(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/reserved-function-identifier.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/reserved-function-identifier.asm.frag new file mode 100644 index 00000000000..1f1f6fac107 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/reserved-function-identifier.asm.frag @@ -0,0 +1,31 @@ +static float FragColor; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +float _mat3(float a) +{ + return a + 1.0f; +} + +float _RESERVED_IDENTIFIER_FIXUP_gl_Foo(int a) +{ + return float(a) + 1.0f; +} + +void frag_main() +{ + float param = 2.0f; + int param_1 = 4; + FragColor = _mat3(param) + _RESERVED_IDENTIFIER_FIXUP_gl_Foo(param_1); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/scalar-select.spv14.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/scalar-select.spv14.asm.frag new file mode 100644 index 00000000000..b6d1a902424 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/scalar-select.spv14.asm.frag @@ -0,0 +1,62 @@ +struct _15 +{ + float _m0; +}; + +static const _15 _25 = { 0.0f }; +static const _15 _26 = { 1.0f }; +static const float _29[2] = { 0.0f, 1.0f 
}; +static const float _30[2] = { 1.0f, 0.0f }; + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void spvSelectComposite(out _15 out_value, bool cond, _15 true_val, _15 false_val) +{ + if (cond) + { + out_value = true_val; + } + else + { + out_value = false_val; + } +} + +void spvSelectComposite(out float out_value[2], bool cond, float true_val[2], float false_val[2]) +{ + if (cond) + { + out_value = true_val; + } + else + { + out_value = false_val; + } +} + +void frag_main() +{ + FragColor = false ? float4(1.0f, 1.0f, 0.0f, 1.0f) : float4(0.0f, 0.0f, 0.0f, 1.0f); + FragColor = false ? 1.0f.xxxx : 0.0f.xxxx; + FragColor = float4(bool4(false, true, false, true).x ? float4(1.0f, 1.0f, 0.0f, 1.0f).x : float4(0.0f, 0.0f, 0.0f, 1.0f).x, bool4(false, true, false, true).y ? float4(1.0f, 1.0f, 0.0f, 1.0f).y : float4(0.0f, 0.0f, 0.0f, 1.0f).y, bool4(false, true, false, true).z ? float4(1.0f, 1.0f, 0.0f, 1.0f).z : float4(0.0f, 0.0f, 0.0f, 1.0f).z, bool4(false, true, false, true).w ? 
float4(1.0f, 1.0f, 0.0f, 1.0f).w : float4(0.0f, 0.0f, 0.0f, 1.0f).w); + FragColor = float4(bool4(false, true, false, true)); + _15 _38; + spvSelectComposite(_38, false, _25, _26); + _15 _32 = _38; + float _39[2]; + spvSelectComposite(_39, true, _29, _30); + float _33[2] = _39; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/struct-packing-last-element-array-matrix-rule.invalid.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/struct-packing-last-element-array-matrix-rule.invalid.asm.frag new file mode 100644 index 00000000000..b88ac0dd726 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/struct-packing-last-element-array-matrix-rule.invalid.asm.frag @@ -0,0 +1,42 @@ +struct Foo +{ + row_major float3x3 m[2]; + float v; +}; + +struct Bar +{ + row_major float3x3 m; + float v; +}; + +cbuffer FooUBO : register(b0) +{ + Foo _6_foo : packoffset(c0); +}; + +cbuffer BarUBO : register(b1) +{ + Bar _9_bar : packoffset(c0); +}; + + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = (_6_foo.v + _9_bar.v).xxxx; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/subgroup-arithmetic-cast.invalid.nofxc.sm60.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/subgroup-arithmetic-cast.invalid.nofxc.sm60.asm.frag new file mode 100644 index 00000000000..b98e681f6df --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/subgroup-arithmetic-cast.invalid.nofxc.sm60.asm.frag @@ -0,0 +1,30 @@ +static int index; +static uint FragColor; + +struct SPIRV_Cross_Input +{ + nointerpolation int index : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + uint FragColor : SV_Target0; +}; + +void frag_main() +{ + 
uint _17 = uint(index); + FragColor = uint(WaveActiveMin(index)); + FragColor = uint(WaveActiveMax(int(_17))); + FragColor = WaveActiveMin(uint(index)); + FragColor = WaveActiveMax(_17); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + index = stage_input.index; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag similarity index 86% rename from reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag rename to reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag index ba66ccf6261..19af59d3bff 100644 --- a/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag +++ b/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag @@ -17,14 +17,6 @@ void frag_main() int j; int _30; int _31; - if (vIndex != 0 && vIndex != 1 && vIndex != 11 && vIndex != 2 && vIndex != 3 && vIndex != 4 && vIndex != 5) - { - _30 = 2; - } - if (vIndex == 1 || vIndex == 11) - { - _31 = 1; - } switch (vIndex) { case 0: @@ -37,6 +29,7 @@ void frag_main() } default: { + _30 = 2; j = _30; _31 = 0; j = _31; @@ -45,6 +38,7 @@ void frag_main() case 1: case 11: { + _31 = 1; j = _31; break; } diff --git a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag new file mode 100644 index 00000000000..021333cc742 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag @@ -0,0 +1,52 @@ +static float4 A; +static float4 B; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 A : TEXCOORD0; + float4 B : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +float4 test_vector() +{ + bool4 le = 
bool4(!(A.x >= B.x), !(A.y >= B.y), !(A.z >= B.z), !(A.w >= B.w)); + bool4 leq = bool4(!(A.x > B.x), !(A.y > B.y), !(A.z > B.z), !(A.w > B.w)); + bool4 ge = bool4(!(A.x <= B.x), !(A.y <= B.y), !(A.z <= B.z), !(A.w <= B.w)); + bool4 geq = bool4(!(A.x < B.x), !(A.y < B.y), !(A.z < B.z), !(A.w < B.w)); + bool4 eq = bool4(A.x == B.x, A.y == B.y, A.z == B.z, A.w == B.w); + bool4 neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); + neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); + return ((((float4(le) + float4(leq)) + float4(ge)) + float4(geq)) + float4(eq)) + float4(neq); +} + +float test_scalar() +{ + bool le = !(A.x >= B.x); + bool leq = !(A.x > B.x); + bool ge = !(A.x <= B.x); + bool geq = !(A.x < B.x); + bool eq = A.x == B.x; + bool neq = A.x != B.x; + return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); +} + +void frag_main() +{ + FragColor = test_vector() + test_scalar().xxxx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + A = stage_input.A; + B = stage_input.B; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag new file mode 100644 index 00000000000..0172c20bb91 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag @@ -0,0 +1,52 @@ +static float4 A; +static float4 B; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 A : TEXCOORD0; + float4 B : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +float4 test_vector() +{ + bool4 le = bool4(A.x < B.x, A.y < B.y, A.z < B.z, A.w < B.w); + bool4 leq = bool4(A.x <= B.x, A.y <= B.y, A.z <= B.z, A.w <= B.w); + bool4 ge = bool4(A.x > B.x, A.y > B.y, A.z > B.z, A.w > B.w); + bool4 geq = bool4(A.x >= B.x, A.y >= B.y, A.z >= B.z, 
A.w >= B.w); + bool4 eq = bool4(A.x == B.x, A.y == B.y, A.z == B.z, A.w == B.w); + bool4 neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); + neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); + return ((((float4(le) + float4(leq)) + float4(ge)) + float4(geq)) + float4(eq)) + float4(neq); +} + +float test_scalar() +{ + bool le = A.x < B.x; + bool leq = A.x <= B.x; + bool ge = A.x > B.x; + bool geq = A.x >= B.x; + bool eq = A.x == B.x; + bool neq = A.x != B.x; + return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); +} + +void frag_main() +{ + FragColor = test_vector() + test_scalar().xxxx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + A = stage_input.A; + B = stage_input.B; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag index a01d5fa7d0c..95a48835d57 100644 --- a/reference/shaders-hlsl-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag +++ b/reference/shaders-hlsl-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag @@ -1,3 +1,5 @@ +static float4 undef; + static float4 FragColor; static float4 vFloat; @@ -11,8 +13,6 @@ struct SPIRV_Cross_Output float4 FragColor : SV_Target0; }; -float4 undef; - void frag_main() { FragColor = float4(undef.x, vFloat.y, 0.0f, vFloat.w) + float4(vFloat.z, vFloat.y, 0.0f, vFloat.w); diff --git a/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh new file mode 100644 index 00000000000..8fbd2915ae3 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh @@ -0,0 +1,63 @@ +struct _12 +{ + float _m0; +}; + +static uint 
gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 B : TEXCOORD1; + float4 gl_Position : SV_Position; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 C : TEXCOORD3; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float _9[64]; + +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], _12 _11, inout uint3 gl_PrimitiveTriangleIndicesEXT[8], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[8]) +{ + _9[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex); + GroupMemoryBarrierWithGroupSync(); + SetMeshOutputCounts(24u, 8u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.x = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.y = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.z = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.w = _9[gl_LocalInvocationIndex]; + float _63 = _11._m0 + _9[gl_LocalInvocationIndex ^ 1u]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.x = _63; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.y = _63; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.z = _63; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.w = _63; + if (gl_LocalInvocationIndex < 8u) + { + uint _71 = gl_LocalInvocationIndex * 3u; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(_71, _71 + 1u, _71 + 2u); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_LocalInvocationIndex & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_LocalInvocationIndex); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_LocalInvocationIndex); + uint _81 = gl_LocalInvocationIndex ^ 2u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.x = _9[_81]; + 
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.y = _9[_81]; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.z = _9[_81]; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.w = _9[_81]; + } +} + +[outputtopology("triangle")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], in payload _12 _11, out indices uint3 gl_PrimitiveTriangleIndicesEXT[8], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[8]) +{ + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, _11, gl_PrimitiveTriangleIndicesEXT, gl_MeshPrimitivesEXT); +} diff --git a/reference/shaders-hlsl-no-opt/asm/temporary.zero-initialize.asm.frag b/reference/shaders-hlsl-no-opt/asm/temporary.zero-initialize.asm.frag new file mode 100644 index 00000000000..dbdd784529d --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/temporary.zero-initialize.asm.frag @@ -0,0 +1,44 @@ +static float4 FragColor; +static int vA; +static int vB; + +struct SPIRV_Cross_Input +{ + nointerpolation int vA : TEXCOORD0; + nointerpolation int vB : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = 0.0f.xxxx; + int _10 = 0; + int _15 = 0; + for (int _16 = 0, _17 = 0; _16 < vA; _17 = _15, _16 += _10) + { + if ((vA + _16) == 20) + { + _15 = 50; + } + else + { + _15 = ((vB + _16) == 40) ? 
60 : _17; + } + _10 = _15 + 10; + FragColor += 1.0f.xxxx; + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vA = stage_input.vA; + vB = stage_input.vB; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/vert/block-struct-initializer.asm.vert b/reference/shaders-hlsl-no-opt/asm/vert/block-struct-initializer.asm.vert new file mode 100644 index 00000000000..72a86d2d44e --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/vert/block-struct-initializer.asm.vert @@ -0,0 +1,38 @@ +struct Vert +{ + float a; + float b; +}; + +struct Foo +{ + float c; + float d; +}; + +static const Vert _11 = { 0.0f, 0.0f }; +static const Foo _13 = { 0.0f, 0.0f }; + +static Vert _3 = { 0.0f, 0.0f }; +static Foo foo = _13; + +struct SPIRV_Cross_Output +{ + float Vert_a : TEXCOORD0; + float Vert_b : TEXCOORD1; + Foo foo : TEXCOORD2; +}; + +void vert_main() +{ +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.Vert_a = _3.a; + stage_output.Vert_b = _3.b; + stage_output.foo = foo; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/vert/builtin-output-initializer.asm.vert b/reference/shaders-hlsl-no-opt/asm/vert/builtin-output-initializer.asm.vert new file mode 100644 index 00000000000..ee30c1783e7 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/vert/builtin-output-initializer.asm.vert @@ -0,0 +1,28 @@ +static const float _23[1] = { 0.0f }; +static const float _24[1] = { 0.0f }; + +static float4 gl_Position = 0.0f.xxxx; +static float gl_PointSize = 0.0f; +static float gl_ClipDistance[1] = _23; +static float gl_CullDistance[1] = _24; +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; + float gl_ClipDistance0 : SV_ClipDistance0; + float gl_CullDistance0 : SV_CullDistance0; +}; + +void vert_main() +{ + gl_Position = 1.0f.xxxx; +} + +SPIRV_Cross_Output main() +{ + vert_main(); + 
SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.gl_ClipDistance0.x = gl_ClipDistance[0]; + stage_output.gl_CullDistance0.x = gl_CullDistance[0]; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/vert/complex-link-by-name.asm.vert b/reference/shaders-hlsl-no-opt/asm/vert/complex-link-by-name.asm.vert new file mode 100644 index 00000000000..4846e3f5c62 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/vert/complex-link-by-name.asm.vert @@ -0,0 +1,56 @@ +struct Struct_vec4 +{ + float4 m0; +}; + +struct VertexOut +{ + Struct_vec4 m0; + Struct_vec4 m1; +}; + +cbuffer UBO : register(b0) +{ + Struct_vec4 ubo_binding_0_m0 : packoffset(c0); + Struct_vec4 ubo_binding_0_m1 : packoffset(c1); +}; + + +static float4 gl_Position; +static VertexOut output_location_0; +static Struct_vec4 output_location_2; +static Struct_vec4 output_location_3; + +struct SPIRV_Cross_Output +{ + Struct_vec4 VertexOut_m0 : TEXCOORD0; + Struct_vec4 VertexOut_m1 : TEXCOORD1; + Struct_vec4 output_location_2 : TEXCOORD2; + Struct_vec4 output_location_3 : TEXCOORD3; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + Struct_vec4 c; + c.m0 = ubo_binding_0_m0.m0; + Struct_vec4 b; + b.m0 = ubo_binding_0_m1.m0; + gl_Position = c.m0 + b.m0; + output_location_0.m0 = c; + output_location_0.m1 = b; + output_location_2 = c; + output_location_3 = b; +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.VertexOut_m0 = output_location_0.m0; + stage_output.VertexOut_m1 = output_location_0.m1; + stage_output.output_location_2 = output_location_2; + stage_output.output_location_3 = output_location_3; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/comp/bitfield.comp b/reference/shaders-hlsl-no-opt/comp/bitfield.comp index 4e93a145355..be287c4174d 100644 --- a/reference/shaders-hlsl-no-opt/comp/bitfield.comp +++ 
b/reference/shaders-hlsl-no-opt/comp/bitfield.comp @@ -1,52 +1,52 @@ -uint SPIRV_Cross_bitfieldInsert(uint Base, uint Insert, uint Offset, uint Count) +uint spvBitfieldInsert(uint Base, uint Insert, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); return (Base & ~Mask) | ((Insert << Offset) & Mask); } -uint2 SPIRV_Cross_bitfieldInsert(uint2 Base, uint2 Insert, uint Offset, uint Count) +uint2 spvBitfieldInsert(uint2 Base, uint2 Insert, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); return (Base & ~Mask) | ((Insert << Offset) & Mask); } -uint3 SPIRV_Cross_bitfieldInsert(uint3 Base, uint3 Insert, uint Offset, uint Count) +uint3 spvBitfieldInsert(uint3 Base, uint3 Insert, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); return (Base & ~Mask) | ((Insert << Offset) & Mask); } -uint4 SPIRV_Cross_bitfieldInsert(uint4 Base, uint4 Insert, uint Offset, uint Count) +uint4 spvBitfieldInsert(uint4 Base, uint4 Insert, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); return (Base & ~Mask) | ((Insert << Offset) & Mask); } -uint SPIRV_Cross_bitfieldUExtract(uint Base, uint Offset, uint Count) +uint spvBitfieldUExtract(uint Base, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); return (Base >> Offset) & Mask; } -uint2 SPIRV_Cross_bitfieldUExtract(uint2 Base, uint Offset, uint Count) +uint2 spvBitfieldUExtract(uint2 Base, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); return (Base >> Offset) & Mask; } -uint3 SPIRV_Cross_bitfieldUExtract(uint3 Base, uint Offset, uint Count) +uint3 spvBitfieldUExtract(uint3 Base, uint Offset, uint Count) { uint Mask = Count == 32 ? 
0xffffffff : ((1 << Count) - 1); return (Base >> Offset) & Mask; } -uint4 SPIRV_Cross_bitfieldUExtract(uint4 Base, uint Offset, uint Count) +uint4 spvBitfieldUExtract(uint4 Base, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); return (Base >> Offset) & Mask; } -int SPIRV_Cross_bitfieldSExtract(int Base, int Offset, int Count) +int spvBitfieldSExtract(int Base, int Offset, int Count) { int Mask = Count == 32 ? -1 : ((1 << Count) - 1); int Masked = (Base >> Offset) & Mask; @@ -54,7 +54,7 @@ int SPIRV_Cross_bitfieldSExtract(int Base, int Offset, int Count) return (Masked << ExtendShift) >> ExtendShift; } -int2 SPIRV_Cross_bitfieldSExtract(int2 Base, int Offset, int Count) +int2 spvBitfieldSExtract(int2 Base, int Offset, int Count) { int Mask = Count == 32 ? -1 : ((1 << Count) - 1); int2 Masked = (Base >> Offset) & Mask; @@ -62,7 +62,7 @@ int2 SPIRV_Cross_bitfieldSExtract(int2 Base, int Offset, int Count) return (Masked << ExtendShift) >> ExtendShift; } -int3 SPIRV_Cross_bitfieldSExtract(int3 Base, int Offset, int Count) +int3 spvBitfieldSExtract(int3 Base, int Offset, int Count) { int Mask = Count == 32 ? -1 : ((1 << Count) - 1); int3 Masked = (Base >> Offset) & Mask; @@ -70,7 +70,7 @@ int3 SPIRV_Cross_bitfieldSExtract(int3 Base, int Offset, int Count) return (Masked << ExtendShift) >> ExtendShift; } -int4 SPIRV_Cross_bitfieldSExtract(int4 Base, int Offset, int Count) +int4 spvBitfieldSExtract(int4 Base, int Offset, int Count) { int Mask = Count == 32 ? 
-1 : ((1 << Count) - 1); int4 Masked = (Base >> Offset) & Mask; @@ -84,23 +84,23 @@ void comp_main() uint unsigned_value = 0u; int3 signed_values = int3(0, 0, 0); uint3 unsigned_values = uint3(0u, 0u, 0u); - int s = SPIRV_Cross_bitfieldSExtract(signed_value, 5, 20); - uint u = SPIRV_Cross_bitfieldUExtract(unsigned_value, 6, 21); - s = int(SPIRV_Cross_bitfieldInsert(s, 40, 5, 4)); - u = SPIRV_Cross_bitfieldInsert(u, 60u, 5, 4); + int s = spvBitfieldSExtract(signed_value, 5, 20); + uint u = spvBitfieldUExtract(unsigned_value, 6, 21); + s = int(spvBitfieldInsert(s, 40, 5, 4)); + u = spvBitfieldInsert(u, 60u, 5, 4); u = reversebits(u); s = reversebits(s); - int v0 = countbits(u); + int v0 = int(countbits(u)); int v1 = countbits(s); int v2 = int(firstbithigh(u)); int v3 = firstbitlow(s); - int3 s_1 = SPIRV_Cross_bitfieldSExtract(signed_values, 5, 20); - uint3 u_1 = SPIRV_Cross_bitfieldUExtract(unsigned_values, 6, 21); - s_1 = int3(SPIRV_Cross_bitfieldInsert(s_1, int3(40, 40, 40), 5, 4)); - u_1 = SPIRV_Cross_bitfieldInsert(u_1, uint3(60u, 60u, 60u), 5, 4); + int3 s_1 = spvBitfieldSExtract(signed_values, 5, 20); + uint3 u_1 = spvBitfieldUExtract(unsigned_values, 6, 21); + s_1 = int3(spvBitfieldInsert(s_1, int3(40, 40, 40), 5, 4)); + u_1 = spvBitfieldInsert(u_1, uint3(60u, 60u, 60u), 5, 4); u_1 = reversebits(u_1); s_1 = reversebits(s_1); - int3 v0_1 = countbits(u_1); + int3 v0_1 = int3(countbits(u_1)); int3 v1_1 = countbits(s_1); int3 v2_1 = int3(firstbithigh(u_1)); int3 v3_1 = firstbitlow(s_1); diff --git a/reference/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp b/reference/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp new file mode 100644 index 00000000000..7936bf94d09 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp @@ -0,0 +1,297 @@ +struct ResType +{ + float _m0; + int _m1; +}; + +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _19 : register(u0); + +uint spvPackHalf2x16(float2 value) +{ + uint2 
Packed = f32tof16(value); + return Packed.x | (Packed.y << 16); +} + +float2 spvUnpackHalf2x16(uint value) +{ + return f16tof32(uint2(value & 0xffff, value >> 16)); +} + +uint spvPackUnorm4x8(float4 value) +{ + uint4 Packed = uint4(round(saturate(value) * 255.0)); + return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24); +} + +float4 spvUnpackUnorm4x8(uint value) +{ + uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); + return float4(Packed) / 255.0; +} + +uint spvPackSnorm4x8(float4 value) +{ + int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff; + return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24)); +} + +float4 spvUnpackSnorm4x8(uint value) +{ + int SignedValue = int(value); + int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24; + return clamp(float4(Packed) / 127.0, -1.0, 1.0); +} + +uint spvPackUnorm2x16(float2 value) +{ + uint2 Packed = uint2(round(saturate(value) * 65535.0)); + return Packed.x | (Packed.y << 16); +} + +float2 spvUnpackUnorm2x16(uint value) +{ + uint2 Packed = uint2(value & 0xffff, value >> 16); + return float2(Packed) / 65535.0; +} + +uint spvPackSnorm2x16(float2 value) +{ + int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff; + return uint(Packed.x | (Packed.y << 16)); +} + +float2 spvUnpackSnorm2x16(uint value) +{ + int SignedValue = int(value); + int2 Packed = int2(SignedValue << 16, SignedValue) >> 16; + return clamp(float2(Packed) / 32767.0, -1.0, 1.0); +} + +// Returns the inverse of a matrix, by using the algorithm of calculating the classical +// adjoint and dividing by the determinant. The contents of the matrix are changed. +float2x2 spvInverse(float2x2 m) +{ + float2x2 adj; // The adjoint matrix (inverse after dividing by determinant) + + // Create the transpose of the cofactors, as the classical adjoint of the matrix. 
+ adj[0][0] = m[1][1]; + adj[0][1] = -m[0][1]; + + adj[1][0] = -m[1][0]; + adj[1][1] = m[0][0]; + + // Calculate the determinant as a combination of the cofactors of the first row. + float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]); + + // Divide the classical adjoint matrix by the determinant. + // If determinant is zero, matrix is not invertable, so leave it unchanged. + return (det != 0.0f) ? (adj * (1.0f / det)) : m; +} + +// Returns the determinant of a 2x2 matrix. +float spvDet2x2(float a1, float a2, float b1, float b2) +{ + return a1 * b2 - b1 * a2; +} + +// Returns the inverse of a matrix, by using the algorithm of calculating the classical +// adjoint and dividing by the determinant. The contents of the matrix are changed. +float3x3 spvInverse(float3x3 m) +{ + float3x3 adj; // The adjoint matrix (inverse after dividing by determinant) + + // Create the transpose of the cofactors, as the classical adjoint of the matrix. + adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]); + adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]); + adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]); + + adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]); + adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]); + adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]); + + adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]); + adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]); + adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]); + + // Calculate the determinant as a combination of the cofactors of the first row. + float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]); + + // Divide the classical adjoint matrix by the determinant. + // If determinant is zero, matrix is not invertable, so leave it unchanged. + return (det != 0.0f) ? (adj * (1.0f / det)) : m; +} + +// Returns the determinant of a 3x3 matrix. 
+float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +{ + return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); +} + +// Returns the inverse of a matrix, by using the algorithm of calculating the classical +// adjoint and dividing by the determinant. The contents of the matrix are changed. +float4x4 spvInverse(float4x4 m) +{ + float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) + + // Create the transpose of the cofactors, as the classical adjoint of the matrix. + adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]); + adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]); + + adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]); + adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]); + + adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]); + adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]); + + adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][1] 
= spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]); + adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); + + // Calculate the determinant as a combination of the cofactors of the first row. + float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]); + + // Divide the classical adjoint matrix by the determinant. + // If determinant is zero, matrix is not invertable, so leave it unchanged. + return (det != 0.0f) ? (adj * (1.0f / det)) : m; +} + +float spvReflect(float i, float n) +{ + return i - 2.0 * dot(n, i) * n; +} + +float spvRefract(float i, float n, float eta) +{ + float NoI = n * i; + float NoI2 = NoI * NoI; + float k = 1.0 - eta * eta * (1.0 - NoI2); + if (k < 0.0) + { + return 0.0; + } + else + { + return eta * i - (eta * NoI + sqrt(k)) * n; + } +} + +float spvFaceForward(float n, float i, float nref) +{ + return i * nref < 0.0 ? 
n : -n; +} + +void comp_main() +{ + _19.Store(0, asuint(round(asfloat(_19.Load(16))))); + _19.Store(0, asuint(trunc(asfloat(_19.Load(16))))); + _19.Store(0, asuint(abs(asfloat(_19.Load(16))))); + _19.Store(4, uint(abs(int(_19.Load(32))))); + _19.Store(0, asuint(sign(asfloat(_19.Load(16))))); + _19.Store(4, uint(sign(int(_19.Load(32))))); + _19.Store(0, asuint(floor(asfloat(_19.Load(16))))); + _19.Store(0, asuint(ceil(asfloat(_19.Load(16))))); + _19.Store(0, asuint(frac(asfloat(_19.Load(16))))); + _19.Store(0, asuint(radians(asfloat(_19.Load(16))))); + _19.Store(0, asuint(degrees(asfloat(_19.Load(16))))); + _19.Store(0, asuint(sin(asfloat(_19.Load(16))))); + _19.Store(0, asuint(cos(asfloat(_19.Load(16))))); + _19.Store(0, asuint(tan(asfloat(_19.Load(16))))); + _19.Store(0, asuint(asin(asfloat(_19.Load(16))))); + _19.Store(0, asuint(acos(asfloat(_19.Load(16))))); + _19.Store(0, asuint(atan(asfloat(_19.Load(16))))); + _19.Store(0, asuint(sinh(asfloat(_19.Load(16))))); + _19.Store(0, asuint(cosh(asfloat(_19.Load(16))))); + _19.Store(0, asuint(tanh(asfloat(_19.Load(16))))); + _19.Store(0, asuint(atan2(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + _19.Store(0, asuint(pow(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + _19.Store(0, asuint(exp(asfloat(_19.Load(16))))); + _19.Store(0, asuint(log(asfloat(_19.Load(16))))); + _19.Store(0, asuint(exp2(asfloat(_19.Load(16))))); + _19.Store(0, asuint(log2(asfloat(_19.Load(16))))); + _19.Store(0, asuint(sqrt(asfloat(_19.Load(16))))); + _19.Store(0, asuint(rsqrt(asfloat(_19.Load(16))))); + _19.Store(0, asuint(length(asfloat(_19.Load(16))))); + _19.Store(0, asuint(distance(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + _19.Store(0, asuint(sign(asfloat(_19.Load(16))))); + _19.Store(0, asuint(spvFaceForward(asfloat(_19.Load(16)), asfloat(_19.Load(20)), asfloat(_19.Load(24))))); + _19.Store(0, asuint(spvReflect(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + _19.Store(0, asuint(spvRefract(asfloat(_19.Load(16)), 
asfloat(_19.Load(20)), asfloat(_19.Load(24))))); + _19.Store(0, asuint(length(asfloat(_19.Load4(16)).xy))); + _19.Store(0, asuint(distance(asfloat(_19.Load4(16)).xy, asfloat(_19.Load4(16)).zw))); + float2 v2 = normalize(asfloat(_19.Load4(16)).xy); + v2 = faceforward(asfloat(_19.Load4(16)).xy, asfloat(_19.Load4(16)).yz, asfloat(_19.Load4(16)).zw); + v2 = reflect(asfloat(_19.Load4(16)).xy, asfloat(_19.Load4(16)).zw); + v2 = refract(asfloat(_19.Load4(16)).xy, asfloat(_19.Load4(16)).yz, asfloat(_19.Load(28))); + float3 v3 = cross(asfloat(_19.Load4(16)).xyz, asfloat(_19.Load4(16)).yzw); + float2x2 _240 = asfloat(uint2x2(_19.Load2(64), _19.Load2(72))); + _19.Store(0, asuint(determinant(_240))); + float3x3 _246 = asfloat(uint3x3(_19.Load3(80), _19.Load3(96), _19.Load3(112))); + _19.Store(0, asuint(determinant(_246))); + float4x4 _252 = asfloat(uint4x4(_19.Load4(128), _19.Load4(144), _19.Load4(160), _19.Load4(176))); + _19.Store(0, asuint(determinant(_252))); + float2x2 _256 = asfloat(uint2x2(_19.Load2(64), _19.Load2(72))); + float2x2 _257 = spvInverse(_256); + _19.Store2(64, asuint(_257[0])); + _19.Store2(72, asuint(_257[1])); + float3x3 _260 = asfloat(uint3x3(_19.Load3(80), _19.Load3(96), _19.Load3(112))); + float3x3 _261 = spvInverse(_260); + _19.Store3(80, asuint(_261[0])); + _19.Store3(96, asuint(_261[1])); + _19.Store3(112, asuint(_261[2])); + float4x4 _264 = asfloat(uint4x4(_19.Load4(128), _19.Load4(144), _19.Load4(160), _19.Load4(176))); + float4x4 _265 = spvInverse(_264); + _19.Store4(128, asuint(_265[0])); + _19.Store4(144, asuint(_265[1])); + _19.Store4(160, asuint(_265[2])); + _19.Store4(176, asuint(_265[3])); + float tmp; + float _271 = modf(asfloat(_19.Load(16)), tmp); + _19.Store(0, asuint(_271)); + _19.Store(0, asuint(min(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + _19.Store(8, min(_19.Load(48), _19.Load(52))); + _19.Store(4, uint(min(int(_19.Load(32)), int(_19.Load(36))))); + _19.Store(0, asuint(max(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + 
_19.Store(8, max(_19.Load(48), _19.Load(52))); + _19.Store(4, uint(max(int(_19.Load(32)), int(_19.Load(36))))); + _19.Store(0, asuint(clamp(asfloat(_19.Load(16)), asfloat(_19.Load(20)), asfloat(_19.Load(24))))); + _19.Store(8, clamp(_19.Load(48), _19.Load(52), _19.Load(56))); + _19.Store(4, uint(clamp(int(_19.Load(32)), int(_19.Load(36)), int(_19.Load(40))))); + _19.Store(0, asuint(lerp(asfloat(_19.Load(16)), asfloat(_19.Load(20)), asfloat(_19.Load(24))))); + _19.Store(0, asuint(step(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + _19.Store(0, asuint(smoothstep(asfloat(_19.Load(16)), asfloat(_19.Load(20)), asfloat(_19.Load(24))))); + _19.Store(0, asuint(mad(asfloat(_19.Load(16)), asfloat(_19.Load(20)), asfloat(_19.Load(24))))); + ResType _371; + _371._m0 = frexp(asfloat(_19.Load(16)), _371._m1); + int itmp = _371._m1; + _19.Store(0, asuint(_371._m0)); + _19.Store(0, asuint(ldexp(asfloat(_19.Load(16)), itmp))); + _19.Store(8, spvPackSnorm4x8(asfloat(_19.Load4(16)))); + _19.Store(8, spvPackUnorm4x8(asfloat(_19.Load4(16)))); + _19.Store(8, spvPackSnorm2x16(asfloat(_19.Load4(16)).xy)); + _19.Store(8, spvPackUnorm2x16(asfloat(_19.Load4(16)).xy)); + _19.Store(8, spvPackHalf2x16(asfloat(_19.Load4(16)).xy)); + v2 = spvUnpackSnorm2x16(_19.Load(48)); + v2 = spvUnpackUnorm2x16(_19.Load(48)); + v2 = spvUnpackHalf2x16(_19.Load(48)); + float4 v4 = spvUnpackSnorm4x8(_19.Load(48)); + v4 = spvUnpackUnorm4x8(_19.Load(48)); + _19.Store4(32, uint4(firstbitlow(int4(_19.Load4(32))))); + _19.Store4(32, uint4(int4(firstbitlow(_19.Load4(48))))); + _19.Store4(32, uint4(firstbithigh(int4(_19.Load4(32))))); + _19.Store4(32, uint4(int4(firstbithigh(_19.Load4(48))))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/comp/illegal-struct-name.asm.comp b/reference/shaders-hlsl-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 00000000000..dc972bdda87 --- /dev/null +++ 
b/reference/shaders-hlsl-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,22 @@ +struct Foo +{ + float _abs; +}; + +RWByteAddressBuffer _7 : register(u0); + +void comp_main() +{ + Foo _24; + _24._abs = asfloat(_7.Load(0)); + Foo f; + f._abs = _24._abs; + int _abs = 10; + _7.Store(4, asuint(f._abs)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/comp/intmin-literal.comp b/reference/shaders-hlsl-no-opt/comp/intmin-literal.comp new file mode 100644 index 00000000000..9faa7fba7ba --- /dev/null +++ b/reference/shaders-hlsl-no-opt/comp/intmin-literal.comp @@ -0,0 +1,19 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _9 : register(u1); +cbuffer UBO : register(b0) +{ + float _14_b : packoffset(c0); +}; + + +void comp_main() +{ + _9.Store(0, asuint(asfloat(asint(_14_b) ^ int(0x80000000)))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/comp/subgroups-boolean.invalid.nofxc.sm60.comp b/reference/shaders-hlsl-no-opt/comp/subgroups-boolean.invalid.nofxc.sm60.comp new file mode 100644 index 00000000000..ae7ce70c5e9 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/comp/subgroups-boolean.invalid.nofxc.sm60.comp @@ -0,0 +1,28 @@ +static const uint3 gl_WorkGroupSize = uint3(30u, 1u, 1u); + +RWByteAddressBuffer _46 : register(u0, space0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + bool v = gl_GlobalInvocationID.x != 3u; + bool4 v4; + v4.x = bool(WaveActiveBitOr(uint(v))); + v4.y = bool(WaveActiveBitAnd(uint(v))); + v4.z = bool(WaveActiveBitXor(uint(v))); + v4.w = WaveActiveAllEqual(v); + uint4 w = uint4(v4); + _46.Store(gl_GlobalInvocationID.x * 4 + 0, ((w.x + w.y) + w.z) + w.w); +} + +[numthreads(30, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + 
comp_main(); +} diff --git a/reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp b/reference/shaders-hlsl-no-opt/comp/subgroups.invalid.nofxc.sm60.comp similarity index 74% rename from reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp rename to reference/shaders-hlsl-no-opt/comp/subgroups.invalid.nofxc.sm60.comp index b87574f1a7a..4c11a4b1368 100644 --- a/reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp +++ b/reference/shaders-hlsl-no-opt/comp/subgroups.invalid.nofxc.sm60.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _9 : register(u0, space0); static uint4 gl_SubgroupEqMask; @@ -19,9 +21,15 @@ void comp_main() float3 first = WaveReadLaneFirst(20.0f.xxx); uint4 ballot_value = WaveActiveBallot(true); uint bit_count = countbits(ballot_value.x) + countbits(ballot_value.y) + countbits(ballot_value.z) + countbits(ballot_value.w); + uint inclusive_bit_count = countbits(ballot_value.x & gl_SubgroupLeMask.x) + countbits(ballot_value.y & gl_SubgroupLeMask.y) + countbits(ballot_value.z & gl_SubgroupLeMask.z) + countbits(ballot_value.w & gl_SubgroupLeMask.w); + uint exclusive_bit_count = countbits(ballot_value.x & gl_SubgroupLtMask.x) + countbits(ballot_value.y & gl_SubgroupLtMask.y) + countbits(ballot_value.z & gl_SubgroupLtMask.z) + countbits(ballot_value.w & gl_SubgroupLtMask.w); + uint shuffled = WaveReadLaneAt(10u, 8u); + uint shuffled_xor = WaveReadLaneAt(30u, WaveGetLaneIndex() ^ 8u); + uint shuffled_up = WaveReadLaneAt(20u, WaveGetLaneIndex() - 4u); + uint shuffled_down = WaveReadLaneAt(20u, WaveGetLaneIndex() + 4u); bool has_all = WaveActiveAllTrue(true); bool has_any = WaveActiveAnyTrue(true); - bool has_equal = WaveActiveAllEqualBool(true); + bool has_equal = WaveActiveAllEqual(true); float4 added = WaveActiveSum(20.0f.xxxx); int4 iadded = WaveActiveSum(int4(20, 20, 20, 20)); float4 multiplied = WaveActiveProduct(20.0f.xxxx); @@ -35,6 +43,9 @@ void comp_main() uint4 anded = 
WaveActiveBitAnd(ballot_value); uint4 ored = WaveActiveBitOr(ballot_value); uint4 xored = WaveActiveBitXor(ballot_value); + bool4 anded_b = bool4(WaveActiveBitAnd(uint4(bool4(ballot_value.x == uint4(42u, 42u, 42u, 42u).x, ballot_value.y == uint4(42u, 42u, 42u, 42u).y, ballot_value.z == uint4(42u, 42u, 42u, 42u).z, ballot_value.w == uint4(42u, 42u, 42u, 42u).w)))); + bool4 ored_b = bool4(WaveActiveBitOr(uint4(bool4(ballot_value.x == uint4(42u, 42u, 42u, 42u).x, ballot_value.y == uint4(42u, 42u, 42u, 42u).y, ballot_value.z == uint4(42u, 42u, 42u, 42u).z, ballot_value.w == uint4(42u, 42u, 42u, 42u).w)))); + bool4 xored_b = bool4(WaveActiveBitXor(uint4(bool4(ballot_value.x == uint4(42u, 42u, 42u, 42u).x, ballot_value.y == uint4(42u, 42u, 42u, 42u).y, ballot_value.z == uint4(42u, 42u, 42u, 42u).z, ballot_value.w == uint4(42u, 42u, 42u, 42u).w)))); added = WavePrefixSum(added) + added; iadded = WavePrefixSum(iadded) + iadded; multiplied = WavePrefixProduct(multiplied) * multiplied; diff --git a/reference/shaders-hlsl-no-opt/comp/trivial-select-cast-vector.comp b/reference/shaders-hlsl-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 00000000000..94aec455762 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,15 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _14 : register(u0); + +void comp_main() +{ + bool3 c = bool3(asfloat(_14.Load3(16)).x < 1.0f.xxx.x, asfloat(_14.Load3(16)).y < 1.0f.xxx.y, asfloat(_14.Load3(16)).z < 1.0f.xxx.z); + _14.Store3(0, asuint(float3(c.x ? float3(0.0f, 0.0f, 1.0f).x : float3(1.0f, 0.0f, 0.0f).x, c.y ? float3(0.0f, 0.0f, 1.0f).y : float3(1.0f, 0.0f, 0.0f).y, c.z ? 
float3(0.0f, 0.0f, 1.0f).z : float3(1.0f, 0.0f, 0.0f).z))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/comp/trivial-select-matrix.spv14.comp b/reference/shaders-hlsl-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 00000000000..7bd1c761697 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,22 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _14 : register(u0); + +void comp_main() +{ + bool c = asfloat(_14.Load(48)) < 1.0f; + float3x3 _29 = c ? float3x3(1.0f.xxx, 1.0f.xxx, 1.0f.xxx) : float3x3(0.0f.xxx, 0.0f.xxx, 0.0f.xxx); + _14.Store3(0, asuint(_29[0])); + _14.Store3(16, asuint(_29[1])); + _14.Store3(32, asuint(_29[2])); + float3x3 _37 = c ? float3x3(float3(1.0f, 0.0f, 0.0f), float3(0.0f, 1.0f, 0.0f), float3(0.0f, 0.0f, 1.0f)) : float3x3(0.0f.xxx, 0.0f.xxx, 0.0f.xxx); + _14.Store3(0, asuint(_37[0])); + _14.Store3(16, asuint(_37[1])); + _14.Store3(32, asuint(_37[2])); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/frag/cbuffer-packing-straddle.frag b/reference/shaders-hlsl-no-opt/frag/cbuffer-packing-straddle.frag new file mode 100644 index 00000000000..2b8ec8119a7 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/cbuffer-packing-straddle.frag @@ -0,0 +1,56 @@ +cbuffer UBO : register(b0) +{ + float4 _18_a[2] : packoffset(c0); + float4 _18_b : packoffset(c2); + float4 _18_c : packoffset(c3); + row_major float4x4 _18_d : packoffset(c4); + float _18_e : packoffset(c8); + float2 _18_f : packoffset(c8.z); + float _18_g : packoffset(c9); + float2 _18_h : packoffset(c9.z); + float _18_i : packoffset(c10); + float2 _18_j : packoffset(c10.z); + float _18_k : packoffset(c11); + float2 _18_l : packoffset(c11.z); + float _18_m : packoffset(c12); + float _18_n : packoffset(c12.y); + float _18_o : packoffset(c12.z); + float4 _18_p : packoffset(c13); + 
float4 _18_q : packoffset(c14); + float3 _18_r : packoffset(c15); + float4 _18_s : packoffset(c16); + float4 _18_t : packoffset(c17); + float4 _18_u : packoffset(c18); + float _18_v : packoffset(c19); + float _18_w : packoffset(c19.y); + float _18_x : packoffset(c19.z); + float _18_y : packoffset(c19.w); + float _18_z : packoffset(c20); + float _18_aa : packoffset(c20.y); + float _18_ab : packoffset(c20.z); + float _18_ac : packoffset(c20.w); + float _18_ad : packoffset(c21); + float _18_ae : packoffset(c21.y); + float4 _18_ef : packoffset(c22); +}; + + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = _18_a[1]; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag b/reference/shaders-hlsl-no-opt/frag/constant-buffer-array.invalid.sm51.frag similarity index 100% rename from reference/opt/shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag rename to reference/shaders-hlsl-no-opt/frag/constant-buffer-array.invalid.sm51.frag diff --git a/reference/shaders-hlsl/frag/fp16.invalid.desktop.frag b/reference/shaders-hlsl-no-opt/frag/fp16.invalid.desktop.frag similarity index 93% rename from reference/shaders-hlsl/frag/fp16.invalid.desktop.frag rename to reference/shaders-hlsl-no-opt/frag/fp16.invalid.desktop.frag index e10d6724e00..f0ed32f342d 100644 --- a/reference/shaders-hlsl/frag/fp16.invalid.desktop.frag +++ b/reference/shaders-hlsl-no-opt/frag/fp16.invalid.desktop.frag @@ -43,13 +43,13 @@ float4 mod(float4 x, float4 y) return x - y * floor(x / y); } -uint SPIRV_Cross_packFloat2x16(min16float2 value) +uint spvPackFloat2x16(min16float2 value) { uint2 Packed = f32tof16(value); return Packed.x | (Packed.y << 16); } -min16float2 SPIRV_Cross_unpackFloat2x16(uint value) +min16float2 spvUnpackFloat2x16(uint 
value) { return min16float2(f16tof32(uint2(value & 0xffff, value >> 16))); } @@ -128,9 +128,9 @@ void test_builtins() bool4 btmp = isnan(v4); btmp = isinf(v4); res = mad(v4, v4, v4); - uint pack0 = SPIRV_Cross_packFloat2x16(v4.xy); - uint pack1 = SPIRV_Cross_packFloat2x16(v4.zw); - res = min16float4(SPIRV_Cross_unpackFloat2x16(pack0), SPIRV_Cross_unpackFloat2x16(pack1)); + uint pack0 = spvPackFloat2x16(v4.xy); + uint pack1 = spvPackFloat2x16(v4.zw); + res = min16float4(spvUnpackFloat2x16(pack0), spvUnpackFloat2x16(pack1)); min16float t0 = length(v4); t0 = distance(v4, v4); t0 = dot(v4, v4); diff --git a/reference/shaders-hlsl-no-opt/frag/frag-coord.frag b/reference/shaders-hlsl-no-opt/frag/frag-coord.frag new file mode 100644 index 00000000000..17cb4c4b741 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/frag-coord.frag @@ -0,0 +1,27 @@ +static float4 gl_FragCoord; +static float3 FragColor; + +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +struct SPIRV_Cross_Output +{ + float3 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = gl_FragCoord.xyz / gl_FragCoord.w.xxx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/helper-invocation.fxconly.nofxc.frag b/reference/shaders-hlsl-no-opt/frag/helper-invocation.fxconly.nofxc.frag new file mode 100644 index 00000000000..ad61b21ea38 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/helper-invocation.fxconly.nofxc.frag @@ -0,0 +1,26 @@ +static float FragColor; + +struct SPIRV_Cross_Input +{ +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = float(IsHelperLane()); + discard; + bool _16 = IsHelperLane(); + FragColor = float(_16); +} + +SPIRV_Cross_Output 
main(SPIRV_Cross_Input stage_input) +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/native-16bit-types.fxconly.nofxc.sm62.native-16bit.frag b/reference/shaders-hlsl-no-opt/frag/native-16bit-types.fxconly.nofxc.sm62.native-16bit.frag new file mode 100644 index 00000000000..020831d0a05 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/native-16bit-types.fxconly.nofxc.sm62.native-16bit.frag @@ -0,0 +1,79 @@ +RWByteAddressBuffer _62 : register(u0, space0); + +static float4 gl_FragCoord; +static half4 Output; +static half4 Input; +static int16_t4 OutputI; +static int16_t4 InputI; +static uint16_t4 OutputU; +static uint16_t4 InputU; + +struct SPIRV_Cross_Input +{ + half4 Input : TEXCOORD0; + nointerpolation int16_t4 InputI : TEXCOORD1; + nointerpolation uint16_t4 InputU : TEXCOORD2; + float4 gl_FragCoord : SV_Position; +}; + +struct SPIRV_Cross_Output +{ + half4 Output : SV_Target0; + int16_t4 OutputI : SV_Target1; + uint16_t4 OutputU : SV_Target2; +}; + +void frag_main() +{ + int index = int(gl_FragCoord.x); + Output = Input + half(20.0).xxxx; + OutputI = InputI + int16_t4(int16_t(-40), int16_t(-40), int16_t(-40), int16_t(-40)); + OutputU = InputU + uint16_t4(20u, 20u, 20u, 20u); + Output += _62.Load(index * 2 + 0).xxxx; + OutputI += _62.Load(index * 2 + 8).xxxx; + OutputU += _62.Load(index * 2 + 16).xxxx; + Output += _62.Load(index * 8 + 24); + OutputI += _62.Load(index * 8 + 56); + OutputU += _62.Load(index * 8 + 88); + Output += _62.Load(index * 16 + 128).xyzz; + Output += half3(_62.Load(index * 12 + 186), _62.Load(index * 12 + 190), _62.Load(index * 12 + 194)).xyzz; + half2x3 _128 = half2x3(_62.Load(index * 16 + 120), _62.Load(index * 16 + 128)); + half2x3 m0 = _128; + half2x3 _132 = half2x3(_62.Load(index * 12 + 184), _62.Load(index * 12 + 188), _62.Load(index * 12 + 192), _62.Load(index * 12 + 186), _62.Load(index * 12 + 190), _62.Load(index * 
12 + 194)); + half2x3 m1 = _132; + _62.Store(index * 2 + 0, Output.x); + _62.Store(index * 2 + 8, OutputI.y); + _62.Store(index * 2 + 16, OutputU.z); + _62.Store(index * 8 + 24, Output); + _62.Store(index * 8 + 56, OutputI); + _62.Store(index * 8 + 88, OutputU); + _62.Store(index * 16 + 128, Output.xyz); + _62.Store(index * 12 + 186, Output.x); + _62.Store(index * 12 + 190, Output.xyz.y); + _62.Store(index * 12 + 194, Output.xyz.z); + half2x3 _182 = half2x3(half3(Output.xyz), half3(Output.wzy)); + _62.Store(index * 16 + 120, _182[0]); + _62.Store(index * 16 + 128, _182[1]); + half2x3 _197 = half2x3(half3(Output.xyz), half3(Output.wzy)); + _62.Store(index * 12 + 184, _197[0].x); + _62.Store(index * 12 + 186, _197[1].x); + _62.Store(index * 12 + 188, _197[0].y); + _62.Store(index * 12 + 190, _197[1].y); + _62.Store(index * 12 + 192, _197[0].z); + _62.Store(index * 12 + 194, _197[1].z); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + Input = stage_input.Input; + InputI = stage_input.InputI; + InputU = stage_input.InputU; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.Output = Output; + stage_output.OutputI = OutputI; + stage_output.OutputU = OutputU; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag b/reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag new file mode 100644 index 00000000000..ca9a116fe8b --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag @@ -0,0 +1,32 @@ +Texture2D uTex[] : register(t0, space0); +SamplerState Immut : register(s0, space1); + +static float4 FragColor; +static int vIndex; +static float2 vUV; + +struct SPIRV_Cross_Input +{ + float2 vUV : TEXCOORD0; + nointerpolation int vIndex : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; 
+}; + +void frag_main() +{ + FragColor = uTex[NonUniformResourceIndex(vIndex)].Sample(Immut, vUV); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vIndex = stage_input.vIndex; + vUV = stage_input.vUV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag b/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag new file mode 100644 index 00000000000..aace6f58ba1 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag @@ -0,0 +1,33 @@ +RasterizerOrderedByteAddressBuffer _14 : register(u1, space0); +RasterizerOrderedByteAddressBuffer _35 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _25 = int(gl_FragCoord.x); + _14.Store(_25 * 4 + 0, _14.Load(_25 * 4 + 0) + 1u); +} + +void callee() +{ + int _38 = int(gl_FragCoord.x); + _35.Store(_38 * 4 + 0, _35.Load(_38 * 4 + 0) + 1u); + callee2(); +} + +void frag_main() +{ + callee(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/frag/texture-gather-uint-component.asm.frag b/reference/shaders-hlsl-no-opt/frag/texture-gather-uint-component.asm.frag new file mode 100644 index 00000000000..a0f078a87cc --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/texture-gather-uint-component.asm.frag @@ -0,0 +1,29 @@ +Texture2D uSamp : register(t0); +SamplerState _uSamp_sampler : register(s0); + +static float4 FragColor; +static float2 vUV; + +struct SPIRV_Cross_Input +{ + float2 vUV : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = uSamp.GatherGreen(_uSamp_sampler, 
vUV); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vUV = stage_input.vUV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/ubo-offset-out-of-order.frag b/reference/shaders-hlsl-no-opt/frag/ubo-offset-out-of-order.frag new file mode 100644 index 00000000000..4ea3e0dfbe0 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/ubo-offset-out-of-order.frag @@ -0,0 +1,33 @@ +cbuffer UBO : register(b0) +{ + row_major float4x4 _13_m : packoffset(c1); + float4 _13_v : packoffset(c0); +}; + + +static float4 FragColor; +static float4 vColor; + +struct SPIRV_Cross_Input +{ + float4 vColor : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = mul(vColor, _13_m) + _13_v; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vColor = stage_input.vColor; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/variables.zero-initialize.frag b/reference/shaders-hlsl-no-opt/frag/variables.zero-initialize.frag new file mode 100644 index 00000000000..7fe47df036d --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/variables.zero-initialize.frag @@ -0,0 +1,45 @@ +struct Foo +{ + int a; +}; + +static float4 vColor; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 vColor : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +static int uninit_int = 0; +static int4 uninit_vector = int4(0, 0, 0, 0); +static float4x4 uninit_matrix = float4x4(0.0f.xxxx, 0.0f.xxxx, 0.0f.xxxx, 0.0f.xxxx); +static Foo uninit_foo = { 0 }; + +void frag_main() +{ + int uninit_function_int = 0; + if (vColor.x > 10.0f) + { + uninit_function_int = 10; + } + else + { + uninit_function_int = 20; + } + FragColor = vColor; +} + +SPIRV_Cross_Output 
main(SPIRV_Cross_Input stage_input) +{ + vColor = stage_input.vColor; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/volatile-helper-invocation.fxconly.nofxc.spv16.frag b/reference/shaders-hlsl-no-opt/frag/volatile-helper-invocation.fxconly.nofxc.spv16.frag new file mode 100644 index 00000000000..1311c863452 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/volatile-helper-invocation.fxconly.nofxc.spv16.frag @@ -0,0 +1,29 @@ +static float FragColor; + +struct SPIRV_Cross_Input +{ +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + bool _12 = IsHelperLane(); + float _15 = float(_12); + FragColor = _15; + discard; + bool _16 = IsHelperLane(); + float _17 = float(_16); + FragColor = _17; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/vert/base-instance.vert b/reference/shaders-hlsl-no-opt/vert/base-instance.vert new file mode 100644 index 00000000000..de31f2c6c0f --- /dev/null +++ b/reference/shaders-hlsl-no-opt/vert/base-instance.vert @@ -0,0 +1,30 @@ +static float4 gl_Position; +static int gl_BaseInstanceARB; +cbuffer SPIRV_Cross_VertexInfo +{ + int SPIRV_Cross_BaseVertex; + int SPIRV_Cross_BaseInstance; +}; + +struct SPIRV_Cross_Input +{ +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = float(gl_BaseInstanceARB).xxxx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_BaseInstanceARB = SPIRV_Cross_BaseInstance; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/vert/base-vertex.vert b/reference/shaders-hlsl-no-opt/vert/base-vertex.vert new 
file mode 100644 index 00000000000..6b9b62bbbb9 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/vert/base-vertex.vert @@ -0,0 +1,30 @@ +static float4 gl_Position; +static int gl_BaseVertexARB; +cbuffer SPIRV_Cross_VertexInfo +{ + int SPIRV_Cross_BaseVertex; + int SPIRV_Cross_BaseInstance; +}; + +struct SPIRV_Cross_Input +{ +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = float(gl_BaseVertexARB).xxxx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_BaseVertexARB = SPIRV_Cross_BaseVertex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/vert/block-io-auto-location-assignment.vert b/reference/shaders-hlsl-no-opt/vert/block-io-auto-location-assignment.vert new file mode 100644 index 00000000000..ea3bdc15644 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/vert/block-io-auto-location-assignment.vert @@ -0,0 +1,50 @@ +struct Bar +{ + float v[2]; + float w; +}; + +struct V +{ + float a; + float b[2]; + Bar c[2]; + Bar d; +}; + +static V _14; + +struct SPIRV_Cross_Output +{ + float V_a : TEXCOORD0; + float V_b[2] : TEXCOORD1; + Bar V_c[2] : TEXCOORD3; + Bar V_d : TEXCOORD9; +}; + +void vert_main() +{ + _14.a = 1.0f; + _14.b[0] = 2.0f; + _14.b[1] = 3.0f; + _14.c[0].v[0] = 4.0f; + _14.c[0].v[1] = 5.0f; + _14.c[0].w = 6.0f; + _14.c[1].v[0] = 7.0f; + _14.c[1].v[1] = 8.0f; + _14.c[1].w = 9.0f; + _14.d.v[0] = 10.0f; + _14.d.v[1] = 11.0f; + _14.d.w = 12.0f; +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.V_a = _14.a; + stage_output.V_b = _14.b; + stage_output.V_c = _14.c; + stage_output.V_d = _14.d; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert b/reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert new file mode 100644 index 00000000000..103ff46a3fe --- /dev/null 
+++ b/reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert @@ -0,0 +1,8 @@ +void vert_main() +{ +} + +void main() +{ + vert_main(); +} diff --git a/reference/shaders-hlsl-no-opt/vert/flatten-matrix-input.flatten-matrix-vertex-input.vert b/reference/shaders-hlsl-no-opt/vert/flatten-matrix-input.flatten-matrix-vertex-input.vert new file mode 100644 index 00000000000..d76b24fca3c --- /dev/null +++ b/reference/shaders-hlsl-no-opt/vert/flatten-matrix-input.flatten-matrix-vertex-input.vert @@ -0,0 +1,56 @@ +static float4 gl_Position; +static float4x4 m4; +static float4 v; +static float3x3 m3; +static float2x2 m2; + +struct SPIRV_Cross_Input +{ + float4 m4_0 : TEXCOORD0; + float4 m4_1 : TEXCOORD1; + float4 m4_2 : TEXCOORD2; + float4 m4_3 : TEXCOORD3; + float3 m3_0 : TEXCOORD4; + float3 m3_1 : TEXCOORD5; + float3 m3_2 : TEXCOORD6; + float2 m2_0 : TEXCOORD7; + float2 m2_1 : TEXCOORD8; + float4 v : TEXCOORD9; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(v, m4); + float4 _35 = gl_Position; + float3 _37 = _35.xyz + mul(v.xyz, m3); + gl_Position.x = _37.x; + gl_Position.y = _37.y; + gl_Position.z = _37.z; + float4 _56 = gl_Position; + float2 _58 = _56.xy + mul(v.xy, m2); + gl_Position.x = _58.x; + gl_Position.y = _58.y; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + m4[0] = stage_input.m4_0; + m4[1] = stage_input.m4_1; + m4[2] = stage_input.m4_2; + m4[3] = stage_input.m4_3; + v = stage_input.v; + m3[0] = stage_input.m3_0; + m3[1] = stage_input.m3_1; + m3[2] = stage_input.m3_2; + m2[0] = stage_input.m2_0; + m2[1] = stage_input.m2_1; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..da499c3b6da --- /dev/null +++ 
b/reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,20 @@ +RWByteAddressBuffer _5 : register(u0); +RWByteAddressBuffer _6 : register(u1); + +void comp_main() +{ + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) < int4(_5.Load4(0)).x, int(_5.Load4(16).y) < int4(_5.Load4(0)).y, int(_5.Load4(16).z) < int4(_5.Load4(0)).z, int(_5.Load4(16).w) < int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) <= int4(_5.Load4(0)).x, int(_5.Load4(16).y) <= int4(_5.Load4(0)).y, int(_5.Load4(16).z) <= int4(_5.Load4(0)).z, int(_5.Load4(16).w) <= int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x < uint(int4(_5.Load4(0)).x), _5.Load4(16).y < uint(int4(_5.Load4(0)).y), _5.Load4(16).z < uint(int4(_5.Load4(0)).z), _5.Load4(16).w < uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x <= uint(int4(_5.Load4(0)).x), _5.Load4(16).y <= uint(int4(_5.Load4(0)).y), _5.Load4(16).z <= uint(int4(_5.Load4(0)).z), _5.Load4(16).w <= uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) > int4(_5.Load4(0)).x, int(_5.Load4(16).y) > int4(_5.Load4(0)).y, int(_5.Load4(16).z) > int4(_5.Load4(0)).z, int(_5.Load4(16).w) > int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) >= int4(_5.Load4(0)).x, int(_5.Load4(16).y) >= int4(_5.Load4(0)).y, int(_5.Load4(16).z) >= int4(_5.Load4(0)).z, int(_5.Load4(16).w) >= int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x > uint(int4(_5.Load4(0)).x), _5.Load4(16).y > uint(int4(_5.Load4(0)).y), _5.Load4(16).z > uint(int4(_5.Load4(0)).z), _5.Load4(16).w > uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x >= uint(int4(_5.Load4(0)).x), _5.Load4(16).y >= uint(int4(_5.Load4(0)).y), _5.Load4(16).z >= uint(int4(_5.Load4(0)).z), _5.Load4(16).w >= uint(int4(_5.Load4(0)).w)))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp 
b/reference/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp index a12274c01c6..e184e03c5c1 100644 --- a/reference/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp +++ b/reference/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp @@ -4,22 +4,16 @@ struct A int b; }; -struct A_1 -{ - int a; - int b; -}; - RWByteAddressBuffer C1 : register(u1); cbuffer C2 : register(b2) { - A_1 C2_1_Data[1024] : packoffset(c0); + A C2_1_Data[1024] : packoffset(c0); }; RWByteAddressBuffer C3 : register(u0); cbuffer B : register(b3) { - A_1 C4_Data[1024] : packoffset(c0); + A C4_Data[1024] : packoffset(c0); }; diff --git a/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000000..88f53a4c182 --- /dev/null +++ b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,27 @@ +RWByteAddressBuffer _4 : register(u0); + +void comp_main() +{ + _4.Store(0, asuint(min(asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + _4.Store(0, asuint(max(asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(clamp(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(clamp(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(clamp(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + for (int i = 0; 
i < 2; i++, _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(56)), asfloat(_4.Load(60)))))) + { + _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp b/reference/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp deleted file mode 100644 index c567fbaf14c..00000000000 --- a/reference/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef SPIRV_CROSS_CONSTANT_ID_0 -#define SPIRV_CROSS_CONSTANT_ID_0 1u -#endif -static const uint _3 = SPIRV_CROSS_CONSTANT_ID_0; -#ifndef SPIRV_CROSS_CONSTANT_ID_2 -#define SPIRV_CROSS_CONSTANT_ID_2 3u -#endif -static const uint _4 = SPIRV_CROSS_CONSTANT_ID_2; -static const uint3 gl_WorkGroupSize = uint3(_3, 2u, _4); - -RWByteAddressBuffer _8 : register(u0); -RWByteAddressBuffer _9 : register(u1); - -static uint3 gl_WorkGroupID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; -}; - -static uint3 _22 = gl_WorkGroupSize; - -void comp_main() -{ - _8.Store(gl_WorkGroupID.x * 4 + 0, asuint(asfloat(_9.Load(gl_WorkGroupID.x * 4 + 0)) + asfloat(_8.Load(gl_WorkGroupID.x * 4 + 0)))); -} - -[numthreads(SPIRV_CROSS_CONSTANT_ID_0, 2, SPIRV_CROSS_CONSTANT_ID_2)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - comp_main(); -} diff --git a/reference/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag b/reference/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag index ed53720d946..2527d10fdc8 100644 --- a/reference/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag +++ b/reference/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag @@ -8,7 +8,7 @@ struct SPIRV_Cross_Output int2 Size : SV_Target0; }; -uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) { 
uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); @@ -19,7 +19,7 @@ void frag_main() { uint _19_dummy_parameter; uint _20_dummy_parameter; - Size = int2(SPIRV_Cross_textureSize(uTexture, uint(0), _19_dummy_parameter)) + int2(SPIRV_Cross_textureSize(uTexture, uint(1), _20_dummy_parameter)); + Size = int2(spvTextureSize(uTexture, uint(0), _19_dummy_parameter)) + int2(spvTextureSize(uTexture, uint(1), _20_dummy_parameter)); } SPIRV_Cross_Output main() diff --git a/reference/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag b/reference/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag index d20cf995acf..25dc6939e5c 100644 --- a/reference/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag +++ b/reference/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag @@ -7,7 +7,7 @@ struct SPIRV_Cross_Output void frag_main() { - FragColor = float3(asfloat(0x7f800000u), asfloat(0xff800000u), asfloat(0x7fc00000u)); + FragColor = float3(asfloat(0x7f800000u /* inf */), asfloat(0xff800000u /* -inf */), asfloat(0x7fc00000u /* nan */)); } SPIRV_Cross_Output main() diff --git a/reference/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag b/reference/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag new file mode 100644 index 00000000000..88fad10ff19 --- /dev/null +++ b/reference/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag @@ -0,0 +1,34 @@ +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +uint64_t spvPackUint2x32(uint2 value) +{ + return (uint64_t(value.y) << 32) | uint64_t(value.x); +} + +uint2 spvUnpackUint2x32(uint64_t value) +{ + uint2 Unpacked; + Unpacked.x = uint(value & 0xffffffff); + Unpacked.y = uint(value >> 32); + return Unpacked; +} + +void frag_main() +{ + uint64_t _packed = spvPackUint2x32(uint2(18u, 52u)); + uint2 unpacked = spvUnpackUint2x32(_packed); + FragColor = float4(float(unpacked.x), float(unpacked.y), 1.0f, 1.0f); +} + +SPIRV_Cross_Output main() +{ + 
frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag b/reference/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag index 281c397608f..9c71d08c4f2 100644 --- a/reference/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag +++ b/reference/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag @@ -58,6 +58,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.o_color = o_color; diff --git a/reference/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag b/reference/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag index 695d5fe9dfd..74c12945bfc 100644 --- a/reference/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag +++ b/reference/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag @@ -22,6 +22,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/shaders-hlsl/asm/frag/unreachable.asm.frag b/reference/shaders-hlsl/asm/frag/unreachable.asm.frag index c2fa519df8f..5eb70adf81c 100644 --- a/reference/shaders-hlsl/asm/frag/unreachable.asm.frag +++ b/reference/shaders-hlsl/asm/frag/unreachable.asm.frag @@ -1,3 +1,5 @@ +static float4 _21; + static int counter; static float4 FragColor; @@ -11,8 +13,6 @@ struct SPIRV_Cross_Output float4 FragColor : SV_Target0; }; -float4 _21; - void frag_main() { float4 _24; diff --git a/reference/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert b/reference/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert index 84b91b69bc4..2a332551f37 100644 --- 
a/reference/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert +++ b/reference/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert @@ -8,10 +8,7 @@ static const int _20 = (_7 + 2); #endif static const uint _8 = SPIRV_CROSS_CONSTANT_ID_202; static const uint _25 = (_8 % 5u); -#ifndef SPIRV_CROSS_CONSTANT_ID_0 -#define SPIRV_CROSS_CONSTANT_ID_0 int4(20, 30, _20, _20) -#endif -static const int4 _30 = SPIRV_CROSS_CONSTANT_ID_0; +static const int4 _30 = int4(20, 30, _20, _20); static const int2 _32 = int2(_30.y, _30.x); static const int _33 = _30.y; #ifndef SPIRV_CROSS_CONSTANT_ID_200 diff --git a/reference/shaders-hlsl/comp/access-chain-load-composite.comp b/reference/shaders-hlsl/comp/access-chain-load-composite.comp new file mode 100644 index 00000000000..1c4016008bd --- /dev/null +++ b/reference/shaders-hlsl/comp/access-chain-load-composite.comp @@ -0,0 +1,164 @@ +struct Baz +{ + float c; +}; + +struct Bar +{ + float d[2][4]; + Baz baz[2]; +}; + +struct Foo +{ + column_major float2x2 a; + float2 b; + Bar c[5]; +}; + +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _31 : register(u0); + +void comp_main() +{ + Foo _36; + _36.a = asfloat(uint2x2(_31.Load(0), _31.Load(8), _31.Load(4), _31.Load(12))); + _36.b = asfloat(_31.Load2(16)); + [unroll] + for (int _4ident = 0; _4ident < 5; _4ident++) + { + [unroll] + for (int _5ident = 0; _5ident < 2; _5ident++) + { + [unroll] + for (int _6ident = 0; _6ident < 4; _6ident++) + { + _36.c[_4ident].d[_5ident][_6ident] = asfloat(_31.Load(_6ident * 4 + _5ident * 16 + _4ident * 40 + 24)); + } + } + [unroll] + for (int _7ident = 0; _7ident < 2; _7ident++) + { + _36.c[_4ident].baz[_7ident].c = asfloat(_31.Load(_7ident * 4 + _4ident * 40 + 56)); + } + } + Foo f; + f.a = _36.a; + f.b = _36.b; + f.c[0].d[0][0] = _36.c[0].d[0][0]; + f.c[0].d[0][1] = _36.c[0].d[0][1]; + f.c[0].d[0][2] = _36.c[0].d[0][2]; + f.c[0].d[0][3] = _36.c[0].d[0][3]; + f.c[0].d[1][0] = _36.c[0].d[1][0]; + f.c[0].d[1][1] = 
_36.c[0].d[1][1]; + f.c[0].d[1][2] = _36.c[0].d[1][2]; + f.c[0].d[1][3] = _36.c[0].d[1][3]; + f.c[0].baz[0].c = _36.c[0].baz[0].c; + f.c[0].baz[1].c = _36.c[0].baz[1].c; + f.c[1].d[0][0] = _36.c[1].d[0][0]; + f.c[1].d[0][1] = _36.c[1].d[0][1]; + f.c[1].d[0][2] = _36.c[1].d[0][2]; + f.c[1].d[0][3] = _36.c[1].d[0][3]; + f.c[1].d[1][0] = _36.c[1].d[1][0]; + f.c[1].d[1][1] = _36.c[1].d[1][1]; + f.c[1].d[1][2] = _36.c[1].d[1][2]; + f.c[1].d[1][3] = _36.c[1].d[1][3]; + f.c[1].baz[0].c = _36.c[1].baz[0].c; + f.c[1].baz[1].c = _36.c[1].baz[1].c; + f.c[2].d[0][0] = _36.c[2].d[0][0]; + f.c[2].d[0][1] = _36.c[2].d[0][1]; + f.c[2].d[0][2] = _36.c[2].d[0][2]; + f.c[2].d[0][3] = _36.c[2].d[0][3]; + f.c[2].d[1][0] = _36.c[2].d[1][0]; + f.c[2].d[1][1] = _36.c[2].d[1][1]; + f.c[2].d[1][2] = _36.c[2].d[1][2]; + f.c[2].d[1][3] = _36.c[2].d[1][3]; + f.c[2].baz[0].c = _36.c[2].baz[0].c; + f.c[2].baz[1].c = _36.c[2].baz[1].c; + f.c[3].d[0][0] = _36.c[3].d[0][0]; + f.c[3].d[0][1] = _36.c[3].d[0][1]; + f.c[3].d[0][2] = _36.c[3].d[0][2]; + f.c[3].d[0][3] = _36.c[3].d[0][3]; + f.c[3].d[1][0] = _36.c[3].d[1][0]; + f.c[3].d[1][1] = _36.c[3].d[1][1]; + f.c[3].d[1][2] = _36.c[3].d[1][2]; + f.c[3].d[1][3] = _36.c[3].d[1][3]; + f.c[3].baz[0].c = _36.c[3].baz[0].c; + f.c[3].baz[1].c = _36.c[3].baz[1].c; + f.c[4].d[0][0] = _36.c[4].d[0][0]; + f.c[4].d[0][1] = _36.c[4].d[0][1]; + f.c[4].d[0][2] = _36.c[4].d[0][2]; + f.c[4].d[0][3] = _36.c[4].d[0][3]; + f.c[4].d[1][0] = _36.c[4].d[1][0]; + f.c[4].d[1][1] = _36.c[4].d[1][1]; + f.c[4].d[1][2] = _36.c[4].d[1][2]; + f.c[4].d[1][3] = _36.c[4].d[1][3]; + f.c[4].baz[0].c = _36.c[4].baz[0].c; + f.c[4].baz[1].c = _36.c[4].baz[1].c; + float2 _229 = 1.0f.xx; + f.a = float2x2(f.a[0] + _229, f.a[1] + _229); + f.b += 2.0f.xx; + f.c[3].d[1][1] += 5.0f; + _31.Store(224, asuint(f.a[0].x)); + _31.Store(228, asuint(f.a[1].x)); + _31.Store(232, asuint(f.a[0].y)); + _31.Store(236, asuint(f.a[1].y)); + _31.Store2(240, asuint(f.b)); + _31.Store(248, 
asuint(f.c[0].d[0][0])); + _31.Store(252, asuint(f.c[0].d[0][1])); + _31.Store(256, asuint(f.c[0].d[0][2])); + _31.Store(260, asuint(f.c[0].d[0][3])); + _31.Store(264, asuint(f.c[0].d[1][0])); + _31.Store(268, asuint(f.c[0].d[1][1])); + _31.Store(272, asuint(f.c[0].d[1][2])); + _31.Store(276, asuint(f.c[0].d[1][3])); + _31.Store(280, asuint(f.c[0].baz[0].c)); + _31.Store(284, asuint(f.c[0].baz[1].c)); + _31.Store(288, asuint(f.c[1].d[0][0])); + _31.Store(292, asuint(f.c[1].d[0][1])); + _31.Store(296, asuint(f.c[1].d[0][2])); + _31.Store(300, asuint(f.c[1].d[0][3])); + _31.Store(304, asuint(f.c[1].d[1][0])); + _31.Store(308, asuint(f.c[1].d[1][1])); + _31.Store(312, asuint(f.c[1].d[1][2])); + _31.Store(316, asuint(f.c[1].d[1][3])); + _31.Store(320, asuint(f.c[1].baz[0].c)); + _31.Store(324, asuint(f.c[1].baz[1].c)); + _31.Store(328, asuint(f.c[2].d[0][0])); + _31.Store(332, asuint(f.c[2].d[0][1])); + _31.Store(336, asuint(f.c[2].d[0][2])); + _31.Store(340, asuint(f.c[2].d[0][3])); + _31.Store(344, asuint(f.c[2].d[1][0])); + _31.Store(348, asuint(f.c[2].d[1][1])); + _31.Store(352, asuint(f.c[2].d[1][2])); + _31.Store(356, asuint(f.c[2].d[1][3])); + _31.Store(360, asuint(f.c[2].baz[0].c)); + _31.Store(364, asuint(f.c[2].baz[1].c)); + _31.Store(368, asuint(f.c[3].d[0][0])); + _31.Store(372, asuint(f.c[3].d[0][1])); + _31.Store(376, asuint(f.c[3].d[0][2])); + _31.Store(380, asuint(f.c[3].d[0][3])); + _31.Store(384, asuint(f.c[3].d[1][0])); + _31.Store(388, asuint(f.c[3].d[1][1])); + _31.Store(392, asuint(f.c[3].d[1][2])); + _31.Store(396, asuint(f.c[3].d[1][3])); + _31.Store(400, asuint(f.c[3].baz[0].c)); + _31.Store(404, asuint(f.c[3].baz[1].c)); + _31.Store(408, asuint(f.c[4].d[0][0])); + _31.Store(412, asuint(f.c[4].d[0][1])); + _31.Store(416, asuint(f.c[4].d[0][2])); + _31.Store(420, asuint(f.c[4].d[0][3])); + _31.Store(424, asuint(f.c[4].d[1][0])); + _31.Store(428, asuint(f.c[4].d[1][1])); + _31.Store(432, asuint(f.c[4].d[1][2])); + _31.Store(436, 
asuint(f.c[4].d[1][3])); + _31.Store(440, asuint(f.c[4].baz[0].c)); + _31.Store(444, asuint(f.c[4].baz[1].c)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/access-chains.comp b/reference/shaders-hlsl/comp/access-chains.comp index 924e9191245..c748200b969 100644 --- a/reference/shaders-hlsl/comp/access-chains.comp +++ b/reference/shaders-hlsl/comp/access-chains.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer wo : register(u1); ByteAddressBuffer ro : register(t0); diff --git a/reference/shaders-hlsl/comp/access-chains.force-uav.comp b/reference/shaders-hlsl/comp/access-chains.force-uav.comp new file mode 100644 index 00000000000..97d046d89a3 --- /dev/null +++ b/reference/shaders-hlsl/comp/access-chains.force-uav.comp @@ -0,0 +1,23 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer wo : register(u1); +RWByteAddressBuffer ro : register(u0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + wo.Store4(gl_GlobalInvocationID.x * 64 + 272, asuint(asfloat(ro.Load4(gl_GlobalInvocationID.x * 64 + 160)))); + wo.Store4(gl_GlobalInvocationID.x * 16 + 480, asuint(asfloat(ro.Load4(gl_GlobalInvocationID.x * 16 + 480)))); +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/address-buffers.comp b/reference/shaders-hlsl/comp/address-buffers.comp index a252fc8ae36..7f1c7975bc6 100644 --- a/reference/shaders-hlsl/comp/address-buffers.comp +++ b/reference/shaders-hlsl/comp/address-buffers.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer WriteOnly : register(u2); ByteAddressBuffer ReadOnly : register(t0); RWByteAddressBuffer ReadWrite : register(u1); diff --git 
a/reference/shaders-hlsl/comp/atomic.comp b/reference/shaders-hlsl/comp/atomic.comp index 72e15bf77dc..e6ff891e8c2 100644 --- a/reference/shaders-hlsl/comp/atomic.comp +++ b/reference/shaders-hlsl/comp/atomic.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer ssbo : register(u2); RWTexture2D uImage : register(u0); RWTexture2D iImage : register(u1); diff --git a/reference/shaders-hlsl/comp/globallycoherent.comp b/reference/shaders-hlsl/comp/globallycoherent.comp index 69886256f85..236f341e1ab 100644 --- a/reference/shaders-hlsl/comp/globallycoherent.comp +++ b/reference/shaders-hlsl/comp/globallycoherent.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + globallycoherent RWByteAddressBuffer _29 : register(u3); ByteAddressBuffer _33 : register(t2); RWTexture2D uImageIn : register(u0); diff --git a/reference/shaders-hlsl/comp/image.comp b/reference/shaders-hlsl/comp/image.comp index c8504e636c9..89a99409424 100644 --- a/reference/shaders-hlsl/comp/image.comp +++ b/reference/shaders-hlsl/comp/image.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWTexture2D uImageInF : register(u0); RWTexture2D uImageOutF : register(u1); RWTexture2D uImageInI : register(u2); diff --git a/reference/shaders-hlsl/comp/image.nonwritable-uav-texture.comp b/reference/shaders-hlsl/comp/image.nonwritable-uav-texture.comp new file mode 100644 index 00000000000..1e11b08777f --- /dev/null +++ b/reference/shaders-hlsl/comp/image.nonwritable-uav-texture.comp @@ -0,0 +1,73 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +Texture2D uImageInF : register(t0); +RWTexture2D uImageOutF : register(u1); +Texture2D uImageInI : register(t2); +RWTexture2D uImageOutI : register(u3); +Texture2D uImageInU : register(t4); +RWTexture2D uImageOutU : register(u5); +Buffer uImageInBuffer : register(t6); +RWBuffer uImageOutBuffer : register(u7); +Texture2D uImageInF2 : register(t8); 
+RWTexture2D uImageOutF2 : register(u9); +Texture2D uImageInI2 : register(t10); +RWTexture2D uImageOutI2 : register(u11); +Texture2D uImageInU2 : register(t12); +RWTexture2D uImageOutU2 : register(u13); +Buffer uImageInBuffer2 : register(t14); +RWBuffer uImageOutBuffer2 : register(u15); +Texture2D uImageInF4 : register(t16); +RWTexture2D uImageOutF4 : register(u17); +Texture2D uImageInI4 : register(t18); +RWTexture2D uImageOutI4 : register(u19); +Texture2D uImageInU4 : register(t20); +RWTexture2D uImageOutU4 : register(u21); +Buffer uImageInBuffer4 : register(t22); +RWBuffer uImageOutBuffer4 : register(u23); +RWTexture2D uImageNoFmtF : register(u24); +RWTexture2D uImageNoFmtU : register(u25); +RWTexture2D uImageNoFmtI : register(u26); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + float4 f = uImageInF[int2(gl_GlobalInvocationID.xy)]; + uImageOutF[int2(gl_GlobalInvocationID.xy)] = f.x; + int4 i = uImageInI[int2(gl_GlobalInvocationID.xy)]; + uImageOutI[int2(gl_GlobalInvocationID.xy)] = i.x; + uint4 u = uImageInU[int2(gl_GlobalInvocationID.xy)]; + uImageOutU[int2(gl_GlobalInvocationID.xy)] = u.x; + float4 b = uImageInBuffer[int(gl_GlobalInvocationID.x)]; + uImageOutBuffer[int(gl_GlobalInvocationID.x)] = b.x; + float4 f2 = uImageInF2[int2(gl_GlobalInvocationID.xy)]; + uImageOutF2[int2(gl_GlobalInvocationID.xy)] = f2.xy; + int4 i2 = uImageInI2[int2(gl_GlobalInvocationID.xy)]; + uImageOutI2[int2(gl_GlobalInvocationID.xy)] = i2.xy; + uint4 u2 = uImageInU2[int2(gl_GlobalInvocationID.xy)]; + uImageOutU2[int2(gl_GlobalInvocationID.xy)] = u2.xy; + float4 b2 = uImageInBuffer2[int(gl_GlobalInvocationID.x)]; + uImageOutBuffer2[int(gl_GlobalInvocationID.x)] = b2.xy; + float4 f4 = uImageInF4[int2(gl_GlobalInvocationID.xy)]; + uImageOutF4[int2(gl_GlobalInvocationID.xy)] = f4; + int4 i4 = uImageInI4[int2(gl_GlobalInvocationID.xy)]; + uImageOutI4[int2(gl_GlobalInvocationID.xy)] = 
i4; + uint4 u4 = uImageInU4[int2(gl_GlobalInvocationID.xy)]; + uImageOutU4[int2(gl_GlobalInvocationID.xy)] = u4; + float4 b4 = uImageInBuffer4[int(gl_GlobalInvocationID.x)]; + uImageOutBuffer4[int(gl_GlobalInvocationID.x)] = b4; + uImageNoFmtF[int2(gl_GlobalInvocationID.xy)] = b2; + uImageNoFmtU[int2(gl_GlobalInvocationID.xy)] = u4; + uImageNoFmtI[int2(gl_GlobalInvocationID.xy)] = i4; +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/inverse.comp b/reference/shaders-hlsl/comp/inverse.comp index 3be954a6f61..698f647cecc 100644 --- a/reference/shaders-hlsl/comp/inverse.comp +++ b/reference/shaders-hlsl/comp/inverse.comp @@ -1,9 +1,11 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _15 : register(u0); ByteAddressBuffer _20 : register(t1); // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. -float2x2 SPIRV_Cross_Inverse(float2x2 m) +float2x2 spvInverse(float2x2 m) { float2x2 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -23,29 +25,29 @@ float2x2 SPIRV_Cross_Inverse(float2x2 m) } // Returns the determinant of a 2x2 matrix. -float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2) +float spvDet2x2(float a1, float a2, float b1, float b2) { return a1 * b2 - b1 * a2; } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. -float3x3 SPIRV_Cross_Inverse(float3x3 m) +float3x3 spvInverse(float3x3 m) { float3x3 adj; // The adjoint matrix (inverse after dividing by determinant) // Create the transpose of the cofactors, as the classical adjoint of the matrix. 
- adj[0][0] = SPIRV_Cross_Det2x2(m[1][1], m[1][2], m[2][1], m[2][2]); - adj[0][1] = -SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[2][1], m[2][2]); - adj[0][2] = SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[1][1], m[1][2]); + adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]); + adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]); + adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]); - adj[1][0] = -SPIRV_Cross_Det2x2(m[1][0], m[1][2], m[2][0], m[2][2]); - adj[1][1] = SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[2][0], m[2][2]); - adj[1][2] = -SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[1][0], m[1][2]); + adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]); + adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]); + adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]); - adj[2][0] = SPIRV_Cross_Det2x2(m[1][0], m[1][1], m[2][0], m[2][1]); - adj[2][1] = -SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[2][0], m[2][1]); - adj[2][2] = SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[1][0], m[1][1]); + adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]); + adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]); + adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]); // Calculate the determinant as a combination of the cofactors of the first row. float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]); @@ -56,37 +58,37 @@ float3x3 SPIRV_Cross_Inverse(float3x3 m) } // Returns the determinant of a 3x3 matrix. 
-float SPIRV_Cross_Det3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) { - return a1 * SPIRV_Cross_Det2x2(b2, b3, c2, c3) - b1 * SPIRV_Cross_Det2x2(a2, a3, c2, c3) + c1 * SPIRV_Cross_Det2x2(a2, a3, b2, b3); + return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. -float4x4 SPIRV_Cross_Inverse(float4x4 m) +float4x4 spvInverse(float4x4 m) { float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) // Create the transpose of the cofactors, as the classical adjoint of the matrix. - adj[0][0] = SPIRV_Cross_Det3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); - adj[0][1] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); - adj[0][2] = SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]); - adj[0][3] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]); - - adj[1][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); - adj[1][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); - adj[1][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]); - adj[1][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]); - - adj[2][0] = SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); - adj[2][1] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], 
m[2][3], m[3][0], m[3][1], m[3][3]); - adj[2][2] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]); - adj[2][3] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]); - - adj[3][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); - adj[3][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); - adj[3][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]); - adj[3][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); + adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]); + adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]); + + adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]); + adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]); + + adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]); + adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], 
m[2][1], m[2][3]); + + adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]); + adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); // Calculate the determinant as a combination of the cofactors of the first row. float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]); @@ -99,16 +101,16 @@ float4x4 SPIRV_Cross_Inverse(float4x4 m) void comp_main() { float2x2 _23 = asfloat(uint2x2(_20.Load2(0), _20.Load2(8))); - float2x2 _24 = SPIRV_Cross_Inverse(_23); + float2x2 _24 = spvInverse(_23); _15.Store2(0, asuint(_24[0])); _15.Store2(8, asuint(_24[1])); float3x3 _29 = asfloat(uint3x3(_20.Load3(16), _20.Load3(32), _20.Load3(48))); - float3x3 _30 = SPIRV_Cross_Inverse(_29); + float3x3 _30 = spvInverse(_29); _15.Store3(16, asuint(_30[0])); _15.Store3(32, asuint(_30[1])); _15.Store3(48, asuint(_30[2])); float4x4 _35 = asfloat(uint4x4(_20.Load4(64), _20.Load4(80), _20.Load4(96), _20.Load4(112))); - float4x4 _36 = SPIRV_Cross_Inverse(_35); + float4x4 _36 = spvInverse(_35); _15.Store4(64, asuint(_36[0])); _15.Store4(80, asuint(_36[1])); _15.Store4(96, asuint(_36[2])); diff --git a/reference/shaders-hlsl/comp/num-workgroups-alone.comp b/reference/shaders-hlsl/comp/num-workgroups-alone.comp index dee39e3d579..ff71a0e103c 100644 --- a/reference/shaders-hlsl/comp/num-workgroups-alone.comp +++ b/reference/shaders-hlsl/comp/num-workgroups-alone.comp @@ -1,5 +1,7 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _10 : register(u0); -cbuffer SPIRV_Cross_NumWorkgroups : register(b0) +cbuffer SPIRV_Cross_NumWorkgroups { uint3 SPIRV_Cross_NumWorkgroups_1_count : packoffset(c0); }; diff --git 
a/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp b/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp index 1c98e5e56d7..cc326db3329 100644 --- a/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp +++ b/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp @@ -1,5 +1,7 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _10 : register(u0); -cbuffer SPIRV_Cross_NumWorkgroups : register(b0) +cbuffer SPIRV_Cross_NumWorkgroups { uint3 SPIRV_Cross_NumWorkgroups_1_count : packoffset(c0); }; diff --git a/reference/shaders-hlsl/comp/outer-product.comp b/reference/shaders-hlsl/comp/outer-product.comp index 71613d4f156..e58c02fe0b8 100644 --- a/reference/shaders-hlsl/comp/outer-product.comp +++ b/reference/shaders-hlsl/comp/outer-product.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _21 : register(u0); ByteAddressBuffer _26 : register(t1); diff --git a/reference/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp b/reference/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp new file mode 100644 index 00000000000..1339f45f069 --- /dev/null +++ b/reference/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp @@ -0,0 +1,224 @@ +struct Ray +{ + float3 pos; + float tmin; + float3 dir; + float tmax; +}; + +RWByteAddressBuffer _17 : register(u0); +RWByteAddressBuffer _257 : register(u2); +uniform RaytracingAccelerationStructure rtas : register(t1); + +static RayQuery rayQuery; + +Ray makeRayDesc() +{ + Ray ray; + ray.pos = 0.0f.xxx; + ray.dir = float3(1.0f, 0.0f, 0.0f); + ray.tmin = 0.0f; + ray.tmax = 9999.0f; + return ray; +} + +void doSomething() +{ + _17.Store(0, 0u); + _17.Store(4, 0u); +} + +void comp_main() +{ + Ray ray = makeRayDesc(); + RayDesc _1ident = {ray.pos, ray.tmin, ray.dir, ray.tmax}; + rayQuery.TraceRayInline(rtas, 0u, 255u, _1ident); + float4x3 _mat4x3; + float3x4 _mat3x4; + for (;;) + { + bool _67 = rayQuery.Proceed(); + if (_67) + { + uint _71 = 
rayQuery.CandidateType(); + uint candidateType = _71; + switch (candidateType) + { + case 0u: + { + rayQuery.Abort(); + float4x3 _79 = rayQuery.CandidateObjectToWorld4x3(); + _mat4x3 = _79; + _mat3x4 = transpose(_mat4x3); + rayQuery.CommitNonOpaqueTriangleHit(); + bool _87 = rayQuery.CommittedTriangleFrontFace(); + if (_87) + { + doSomething(); + } + float2 _92 = rayQuery.CommittedTriangleBarycentrics(); + if (_92.x == 0.0f) + { + doSomething(); + } + int _98 = rayQuery.CommittedInstanceID(); + if (_98 > 0) + { + doSomething(); + } + int _103 = rayQuery.CommittedInstanceIndex(); + if (_103 > 0) + { + doSomething(); + } + float3 _108 = rayQuery.CommittedObjectRayDirection(); + if (_108.x > 0.0f) + { + doSomething(); + } + float3 _114 = rayQuery.CommittedObjectRayOrigin(); + if (_114.x > 0.0f) + { + doSomething(); + } + int _120 = rayQuery.CommittedPrimitiveIndex(); + if (_120 > 0) + { + doSomething(); + } + float _125 = rayQuery.CommittedRayT(); + if (_125 > 0.0f) + { + doSomething(); + } + uint _130 = rayQuery.CommittedInstanceContributionToHitGroupIndex(); + if (_130 > 0u) + { + doSomething(); + } + break; + } + case 1u: + { + float4x3 _136 = rayQuery.CandidateObjectToWorld4x3(); + _mat4x3 = _136; + _mat3x4 = transpose(_mat4x3); + bool _139 = rayQuery.CandidateProceduralPrimitiveNonOpaque(); + if (_139) + { + doSomething(); + } + float t = 0.5f; + rayQuery.CommitProceduralPrimitiveHit(145); + rayQuery.Abort(); + break; + } + } + continue; + } + else + { + break; + } + } + if (_mat3x4[0].x == _mat4x3[0].x) + { + doSomething(); + } + uint _157 = rayQuery.CommittedStatus(); + uint committedStatus = _157; + switch (committedStatus) + { + case 0u: + { + float4x3 _163 = rayQuery.CandidateWorldToObject4x3(); + _mat4x3 = _163; + _mat3x4 = transpose(_mat4x3); + break; + } + case 1u: + { + float4x3 _167 = rayQuery.CommittedWorldToObject4x3(); + _mat4x3 = _167; + _mat3x4 = transpose(_mat4x3); + bool _170 = rayQuery.CommittedTriangleFrontFace(); + if (_170) + { + 
doSomething(); + } + float2 _174 = rayQuery.CommittedTriangleBarycentrics(); + if (_174.y == 0.0f) + { + doSomething(); + } + break; + } + case 2u: + { + int _182 = rayQuery.CommittedGeometryIndex(); + if (_182 > 0) + { + doSomething(); + } + int _187 = rayQuery.CommittedInstanceIndex(); + if (_187 > 0) + { + doSomething(); + } + int _192 = rayQuery.CommittedInstanceID(); + if (_192 > 0) + { + doSomething(); + } + float3 _197 = rayQuery.CommittedObjectRayDirection(); + if (_197.z > 0.0f) + { + doSomething(); + } + float3 _204 = rayQuery.CommittedObjectRayOrigin(); + if (_204.x > 0.0f) + { + doSomething(); + } + int _210 = rayQuery.CommittedPrimitiveIndex(); + if (_210 > 0) + { + doSomething(); + } + float _215 = rayQuery.CommittedRayT(); + if (_215 > 0.0f) + { + doSomething(); + } + break; + } + } + if (_mat3x4[0].x == _mat4x3[0].x) + { + doSomething(); + } + uint _230 = rayQuery.RayFlags(); + if (_230 > 256u) + { + doSomething(); + } + float _236 = rayQuery.RayTMin(); + if (_236 > 0.0f) + { + doSomething(); + } + float3 _242 = rayQuery.WorldRayOrigin(); + float3 o = _242; + float3 _244 = rayQuery.WorldRayDirection(); + float3 d = _244; + if (o.x == d.z) + { + doSomething(); + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/rmw-matrix.comp b/reference/shaders-hlsl/comp/rmw-matrix.comp index ed666693588..30ac03f84f4 100644 --- a/reference/shaders-hlsl/comp/rmw-matrix.comp +++ b/reference/shaders-hlsl/comp/rmw-matrix.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _11 : register(u0); void comp_main() diff --git a/reference/shaders-hlsl/comp/rwbuffer-matrix.comp b/reference/shaders-hlsl/comp/rwbuffer-matrix.comp index e79829283e6..197c9a95138 100644 --- a/reference/shaders-hlsl/comp/rwbuffer-matrix.comp +++ b/reference/shaders-hlsl/comp/rwbuffer-matrix.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _28 : 
register(u0); cbuffer UBO : register(b1) { diff --git a/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp index 47f2fe41076..db2bbe96989 100644 --- a/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _9 : register(u0); void comp_main() diff --git a/reference/shaders-hlsl/comp/spec-constant-op-member-array.comp b/reference/shaders-hlsl/comp/spec-constant-op-member-array.comp index c4537db0391..4e7c5e6167e 100644 --- a/reference/shaders-hlsl/comp/spec-constant-op-member-array.comp +++ b/reference/shaders-hlsl/comp/spec-constant-op-member-array.comp @@ -28,6 +28,7 @@ static const int d = (c + 50); #define SPIRV_CROSS_CONSTANT_ID_3 400 #endif static const int e = SPIRV_CROSS_CONSTANT_ID_3; +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); RWByteAddressBuffer _22 : register(u0); diff --git a/reference/shaders-hlsl/comp/ssbo-array-length.comp b/reference/shaders-hlsl/comp/ssbo-array-length.comp index 2e3df626ae7..82657cacfcb 100644 --- a/reference/shaders-hlsl/comp/ssbo-array-length.comp +++ b/reference/shaders-hlsl/comp/ssbo-array-length.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _11 : register(u1); void comp_main() diff --git a/reference/shaders-hlsl/comp/ssbo-array.comp b/reference/shaders-hlsl/comp/ssbo-array.comp index 90927421c68..dab20325b0b 100644 --- a/reference/shaders-hlsl/comp/ssbo-array.comp +++ b/reference/shaders-hlsl/comp/ssbo-array.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer ssbo0 : register(u0); void comp_main() diff --git a/reference/shaders-hlsl/flatten/array.flatten.vert b/reference/shaders-hlsl/flatten/array.flatten.vert new file mode 
100644 index 00000000000..948a198e6ad --- /dev/null +++ b/reference/shaders-hlsl/flatten/array.flatten.vert @@ -0,0 +1,30 @@ +uniform float4 UBO[56]; + +static float4 gl_Position; +static float4 aVertex; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + float4 a4 = UBO[23]; + float4 offset = (UBO[50] + UBO[45]) + UBO[54].x.xxxx; + gl_Position = (mul(aVertex, float4x4(UBO[40], UBO[41], UBO[42], UBO[43])) + UBO[55]) + offset; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/basic.flatten.vert b/reference/shaders-hlsl/flatten/basic.flatten.vert new file mode 100644 index 00000000000..778acd48037 --- /dev/null +++ b/reference/shaders-hlsl/flatten/basic.flatten.vert @@ -0,0 +1,35 @@ +uniform float4 UBO[4]; + +static float4 gl_Position; +static float4 aVertex; +static float3 vNormal; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float3 vNormal : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vNormal = aNormal; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vNormal = vNormal; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/copy.flatten.vert b/reference/shaders-hlsl/flatten/copy.flatten.vert new file mode 100644 index 00000000000..f85c890b11d --- /dev/null +++ b/reference/shaders-hlsl/flatten/copy.flatten.vert @@ -0,0 +1,53 @@ +struct Light +{ + 
float3 Position; + float Radius; + float4 Color; +}; + +uniform float4 UBO[12]; + +static float4 gl_Position; +static float4 aVertex; +static float4 vColor; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 vColor : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vColor = 0.0f.xxxx; + Light light; + for (int i = 0; i < 4; i++) + { + Light _51 = {UBO[i * 2 + 4].xyz, UBO[i * 2 + 4].w, UBO[i * 2 + 5]}; + Light _52 = _51; + light.Position = _52.Position; + light.Radius = _52.Radius; + light.Color = _52.Color; + float3 L = aVertex.xyz - light.Position; + vColor += ((UBO[i * 2 + 5] * clamp(1.0f - (length(L) / light.Radius), 0.0f, 1.0f)) * dot(aNormal, normalize(L))); + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/dynamic.flatten.vert b/reference/shaders-hlsl/flatten/dynamic.flatten.vert new file mode 100644 index 00000000000..787eefcdbea --- /dev/null +++ b/reference/shaders-hlsl/flatten/dynamic.flatten.vert @@ -0,0 +1,47 @@ +struct Light +{ + float3 Position; + float Radius; + float4 Color; +}; + +uniform float4 UBO[12]; + +static float4 gl_Position; +static float4 aVertex; +static float4 vColor; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 vColor : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vColor = 0.0f.xxxx; + for (int i = 0; i < 4; i++) + { + float3 L = aVertex.xyz - 
UBO[i * 2 + 4].xyz; + vColor += ((UBO[i * 2 + 5] * clamp(1.0f - (length(L) / UBO[i * 2 + 4].w), 0.0f, 1.0f)) * dot(aNormal, normalize(L))); + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/matrix-conversion.flatten.frag b/reference/shaders-hlsl/flatten/matrix-conversion.flatten.frag new file mode 100644 index 00000000000..59ec525f41a --- /dev/null +++ b/reference/shaders-hlsl/flatten/matrix-conversion.flatten.frag @@ -0,0 +1,29 @@ +uniform float4 UBO[4]; + +static float3 FragColor; +static float3 vNormal; + +struct SPIRV_Cross_Input +{ + nointerpolation float3 vNormal : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float3 FragColor : SV_Target0; +}; + +void frag_main() +{ + float4x4 _19 = float4x4(UBO[0], UBO[1], UBO[2], UBO[3]); + FragColor = mul(vNormal, float3x3(_19[0].xyz, _19[1].xyz, _19[2].xyz)); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vNormal = stage_input.vNormal; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/matrixindex.flatten.vert b/reference/shaders-hlsl/flatten/matrixindex.flatten.vert new file mode 100644 index 00000000000..b69a72dc11a --- /dev/null +++ b/reference/shaders-hlsl/flatten/matrixindex.flatten.vert @@ -0,0 +1,41 @@ +uniform float4 UBO[14]; + +static float4 gl_Position; +static float4 oA; +static float4 oB; +static float4 oC; +static float4 oD; +static float4 oE; + +struct SPIRV_Cross_Output +{ + float4 oA : TEXCOORD0; + float4 oB : TEXCOORD1; + float4 oC : TEXCOORD2; + float4 oD : TEXCOORD3; + float4 oE : TEXCOORD4; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = 0.0f.xxxx; + oA = UBO[1]; + oB = 
float4(UBO[4].y, UBO[5].y, UBO[6].y, UBO[7].y); + oC = UBO[9]; + oD = float4(UBO[10].x, UBO[11].x, UBO[12].x, UBO[13].x); + oE = float4(UBO[1].z, UBO[6].y, UBO[9].z, UBO[12].y); +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.oA = oA; + stage_output.oB = oB; + stage_output.oC = oC; + stage_output.oD = oD; + stage_output.oE = oE; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/multiindex.flatten.vert b/reference/shaders-hlsl/flatten/multiindex.flatten.vert new file mode 100644 index 00000000000..f21f05ec446 --- /dev/null +++ b/reference/shaders-hlsl/flatten/multiindex.flatten.vert @@ -0,0 +1,28 @@ +uniform float4 UBO[15]; + +static float4 gl_Position; +static int2 aIndex; + +struct SPIRV_Cross_Input +{ + int2 aIndex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = UBO[aIndex.x * 5 + aIndex.y * 1 + 0]; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aIndex = stage_input.aIndex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/push-constant.flatten.vert b/reference/shaders-hlsl/flatten/push-constant.flatten.vert new file mode 100644 index 00000000000..5bfb4dc0651 --- /dev/null +++ b/reference/shaders-hlsl/flatten/push-constant.flatten.vert @@ -0,0 +1,35 @@ +uniform float4 PushMe[6]; + +static float4 gl_Position; +static float4 Pos; +static float2 vRot; +static float2 Rot; + +struct SPIRV_Cross_Input +{ + float2 Rot : TEXCOORD0; + float4 Pos : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float2 vRot : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(Pos, float4x4(PushMe[0], PushMe[1], PushMe[2], PushMe[3])); + vRot = mul(Rot, float2x2(PushMe[4].xy, PushMe[4].zw)) + PushMe[5].z.xx; +} + 
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + Pos = stage_input.Pos; + Rot = stage_input.Rot; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vRot = vRot; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/rowmajor.flatten.vert b/reference/shaders-hlsl/flatten/rowmajor.flatten.vert new file mode 100644 index 00000000000..801def3b436 --- /dev/null +++ b/reference/shaders-hlsl/flatten/rowmajor.flatten.vert @@ -0,0 +1,29 @@ +uniform float4 UBO[12]; + +static float4 gl_Position; +static float4 aVertex; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + float2 v = mul(transpose(float4x2(UBO[8].xy, UBO[9].xy, UBO[10].xy, UBO[11].xy)), aVertex); + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])) + mul(aVertex, transpose(float4x4(UBO[4], UBO[5], UBO[6], UBO[7]))); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/struct.flatten.vert b/reference/shaders-hlsl/flatten/struct.flatten.vert new file mode 100644 index 00000000000..9b97bf59e29 --- /dev/null +++ b/reference/shaders-hlsl/flatten/struct.flatten.vert @@ -0,0 +1,44 @@ +struct Light +{ + float3 Position; + float Radius; + float4 Color; +}; + +uniform float4 UBO[6]; + +static float4 gl_Position; +static float4 aVertex; +static float4 vColor; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 vColor : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vColor = 0.0f.xxxx; + float3 L = aVertex.xyz 
- UBO[4].xyz; + vColor += ((UBO[5] * clamp(1.0f - (length(L) / UBO[4].w), 0.0f, 1.0f)) * dot(aNormal, normalize(L))); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/struct.rowmajor.flatten.vert b/reference/shaders-hlsl/flatten/struct.rowmajor.flatten.vert new file mode 100644 index 00000000000..39b0a808023 --- /dev/null +++ b/reference/shaders-hlsl/flatten/struct.rowmajor.flatten.vert @@ -0,0 +1,48 @@ +struct Foo +{ + column_major float3x4 MVP0; + column_major float3x4 MVP1; +}; + +uniform float4 UBO[8]; + +static float4 v0; +static float4 v1; +static float3 V0; +static float3 V1; + +struct SPIRV_Cross_Input +{ + float4 v0 : TEXCOORD0; + float4 v1 : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float3 V0 : TEXCOORD0; + float3 V1 : TEXCOORD1; +}; + +void vert_main() +{ + Foo _19 = {transpose(float4x3(UBO[0].xyz, UBO[1].xyz, UBO[2].xyz, UBO[3].xyz)), transpose(float4x3(UBO[4].xyz, UBO[5].xyz, UBO[6].xyz, UBO[7].xyz))}; + Foo _20 = _19; + Foo f; + f.MVP0 = _20.MVP0; + f.MVP1 = _20.MVP1; + float3 a = mul(f.MVP0, v0); + float3 b = mul(f.MVP1, v1); + V0 = a; + V1 = b; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + v0 = stage_input.v0; + v1 = stage_input.v1; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.V0 = V0; + stage_output.V1 = V1; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/swizzle.flatten.vert b/reference/shaders-hlsl/flatten/swizzle.flatten.vert new file mode 100644 index 00000000000..1091a17e995 --- /dev/null +++ b/reference/shaders-hlsl/flatten/swizzle.flatten.vert @@ -0,0 +1,45 @@ +uniform float4 UBO[8]; + +static float4 gl_Position; +static float4 oA; +static float4 oB; +static float4 oC; +static float4 oD; +static float4 
oE; +static float4 oF; + +struct SPIRV_Cross_Output +{ + float4 oA : TEXCOORD0; + float4 oB : TEXCOORD1; + float4 oC : TEXCOORD2; + float4 oD : TEXCOORD3; + float4 oE : TEXCOORD4; + float4 oF : TEXCOORD5; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = 0.0f.xxxx; + oA = UBO[0]; + oB = float4(UBO[1].xy, UBO[1].zw); + oC = float4(UBO[2].x, UBO[3].xyz); + oD = float4(UBO[4].xyz, UBO[4].w); + oE = float4(UBO[5].x, UBO[5].y, UBO[5].z, UBO[5].w); + oF = float4(UBO[6].x, UBO[6].zw, UBO[7].x); +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.oA = oA; + stage_output.oB = oB; + stage_output.oC = oC; + stage_output.oD = oD; + stage_output.oE = oE; + stage_output.oF = oF; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/types.flatten.frag b/reference/shaders-hlsl/flatten/types.flatten.frag new file mode 100644 index 00000000000..feb0b36096a --- /dev/null +++ b/reference/shaders-hlsl/flatten/types.flatten.frag @@ -0,0 +1,23 @@ +uniform int4 UBO1[2]; +uniform uint4 UBO2[2]; +uniform float4 UBO0[2]; + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = ((((float4(UBO1[0]) + float4(UBO1[1])) + float4(UBO2[0])) + float4(UBO2[1])) + UBO0[0]) + UBO0[1]; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/builtins.frag b/reference/shaders-hlsl/frag/builtins.frag index 922eca7c2d2..8432c42f80d 100644 --- a/reference/shaders-hlsl/frag/builtins.frag +++ b/reference/shaders-hlsl/frag/builtins.frag @@ -24,6 +24,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; vColor = stage_input.vColor; frag_main(); SPIRV_Cross_Output 
stage_output; diff --git a/reference/shaders-hlsl/frag/combined-texture-sampler-parameter.frag b/reference/shaders-hlsl/frag/combined-texture-sampler-parameter.frag index 7fcff423b32..66330805f13 100644 --- a/reference/shaders-hlsl/frag/combined-texture-sampler-parameter.frag +++ b/reference/shaders-hlsl/frag/combined-texture-sampler-parameter.frag @@ -22,7 +22,7 @@ float4 samp3(Texture2D s, SamplerState _s_sampler) float samp4(Texture2D s, SamplerComparisonState _s_sampler) { - return s.SampleCmp(_s_sampler, 1.0f.xxx.xy, 1.0f.xxx.z); + return s.SampleCmp(_s_sampler, 1.0f.xxx.xy, 1.0f); } float samp(Texture2D s0, SamplerState _s0_sampler, Texture2D s1, SamplerComparisonState _s1_sampler) diff --git a/reference/shaders-hlsl/frag/combined-texture-sampler-shadow.frag b/reference/shaders-hlsl/frag/combined-texture-sampler-shadow.frag index af5b0b55795..8d48008d382 100644 --- a/reference/shaders-hlsl/frag/combined-texture-sampler-shadow.frag +++ b/reference/shaders-hlsl/frag/combined-texture-sampler-shadow.frag @@ -11,7 +11,7 @@ struct SPIRV_Cross_Output float samp2(Texture2D t, SamplerComparisonState s) { - return t.SampleCmp(s, 1.0f.xxx.xy, 1.0f.xxx.z); + return t.SampleCmp(s, 1.0f.xxx.xy, 1.0f); } float samp3(Texture2D t, SamplerState s) diff --git a/reference/shaders-hlsl/frag/complex-expression-in-access-chain.frag b/reference/shaders-hlsl/frag/complex-expression-in-access-chain.frag index d5ccb9b9800..b2f995484d7 100644 --- a/reference/shaders-hlsl/frag/complex-expression-in-access-chain.frag +++ b/reference/shaders-hlsl/frag/complex-expression-in-access-chain.frag @@ -31,6 +31,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; vIn = stage_input.vIn; vIn2 = stage_input.vIn2; frag_main(); diff --git a/reference/shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag b/reference/shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag deleted file mode 
100644 index d330706c7bb..00000000000 --- a/reference/shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag +++ /dev/null @@ -1,44 +0,0 @@ -struct CBO_1 -{ - float4 a; - float4 b; - float4 c; - float4 d; -}; - -ConstantBuffer cbo[2][4] : register(b4, space0); -cbuffer PushMe -{ - float4 push_a : packoffset(c0); - float4 push_b : packoffset(c1); - float4 push_c : packoffset(c2); - float4 push_d : packoffset(c3); -}; - - -static float4 FragColor; - -struct SPIRV_Cross_Output -{ - float4 FragColor : SV_Target0; -}; - -void frag_main() -{ - FragColor = cbo[1][2].a; - FragColor += cbo[1][2].b; - FragColor += cbo[1][2].c; - FragColor += cbo[1][2].d; - FragColor += push_a; - FragColor += push_b; - FragColor += push_c; - FragColor += push_d; -} - -SPIRV_Cross_Output main() -{ - frag_main(); - SPIRV_Cross_Output stage_output; - stage_output.FragColor = FragColor; - return stage_output; -} diff --git a/reference/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag b/reference/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag index 322102ce869..6a31ce04888 100644 --- a/reference/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag +++ b/reference/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag @@ -28,7 +28,8 @@ void frag_main() float4 d7 = ddy_fine(vInput); float4 d8 = fwidth(vInput); float _56_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vInput.zw); - float2 lod = float2(_56_tmp, _56_tmp); + float2 _56 = _56_tmp.xx; + float2 lod = _56; if (vInput.y > 10.0f) { FragColor += t; diff --git a/reference/shaders-hlsl/frag/demote-to-helper.frag b/reference/shaders-hlsl/frag/demote-to-helper.frag new file mode 100644 index 00000000000..743a4228baf --- /dev/null +++ b/reference/shaders-hlsl/frag/demote-to-helper.frag @@ -0,0 +1,9 @@ +void frag_main() +{ + discard; +} + +void main() +{ + frag_main(); +} diff --git a/reference/shaders-hlsl/frag/fp16-packing.frag b/reference/shaders-hlsl/frag/fp16-packing.frag index d87828225fd..54b91e2aa51 
100644 --- a/reference/shaders-hlsl/frag/fp16-packing.frag +++ b/reference/shaders-hlsl/frag/fp16-packing.frag @@ -15,21 +15,21 @@ struct SPIRV_Cross_Output uint FP16Out : SV_Target1; }; -uint SPIRV_Cross_packHalf2x16(float2 value) +uint spvPackHalf2x16(float2 value) { uint2 Packed = f32tof16(value); return Packed.x | (Packed.y << 16); } -float2 SPIRV_Cross_unpackHalf2x16(uint value) +float2 spvUnpackHalf2x16(uint value) { return f16tof32(uint2(value & 0xffff, value >> 16)); } void frag_main() { - FP32Out = SPIRV_Cross_unpackHalf2x16(FP16); - FP16Out = SPIRV_Cross_packHalf2x16(FP32); + FP32Out = spvUnpackHalf2x16(FP16); + FP16Out = spvPackHalf2x16(FP32); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/shaders-hlsl/frag/image-query-selective.frag b/reference/shaders-hlsl/frag/image-query-selective.frag index 25c12da669f..69fe83fd289 100644 --- a/reference/shaders-hlsl/frag/image-query-selective.frag +++ b/reference/shaders-hlsl/frag/image-query-selective.frag @@ -20,49 +20,49 @@ SamplerState _uSamplerMSArray_sampler : register(s8); Texture2D uSampler2D : register(t1); SamplerState _uSampler2D_sampler : register(s1); -uint SPIRV_Cross_textureSize(Texture1D Tex, uint Level, out uint Param) +uint spvTextureSize(Texture1D Tex, uint Level, out uint Param) { uint ret; Tex.GetDimensions(Level, ret.x, Param); return ret; } -uint SPIRV_Cross_textureSize(Texture1D Tex, uint Level, out uint Param) +uint spvTextureSize(Texture1D Tex, uint Level, out uint Param) { uint ret; Tex.GetDimensions(Level, ret.x, Param); return ret; } -uint SPIRV_Cross_textureSize(Texture1D Tex, uint Level, out uint Param) +uint spvTextureSize(Texture1D Tex, uint Level, out uint Param) { uint ret; Tex.GetDimensions(Level, ret.x, Param); return ret; } -uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); return ret; } -uint3 
SPIRV_Cross_textureSize(Texture2DArray Tex, uint Level, out uint Param) +uint3 spvTextureSize(Texture2DArray Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); return ret; } -uint3 SPIRV_Cross_textureSize(Texture3D Tex, uint Level, out uint Param) +uint3 spvTextureSize(Texture3D Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); return ret; } -uint SPIRV_Cross_textureSize(Buffer Tex, uint Level, out uint Param) +uint spvTextureSize(Buffer Tex, uint Level, out uint Param) { uint ret; Tex.GetDimensions(ret.x); @@ -70,28 +70,28 @@ uint SPIRV_Cross_textureSize(Buffer Tex, uint Level, out uint Param) return ret; } -uint2 SPIRV_Cross_textureSize(TextureCube Tex, uint Level, out uint Param) +uint2 spvTextureSize(TextureCube Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); return ret; } -uint3 SPIRV_Cross_textureSize(TextureCubeArray Tex, uint Level, out uint Param) +uint3 spvTextureSize(TextureCubeArray Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); return ret; } -uint2 SPIRV_Cross_textureSize(Texture2DMS Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2DMS Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(ret.x, ret.y, Param); return ret; } -uint3 SPIRV_Cross_textureSize(Texture2DMSArray Tex, uint Level, out uint Param) +uint3 spvTextureSize(Texture2DMSArray Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(ret.x, ret.y, ret.z, Param); @@ -101,42 +101,42 @@ uint3 SPIRV_Cross_textureSize(Texture2DMSArray Tex, uint Level, out uint void frag_main() { uint _17_dummy_parameter; - int a = int(SPIRV_Cross_textureSize(uSampler1DUint, uint(0), _17_dummy_parameter)); + int a = int(spvTextureSize(uSampler1DUint, uint(0), _17_dummy_parameter)); uint _24_dummy_parameter; - a = int(SPIRV_Cross_textureSize(uSampler1DInt, uint(0), _24_dummy_parameter)); 
+ a = int(spvTextureSize(uSampler1DInt, uint(0), _24_dummy_parameter)); uint _32_dummy_parameter; - a = int(SPIRV_Cross_textureSize(uSampler1DFloat, uint(0), _32_dummy_parameter)); + a = int(spvTextureSize(uSampler1DFloat, uint(0), _32_dummy_parameter)); uint _42_dummy_parameter; - int3 c = int3(SPIRV_Cross_textureSize(uSampler2DArray, uint(0), _42_dummy_parameter)); + int3 c = int3(spvTextureSize(uSampler2DArray, uint(0), _42_dummy_parameter)); uint _50_dummy_parameter; - int3 d = int3(SPIRV_Cross_textureSize(uSampler3D, uint(0), _50_dummy_parameter)); + int3 d = int3(spvTextureSize(uSampler3D, uint(0), _50_dummy_parameter)); uint _60_dummy_parameter; - int2 e = int2(SPIRV_Cross_textureSize(uSamplerCube, uint(0), _60_dummy_parameter)); + int2 e = int2(spvTextureSize(uSamplerCube, uint(0), _60_dummy_parameter)); uint _68_dummy_parameter; - int3 f = int3(SPIRV_Cross_textureSize(uSamplerCubeArray, uint(0), _68_dummy_parameter)); + int3 f = int3(spvTextureSize(uSamplerCubeArray, uint(0), _68_dummy_parameter)); uint _76_dummy_parameter; - int g = int(SPIRV_Cross_textureSize(uSamplerBuffer, 0u, _76_dummy_parameter)); + int g = int(spvTextureSize(uSamplerBuffer, 0u, _76_dummy_parameter)); uint _84_dummy_parameter; - int2 h = int2(SPIRV_Cross_textureSize(uSamplerMS, 0u, _84_dummy_parameter)); + int2 h = int2(spvTextureSize(uSamplerMS, 0u, _84_dummy_parameter)); uint _92_dummy_parameter; - int3 i = int3(SPIRV_Cross_textureSize(uSamplerMSArray, 0u, _92_dummy_parameter)); + int3 i = int3(spvTextureSize(uSamplerMSArray, 0u, _92_dummy_parameter)); int _100; - SPIRV_Cross_textureSize(uSampler2D, 0u, _100); + spvTextureSize(uSampler2D, 0u, _100); int l1 = int(_100); int _104; - SPIRV_Cross_textureSize(uSampler2DArray, 0u, _104); + spvTextureSize(uSampler2DArray, 0u, _104); int l2 = int(_104); int _108; - SPIRV_Cross_textureSize(uSampler3D, 0u, _108); + spvTextureSize(uSampler3D, 0u, _108); int l3 = int(_108); int _112; - SPIRV_Cross_textureSize(uSamplerCube, 0u, _112); + 
spvTextureSize(uSamplerCube, 0u, _112); int l4 = int(_112); int _116; - SPIRV_Cross_textureSize(uSamplerMS, 0u, _116); + spvTextureSize(uSamplerMS, 0u, _116); int s0 = int(_116); int _120; - SPIRV_Cross_textureSize(uSamplerMSArray, 0u, _120); + spvTextureSize(uSamplerMSArray, 0u, _120); int s1 = int(_120); } diff --git a/reference/shaders-hlsl/frag/image-query-uav.frag b/reference/shaders-hlsl/frag/image-query-uav.frag new file mode 100644 index 00000000000..6626ed2f843 --- /dev/null +++ b/reference/shaders-hlsl/frag/image-query-uav.frag @@ -0,0 +1,64 @@ +RWTexture1D uImage1D : register(u0); +RWTexture2D uImage2D : register(u1); +RWTexture2DArray uImage2DArray : register(u2); +RWTexture3D uImage3D : register(u3); +RWBuffer uImageBuffer : register(u6); + +uint3 spvImageSize(RWTexture2DArray Tex, out uint Param) +{ + uint3 ret; + Tex.GetDimensions(ret.x, ret.y, ret.z); + Param = 0u; + return ret; +} + +uint2 spvImageSize(RWTexture2D Tex, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(ret.x, ret.y); + Param = 0u; + return ret; +} + +uint spvImageSize(RWTexture1D Tex, out uint Param) +{ + uint ret; + Tex.GetDimensions(ret.x); + Param = 0u; + return ret; +} + +uint3 spvImageSize(RWTexture3D Tex, out uint Param) +{ + uint3 ret; + Tex.GetDimensions(ret.x, ret.y, ret.z); + Param = 0u; + return ret; +} + +uint spvImageSize(RWBuffer Tex, out uint Param) +{ + uint ret; + Tex.GetDimensions(ret.x); + Param = 0u; + return ret; +} + +void frag_main() +{ + uint _14_dummy_parameter; + int a = int(spvImageSize(uImage1D, _14_dummy_parameter)); + uint _22_dummy_parameter; + int2 b = int2(spvImageSize(uImage2D, _22_dummy_parameter)); + uint _30_dummy_parameter; + int3 c = int3(spvImageSize(uImage2DArray, _30_dummy_parameter)); + uint _36_dummy_parameter; + int3 d = int3(spvImageSize(uImage3D, _36_dummy_parameter)); + uint _42_dummy_parameter; + int e = int(spvImageSize(uImageBuffer, _42_dummy_parameter)); +} + +void main() +{ + frag_main(); +} diff --git 
a/reference/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag b/reference/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag new file mode 100644 index 00000000000..1e77c2c911f --- /dev/null +++ b/reference/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag @@ -0,0 +1,63 @@ +RWTexture1D uImage1D : register(u0); +RWTexture2D uImage2D : register(u1); +Texture2DArray uImage2DArray : register(t2); +RWTexture3D uImage3D : register(u3); +RWBuffer uImageBuffer : register(u6); + +uint3 spvTextureSize(Texture2DArray Tex, uint Level, out uint Param) +{ + uint3 ret; + Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); + return ret; +} + +uint2 spvImageSize(RWTexture2D Tex, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(ret.x, ret.y); + Param = 0u; + return ret; +} + +uint spvImageSize(RWTexture1D Tex, out uint Param) +{ + uint ret; + Tex.GetDimensions(ret.x); + Param = 0u; + return ret; +} + +uint3 spvImageSize(RWTexture3D Tex, out uint Param) +{ + uint3 ret; + Tex.GetDimensions(ret.x, ret.y, ret.z); + Param = 0u; + return ret; +} + +uint spvImageSize(RWBuffer Tex, out uint Param) +{ + uint ret; + Tex.GetDimensions(ret.x); + Param = 0u; + return ret; +} + +void frag_main() +{ + uint _14_dummy_parameter; + int a = int(spvImageSize(uImage1D, _14_dummy_parameter)); + uint _22_dummy_parameter; + int2 b = int2(spvImageSize(uImage2D, _22_dummy_parameter)); + uint _30_dummy_parameter; + int3 c = int3(spvTextureSize(uImage2DArray, 0u, _30_dummy_parameter)); + uint _36_dummy_parameter; + int3 d = int3(spvImageSize(uImage3D, _36_dummy_parameter)); + uint _42_dummy_parameter; + int e = int(spvImageSize(uImageBuffer, _42_dummy_parameter)); +} + +void main() +{ + frag_main(); +} diff --git a/reference/shaders-hlsl/frag/image-query.frag b/reference/shaders-hlsl/frag/image-query.frag index 71cefc10301..0e4b26bacac 100644 --- a/reference/shaders-hlsl/frag/image-query.frag +++ b/reference/shaders-hlsl/frag/image-query.frag @@ -16,35 +16,35 @@ 
SamplerState _uSamplerMS_sampler : register(s7); Texture2DMSArray uSamplerMSArray : register(t8); SamplerState _uSamplerMSArray_sampler : register(s8); -uint SPIRV_Cross_textureSize(Texture1D Tex, uint Level, out uint Param) +uint spvTextureSize(Texture1D Tex, uint Level, out uint Param) { uint ret; Tex.GetDimensions(Level, ret.x, Param); return ret; } -uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); return ret; } -uint3 SPIRV_Cross_textureSize(Texture2DArray Tex, uint Level, out uint Param) +uint3 spvTextureSize(Texture2DArray Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); return ret; } -uint3 SPIRV_Cross_textureSize(Texture3D Tex, uint Level, out uint Param) +uint3 spvTextureSize(Texture3D Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); return ret; } -uint SPIRV_Cross_textureSize(Buffer Tex, uint Level, out uint Param) +uint spvTextureSize(Buffer Tex, uint Level, out uint Param) { uint ret; Tex.GetDimensions(ret.x); @@ -52,28 +52,28 @@ uint SPIRV_Cross_textureSize(Buffer Tex, uint Level, out uint Param) return ret; } -uint2 SPIRV_Cross_textureSize(TextureCube Tex, uint Level, out uint Param) +uint2 spvTextureSize(TextureCube Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); return ret; } -uint3 SPIRV_Cross_textureSize(TextureCubeArray Tex, uint Level, out uint Param) +uint3 spvTextureSize(TextureCubeArray Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); return ret; } -uint2 SPIRV_Cross_textureSize(Texture2DMS Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2DMS Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(ret.x, ret.y, Param); return ret; } -uint3 SPIRV_Cross_textureSize(Texture2DMSArray 
Tex, uint Level, out uint Param) +uint3 spvTextureSize(Texture2DMSArray Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(ret.x, ret.y, ret.z, Param); @@ -83,46 +83,46 @@ uint3 SPIRV_Cross_textureSize(Texture2DMSArray Tex, uint Level, out uint void frag_main() { uint _17_dummy_parameter; - int a = int(SPIRV_Cross_textureSize(uSampler1D, uint(0), _17_dummy_parameter)); + int a = int(spvTextureSize(uSampler1D, uint(0), _17_dummy_parameter)); uint _27_dummy_parameter; - int2 b = int2(SPIRV_Cross_textureSize(uSampler2D, uint(0), _27_dummy_parameter)); + int2 b = int2(spvTextureSize(uSampler2D, uint(0), _27_dummy_parameter)); uint _37_dummy_parameter; - int3 c = int3(SPIRV_Cross_textureSize(uSampler2DArray, uint(0), _37_dummy_parameter)); + int3 c = int3(spvTextureSize(uSampler2DArray, uint(0), _37_dummy_parameter)); uint _45_dummy_parameter; - int3 d = int3(SPIRV_Cross_textureSize(uSampler3D, uint(0), _45_dummy_parameter)); + int3 d = int3(spvTextureSize(uSampler3D, uint(0), _45_dummy_parameter)); uint _53_dummy_parameter; - int2 e = int2(SPIRV_Cross_textureSize(uSamplerCube, uint(0), _53_dummy_parameter)); + int2 e = int2(spvTextureSize(uSamplerCube, uint(0), _53_dummy_parameter)); uint _61_dummy_parameter; - int3 f = int3(SPIRV_Cross_textureSize(uSamplerCubeArray, uint(0), _61_dummy_parameter)); + int3 f = int3(spvTextureSize(uSamplerCubeArray, uint(0), _61_dummy_parameter)); uint _69_dummy_parameter; - int g = int(SPIRV_Cross_textureSize(uSamplerBuffer, 0u, _69_dummy_parameter)); + int g = int(spvTextureSize(uSamplerBuffer, 0u, _69_dummy_parameter)); uint _77_dummy_parameter; - int2 h = int2(SPIRV_Cross_textureSize(uSamplerMS, 0u, _77_dummy_parameter)); + int2 h = int2(spvTextureSize(uSamplerMS, 0u, _77_dummy_parameter)); uint _85_dummy_parameter; - int3 i = int3(SPIRV_Cross_textureSize(uSamplerMSArray, 0u, _85_dummy_parameter)); + int3 i = int3(spvTextureSize(uSamplerMSArray, 0u, _85_dummy_parameter)); int _89; - SPIRV_Cross_textureSize(uSampler1D, 
0u, _89); + spvTextureSize(uSampler1D, 0u, _89); int l0 = int(_89); int _93; - SPIRV_Cross_textureSize(uSampler2D, 0u, _93); + spvTextureSize(uSampler2D, 0u, _93); int l1 = int(_93); int _97; - SPIRV_Cross_textureSize(uSampler2DArray, 0u, _97); + spvTextureSize(uSampler2DArray, 0u, _97); int l2 = int(_97); int _101; - SPIRV_Cross_textureSize(uSampler3D, 0u, _101); + spvTextureSize(uSampler3D, 0u, _101); int l3 = int(_101); int _105; - SPIRV_Cross_textureSize(uSamplerCube, 0u, _105); + spvTextureSize(uSamplerCube, 0u, _105); int l4 = int(_105); int _109; - SPIRV_Cross_textureSize(uSamplerCubeArray, 0u, _109); + spvTextureSize(uSamplerCubeArray, 0u, _109); int l5 = int(_109); int _113; - SPIRV_Cross_textureSize(uSamplerMS, 0u, _113); + spvTextureSize(uSamplerMS, 0u, _113); int s0 = int(_113); int _117; - SPIRV_Cross_textureSize(uSamplerMSArray, 0u, _117); + spvTextureSize(uSamplerMSArray, 0u, _117); int s1 = int(_117); } diff --git a/reference/shaders-hlsl/frag/input-attachment-ms.frag b/reference/shaders-hlsl/frag/input-attachment-ms.frag index 130b799651d..954fa1a94b8 100644 --- a/reference/shaders-hlsl/frag/input-attachment-ms.frag +++ b/reference/shaders-hlsl/frag/input-attachment-ms.frag @@ -18,7 +18,8 @@ struct SPIRV_Cross_Output float4 load_subpasses(Texture2DMS uInput) { - return uInput.Load(int2(gl_FragCoord.xy), gl_SampleID); + float4 _24 = uInput.Load(int2(gl_FragCoord.xy), gl_SampleID); + return _24; } void frag_main() @@ -29,6 +30,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; gl_SampleID = stage_input.gl_SampleID; frag_main(); SPIRV_Cross_Output stage_output; diff --git a/reference/shaders-hlsl/frag/input-attachment.frag b/reference/shaders-hlsl/frag/input-attachment.frag index 0b815ae08aa..b0e297c55fa 100644 --- a/reference/shaders-hlsl/frag/input-attachment.frag +++ b/reference/shaders-hlsl/frag/input-attachment.frag @@ -27,6 +27,7 @@ void 
frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/shaders-hlsl/frag/io-block.frag b/reference/shaders-hlsl/frag/io-block.frag index 52c1f518bf2..812a44d8138 100644 --- a/reference/shaders-hlsl/frag/io-block.frag +++ b/reference/shaders-hlsl/frag/io-block.frag @@ -1,13 +1,18 @@ -static float4 FragColor; - struct VertexOut { - float4 a : TEXCOORD1; - float4 b : TEXCOORD2; + float4 a; + float4 b; }; +static float4 FragColor; static VertexOut _12; +struct SPIRV_Cross_Input +{ + float4 VertexOut_a : TEXCOORD1; + float4 VertexOut_b : TEXCOORD2; +}; + struct SPIRV_Cross_Output { float4 FragColor : SV_Target0; @@ -18,9 +23,10 @@ void frag_main() FragColor = _12.a + _12.b; } -SPIRV_Cross_Output main(in VertexOut stage_input_12) +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { - _12 = stage_input_12; + _12.a = stage_input.VertexOut_a; + _12.b = stage_input.VertexOut_b; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag b/reference/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag new file mode 100644 index 00000000000..2af0e513b44 --- /dev/null +++ b/reference/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag @@ -0,0 +1,32 @@ +uniform sampler2D uSampler; + +static float4 FragColor; +static float2 vUV; + +struct SPIRV_Cross_Input +{ + float2 vUV : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : COLOR0; +}; + +void frag_main() +{ + float3 _23 = float3(vUV, 5.0f); + FragColor = tex2Dproj(uSampler, float4(_23.xy, 0.0, _23.z)); + FragColor += tex2Dbias(uSampler, float4(vUV, 0.0, 3.0f)); + FragColor += tex2Dlod(uSampler, float4(vUV, 0.0, 2.0f)); + FragColor += tex2Dgrad(uSampler, vUV, 4.0f.xx, 5.0f.xx); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input 
stage_input) +{ + vUV = stage_input.vUV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = float4(FragColor); + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag b/reference/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag index 8f5e022eba3..cd5a6eee949 100644 --- a/reference/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag +++ b/reference/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag @@ -3,12 +3,15 @@ struct UBO_1_1 float4 v[64]; }; -ConstantBuffer ubos[] : register(b0, space3); -ByteAddressBuffer ssbos[] : register(t0, space4); +ConstantBuffer ubos[] : register(b2, space9); +RWByteAddressBuffer ssbos[] : register(u3, space10); Texture2D uSamplers[] : register(t0, space0); -SamplerState uSamps[] : register(s0, space2); -Texture2D uCombinedSamplers[] : register(t0, space1); -SamplerState _uCombinedSamplers_sampler[] : register(s0, space1); +SamplerState uSamps[] : register(s1, space3); +Texture2D uCombinedSamplers[] : register(t4, space2); +SamplerState _uCombinedSamplers_sampler[] : register(s4, space2); +Texture2DMS uSamplersMS[] : register(t0, space1); +RWTexture2D uImages[] : register(u5, space7); +RWTexture2D uImagesU32[] : register(u5, space8); static int vIndex; static float4 FragColor; @@ -25,14 +28,86 @@ struct SPIRV_Cross_Output float4 FragColor : SV_Target0; }; +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(Level, ret.x, ret.y, Param); + return ret; +} + +uint2 spvTextureSize(Texture2DMS Tex, uint Level, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(ret.x, ret.y, Param); + return ret; +} + +uint2 spvImageSize(RWTexture2D Tex, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(ret.x, ret.y); + Param = 0u; + return ret; +} + void frag_main() { int i = vIndex; FragColor = uSamplers[NonUniformResourceIndex(i + 
10)].Sample(uSamps[NonUniformResourceIndex(i + 40)], vUV); - int _47 = i + 10; - FragColor = uCombinedSamplers[NonUniformResourceIndex(_47)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_47)], vUV); - FragColor += ubos[NonUniformResourceIndex(i + 20)].v[i + 40]; - FragColor += asfloat(ssbos[NonUniformResourceIndex(i + 50)].Load4((i + 60) * 16 + 0)); + int _49 = i + 10; + FragColor = uCombinedSamplers[NonUniformResourceIndex(_49)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_49)], vUV); + int _65 = i + 20; + int _69 = i + 40; + FragColor += ubos[NonUniformResourceIndex(_65)].v[_69]; + int _83 = i + 50; + int _88 = i + 60; + FragColor += asfloat(ssbos[NonUniformResourceIndex(_83)].Load4(_88 * 16 + 16)); + int _96 = i + 60; + int _100 = i + 70; + ssbos[NonUniformResourceIndex(_96)].Store4(_100 * 16 + 16, asuint(20.0f.xxxx)); + int _106 = i + 10; + FragColor = uSamplers[NonUniformResourceIndex(_106)].Load(int3(int2(vUV), 0)); + int _116 = i + 100; + uint _122; + ssbos[_116].InterlockedAdd(0, 100u, _122); + float _136_tmp = uSamplers[NonUniformResourceIndex(i + 10)].CalculateLevelOfDetail(uSamps[NonUniformResourceIndex(i + 40)], vUV); + float2 _136 = _136_tmp.xx; + float2 queried = _136; + int _139 = i + 10; + float _143_tmp = uCombinedSamplers[NonUniformResourceIndex(_139)].CalculateLevelOfDetail(_uCombinedSamplers_sampler[NonUniformResourceIndex(_139)], vUV); + float2 _143 = _143_tmp.xx; + queried += _143; + float4 _147 = FragColor; + float2 _149 = _147.xy + queried; + FragColor.x = _149.x; + FragColor.y = _149.y; + int _157 = i + 20; + int _160; + spvTextureSize(uSamplers[NonUniformResourceIndex(_157)], 0u, _160); + FragColor.x += float(int(_160)); + int _172 = i + 20; + int _176; + spvTextureSize(uSamplersMS[NonUniformResourceIndex(_172)], 0u, _176); + FragColor.y += float(int(_176)); + int _184 = i + 20; + uint _187_dummy_parameter; + float4 _189 = FragColor; + float2 _191 = _189.xy + 
float2(int2(spvTextureSize(uSamplers[NonUniformResourceIndex(_184)], uint(0), _187_dummy_parameter))); + FragColor.x = _191.x; + FragColor.y = _191.y; + int _202 = i + 50; + FragColor += uImages[NonUniformResourceIndex(_202)][int2(vUV)].xxxx; + int _213 = i + 20; + uint _216_dummy_parameter; + float4 _218 = FragColor; + float2 _220 = _218.xy + float2(int2(spvImageSize(uImages[NonUniformResourceIndex(_213)], _216_dummy_parameter))); + FragColor.x = _220.x; + FragColor.y = _220.y; + int _227 = i + 60; + uImages[NonUniformResourceIndex(_227)][int2(vUV)] = 50.0f.x; + int _240 = i + 70; + uint _248; + InterlockedAdd(uImagesU32[NonUniformResourceIndex(_240)][int2(vUV)], 40u, _248); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag new file mode 100644 index 00000000000..8923f96a75e --- /dev/null +++ b/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag @@ -0,0 +1,24 @@ +RWByteAddressBuffer _9 : register(u6, space0); +globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0); +RasterizerOrderedByteAddressBuffer _52 : register(u4, space0); +RWTexture2D img4 : register(u5, space0); +RasterizerOrderedTexture2D img : register(u0, space0); +RasterizerOrderedTexture2D img3 : register(u2, space0); +RasterizerOrderedTexture2D img2 : register(u1, space0); + +void frag_main() +{ + _9.Store(0, uint(0)); + img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f); + img[int2(0, 0)] = img3[int2(0, 0)]; + uint _39; + InterlockedAdd(img2[int2(0, 0)], 1u, _39); + _42.Store(0, uint(int(_42.Load(0)) + 42)); + uint _55; + _42.InterlockedAnd(4, _52.Load(0), _55); +} + +void main() +{ + frag_main(); +} diff --git a/reference/shaders-hlsl/frag/query-lod.desktop.frag b/reference/shaders-hlsl/frag/query-lod.desktop.frag index fd95798bf42..a9d4bd83d9d 100644 --- 
a/reference/shaders-hlsl/frag/query-lod.desktop.frag +++ b/reference/shaders-hlsl/frag/query-lod.desktop.frag @@ -17,7 +17,8 @@ struct SPIRV_Cross_Output void frag_main() { float _19_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vTexCoord); - FragColor = float2(_19_tmp, _19_tmp).xyxy; + float2 _19 = _19_tmp.xx; + FragColor = _19.xyxy; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag b/reference/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag new file mode 100644 index 00000000000..bbe3e4a7d32 --- /dev/null +++ b/reference/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag @@ -0,0 +1,21 @@ +globallycoherent RWByteAddressBuffer _12 : register(u0); + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = asfloat(_12.Load4(0)); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/readonly-coherent-ssbo.frag b/reference/shaders-hlsl/frag/readonly-coherent-ssbo.frag new file mode 100644 index 00000000000..02252f9cbc5 --- /dev/null +++ b/reference/shaders-hlsl/frag/readonly-coherent-ssbo.frag @@ -0,0 +1,21 @@ +ByteAddressBuffer _12 : register(t0); + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = asfloat(_12.Load4(0)); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag b/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag index b6df0019afc..5c583c66fa9 100644 --- a/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag +++ b/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag @@ -31,12 +31,14 @@ void 
frag_main() float l0 = uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)); float l1 = uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1)); float l2 = uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w); - float4 _80 = vDirRef; - _80.z = vDirRef.w; - float p0 = uSampler2D.SampleCmp(_uSampler2D_sampler, _80.xy / _80.z, vDirRef.z / _80.z, int2(1, 1)); - float4 _87 = vDirRef; - _87.z = vDirRef.w; - float p1 = uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, _87.xy / _87.z, vDirRef.z / _87.z, int2(1, 1)); + float4 _75 = vDirRef; + float4 _80 = _75; + _80.z = _75.w; + float p0 = uSampler2D.SampleCmp(_uSampler2D_sampler, _80.xy / _80.z, _75.z / _80.z, int2(1, 1)); + float4 _84 = vDirRef; + float4 _87 = _84; + _87.z = _84.w; + float p1 = uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, _87.xy / _87.z, _84.z / _87.z, int2(1, 1)); FragColor = (((((((s0 + s1) + s2) + s3) + l0) + l1) + l2) + p0) + p1; } diff --git a/reference/shaders-hlsl/frag/sample-mask-in-and-out.frag b/reference/shaders-hlsl/frag/sample-mask-in-and-out.frag new file mode 100644 index 00000000000..185a09821ea --- /dev/null +++ b/reference/shaders-hlsl/frag/sample-mask-in-and-out.frag @@ -0,0 +1,30 @@ +static int gl_SampleMaskIn; +static int gl_SampleMask; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + uint gl_SampleMaskIn : SV_Coverage; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; + uint gl_SampleMask : SV_Coverage; +}; + +void frag_main() +{ + FragColor = 1.0f.xxxx; + gl_SampleMask = gl_SampleMaskIn; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_SampleMaskIn = stage_input.gl_SampleMaskIn; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_SampleMask = gl_SampleMask; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/sample-mask-in.frag 
b/reference/shaders-hlsl/frag/sample-mask-in.frag new file mode 100644 index 00000000000..8f6cfaf9e53 --- /dev/null +++ b/reference/shaders-hlsl/frag/sample-mask-in.frag @@ -0,0 +1,32 @@ +static int gl_SampleID; +static int gl_SampleMaskIn; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + uint gl_SampleID : SV_SampleIndex; + uint gl_SampleMaskIn : SV_Coverage; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + if ((gl_SampleMaskIn & (1 << gl_SampleID)) != 0) + { + FragColor = 1.0f.xxxx; + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_SampleID = stage_input.gl_SampleID; + gl_SampleMaskIn = stage_input.gl_SampleMaskIn; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/sample-mask-out.frag b/reference/shaders-hlsl/frag/sample-mask-out.frag new file mode 100644 index 00000000000..a966c032183 --- /dev/null +++ b/reference/shaders-hlsl/frag/sample-mask-out.frag @@ -0,0 +1,23 @@ +static int gl_SampleMask; +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; + uint gl_SampleMask : SV_Coverage; +}; + +void frag_main() +{ + FragColor = 1.0f.xxxx; + gl_SampleMask = 0; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_SampleMask = gl_SampleMask; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/sampler-array.frag b/reference/shaders-hlsl/frag/sampler-array.frag index e941357d299..fd08d4230d2 100644 --- a/reference/shaders-hlsl/frag/sampler-array.frag +++ b/reference/shaders-hlsl/frag/sampler-array.frag @@ -38,6 +38,7 @@ void frag_main() void main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; vTex = stage_input.vTex; vIndex = stage_input.vIndex; frag_main(); diff --git 
a/reference/shaders-hlsl/frag/scalar-refract-reflect.frag b/reference/shaders-hlsl/frag/scalar-refract-reflect.frag index 0fb694c543f..6c2d0be4f71 100644 --- a/reference/shaders-hlsl/frag/scalar-refract-reflect.frag +++ b/reference/shaders-hlsl/frag/scalar-refract-reflect.frag @@ -11,12 +11,12 @@ struct SPIRV_Cross_Output float FragColor : SV_Target0; }; -float SPIRV_Cross_Reflect(float i, float n) +float spvReflect(float i, float n) { return i - 2.0 * dot(n, i) * n; } -float SPIRV_Cross_Refract(float i, float n, float eta) +float spvRefract(float i, float n, float eta) { float NoI = n * i; float NoI2 = NoI * NoI; @@ -33,8 +33,8 @@ float SPIRV_Cross_Refract(float i, float n, float eta) void frag_main() { - FragColor = SPIRV_Cross_Refract(vRefract.x, vRefract.y, vRefract.z); - FragColor += SPIRV_Cross_Reflect(vRefract.x, vRefract.y); + FragColor = spvRefract(vRefract.x, vRefract.y, vRefract.z); + FragColor += spvReflect(vRefract.x, vRefract.y); FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y; FragColor += reflect(vRefract.xy, vRefract.zy).y; } diff --git a/reference/shaders-hlsl/frag/switch-unreachable-break.frag b/reference/shaders-hlsl/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..be36b82266e --- /dev/null +++ b/reference/shaders-hlsl/frag/switch-unreachable-break.frag @@ -0,0 +1,55 @@ +cbuffer UBO : register(b0) +{ + int _13_cond : packoffset(c0); + int _13_cond2 : packoffset(c0.y); +}; + + +static float4 FragColor; +static float4 vInput; + +struct SPIRV_Cross_Input +{ + float4 vInput : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + bool frog = false; + switch (_13_cond) + { + case 1: + { + if (_13_cond2 < 50) + { + break; + } + else + { + discard; + } + break; // unreachable workaround + } + default: + { + frog = true; + break; + } + } + bool4 _45 = frog.xxxx; + FragColor = float4(_45.x ? 10.0f.xxxx.x : 20.0f.xxxx.x, _45.y ? 
10.0f.xxxx.y : 20.0f.xxxx.y, _45.z ? 10.0f.xxxx.z : 20.0f.xxxx.z, _45.w ? 10.0f.xxxx.w : 20.0f.xxxx.w); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vInput = stage_input.vInput; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/tex-sampling-ms.frag b/reference/shaders-hlsl/frag/tex-sampling-ms.frag index 1435315383b..854ad5016d6 100644 --- a/reference/shaders-hlsl/frag/tex-sampling-ms.frag +++ b/reference/shaders-hlsl/frag/tex-sampling-ms.frag @@ -25,6 +25,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/shaders-hlsl/frag/tex-sampling.sm30.frag b/reference/shaders-hlsl/frag/tex-sampling.sm30.frag new file mode 100644 index 00000000000..10c6c065312 --- /dev/null +++ b/reference/shaders-hlsl/frag/tex-sampling.sm30.frag @@ -0,0 +1,83 @@ +uniform sampler1D tex1d; +uniform sampler2D tex2d; +uniform sampler3D tex3d; +uniform samplerCUBE texCube; +uniform sampler1D tex1dShadow; +uniform sampler2D tex2dShadow; + +static float texCoord1d; +static float2 texCoord2d; +static float3 texCoord3d; +static float4 FragColor; +static float4 texCoord4d; + +struct SPIRV_Cross_Input +{ + float texCoord1d : TEXCOORD0; + float2 texCoord2d : TEXCOORD1; + float3 texCoord3d : TEXCOORD2; + float4 texCoord4d : TEXCOORD3; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : COLOR0; +}; + +void frag_main() +{ + float4 texcolor = tex1D(tex1d, texCoord1d); + texcolor += tex1Dlod(tex1d, float4(texCoord1d, 0.0, 0.0, 2.0f)); + texcolor += tex1Dgrad(tex1d, texCoord1d, 1.0f, 2.0f); + float2 _34 = float2(texCoord1d, 2.0f); + texcolor += tex1Dproj(tex1d, float4(_34.x, 0.0, 0.0, _34.y)); + texcolor += tex1Dbias(tex1d, float4(texCoord1d, 0.0, 0.0, 1.0f)); + 
texcolor += tex2D(tex2d, texCoord2d); + texcolor += tex2Dlod(tex2d, float4(texCoord2d, 0.0, 2.0f)); + texcolor += tex2Dgrad(tex2d, texCoord2d, float2(1.0f, 2.0f), float2(3.0f, 4.0f)); + float3 _73 = float3(texCoord2d, 2.0f); + texcolor += tex2Dproj(tex2d, float4(_73.xy, 0.0, _73.z)); + texcolor += tex2Dbias(tex2d, float4(texCoord2d, 0.0, 1.0f)); + texcolor += tex3D(tex3d, texCoord3d); + texcolor += tex3Dlod(tex3d, float4(texCoord3d, 2.0f)); + texcolor += tex3Dgrad(tex3d, texCoord3d, float3(1.0f, 2.0f, 3.0f), float3(4.0f, 5.0f, 6.0f)); + float4 _112 = float4(texCoord3d, 2.0f); + texcolor += tex3Dproj(tex3d, float4(_112.xyz, _112.w)); + texcolor += tex3Dbias(tex3d, float4(texCoord3d, 1.0f)); + texcolor += texCUBE(texCube, texCoord3d); + texcolor += texCUBElod(texCube, float4(texCoord3d, 2.0f)); + texcolor += texCUBEbias(texCube, float4(texCoord3d, 1.0f)); + float3 _147 = float3(texCoord1d, 0.0f, 0.0f); + texcolor.w += tex1Dproj(tex1dShadow, float4(_147.x, 0.0, _147.z, 1.0)).x; + float3 _159 = float3(texCoord1d, 0.0f, 0.0f); + texcolor.w += tex1Dlod(tex1dShadow, float4(_159.x, 0.0, _159.z, 2.0f)).x; + float4 _168 = float4(texCoord1d, 0.0f, 0.0f, 2.0f); + float4 _171 = _168; + _171.y = _168.w; + texcolor.w += tex1Dproj(tex1dShadow, float4(_171.x, 0.0, _168.z, _171.y)).x; + float3 _179 = float3(texCoord1d, 0.0f, 0.0f); + texcolor.w += tex1Dbias(tex1dShadow, float4(_179.x, 0.0, _179.z, 1.0f)).x; + float3 _194 = float3(texCoord2d, 0.0f); + texcolor.w += tex2Dproj(tex2dShadow, float4(_194.xy, _194.z, 1.0)).x; + float3 _205 = float3(texCoord2d, 0.0f); + texcolor.w += tex2Dlod(tex2dShadow, float4(_205.xy, _205.z, 2.0f)).x; + float4 _216 = float4(texCoord2d, 0.0f, 2.0f); + float4 _219 = _216; + _219.z = _216.w; + texcolor.w += tex2Dproj(tex2dShadow, float4(_219.xy, _216.z, _219.z)).x; + float3 _229 = float3(texCoord2d, 0.0f); + texcolor.w += tex2Dbias(tex2dShadow, float4(_229.xy, _229.z, 1.0f)).x; + FragColor = texcolor; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input 
stage_input) +{ + texCoord1d = stage_input.texCoord1d; + texCoord2d = stage_input.texCoord2d; + texCoord3d = stage_input.texCoord3d; + texCoord4d = stage_input.texCoord4d; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = float4(FragColor); + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/texel-fetch-offset.frag b/reference/shaders-hlsl/frag/texel-fetch-offset.frag index f2a02e16295..c7ae589dd2b 100644 --- a/reference/shaders-hlsl/frag/texel-fetch-offset.frag +++ b/reference/shaders-hlsl/frag/texel-fetch-offset.frag @@ -23,6 +23,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/shaders-hlsl/frag/texture-proj-shadow.frag b/reference/shaders-hlsl/frag/texture-proj-shadow.frag index 07e06008a0b..f98a5126574 100644 --- a/reference/shaders-hlsl/frag/texture-proj-shadow.frag +++ b/reference/shaders-hlsl/frag/texture-proj-shadow.frag @@ -28,12 +28,14 @@ struct SPIRV_Cross_Output void frag_main() { - float4 _20 = vClip4; - _20.y = vClip4.w; - FragColor = uShadow1D.SampleCmp(_uShadow1D_sampler, _20.x / _20.y, vClip4.z / _20.y); - float4 _30 = vClip4; - _30.z = vClip4.w; - FragColor = uShadow2D.SampleCmp(_uShadow2D_sampler, _30.xy / _30.z, vClip4.z / _30.z); + float4 _17 = vClip4; + float4 _20 = _17; + _20.y = _17.w; + FragColor = uShadow1D.SampleCmp(_uShadow1D_sampler, _20.x / _20.y, _17.z / _20.y); + float4 _27 = vClip4; + float4 _30 = _27; + _30.z = _27.w; + FragColor = uShadow2D.SampleCmp(_uShadow2D_sampler, _30.xy / _30.z, _27.z / _30.z); FragColor = uSampler1D.Sample(_uSampler1D_sampler, vClip2.x / vClip2.y).x; FragColor = uSampler2D.Sample(_uSampler2D_sampler, vClip3.xy / vClip3.z).x; FragColor = uSampler3D.Sample(_uSampler3D_sampler, vClip4.xyz / vClip4.w).x; diff --git 
a/reference/shaders-hlsl/frag/texture-size-combined-image-sampler.frag b/reference/shaders-hlsl/frag/texture-size-combined-image-sampler.frag index d5c373746d8..dd2eb251fc2 100644 --- a/reference/shaders-hlsl/frag/texture-size-combined-image-sampler.frag +++ b/reference/shaders-hlsl/frag/texture-size-combined-image-sampler.frag @@ -8,7 +8,7 @@ struct SPIRV_Cross_Output int2 FooOut : SV_Target0; }; -uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); @@ -18,7 +18,7 @@ uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) void frag_main() { uint _23_dummy_parameter; - FooOut = int2(SPIRV_Cross_textureSize(uTex, uint(0), _23_dummy_parameter)); + FooOut = int2(spvTextureSize(uTex, uint(0), _23_dummy_parameter)); } SPIRV_Cross_Output main() diff --git a/reference/shaders-hlsl/frag/unorm-snorm-packing.frag b/reference/shaders-hlsl/frag/unorm-snorm-packing.frag index 57b5950636e..95786b93b68 100644 --- a/reference/shaders-hlsl/frag/unorm-snorm-packing.frag +++ b/reference/shaders-hlsl/frag/unorm-snorm-packing.frag @@ -27,50 +27,50 @@ struct SPIRV_Cross_Output uint SNORM16Out : SV_Target4; }; -uint SPIRV_Cross_packUnorm4x8(float4 value) +uint spvPackUnorm4x8(float4 value) { uint4 Packed = uint4(round(saturate(value) * 255.0)); return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24); } -float4 SPIRV_Cross_unpackUnorm4x8(uint value) +float4 spvUnpackUnorm4x8(uint value) { uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); return float4(Packed) / 255.0; } -uint SPIRV_Cross_packSnorm4x8(float4 value) +uint spvPackSnorm4x8(float4 value) { int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff; return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24)); } -float4 SPIRV_Cross_unpackSnorm4x8(uint value) +float4 spvUnpackSnorm4x8(uint 
value) { int SignedValue = int(value); int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24; return clamp(float4(Packed) / 127.0, -1.0, 1.0); } -uint SPIRV_Cross_packUnorm2x16(float2 value) +uint spvPackUnorm2x16(float2 value) { uint2 Packed = uint2(round(saturate(value) * 65535.0)); return Packed.x | (Packed.y << 16); } -float2 SPIRV_Cross_unpackUnorm2x16(uint value) +float2 spvUnpackUnorm2x16(uint value) { uint2 Packed = uint2(value & 0xffff, value >> 16); return float2(Packed) / 65535.0; } -uint SPIRV_Cross_packSnorm2x16(float2 value) +uint spvPackSnorm2x16(float2 value) { int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff; return uint(Packed.x | (Packed.y << 16)); } -float2 SPIRV_Cross_unpackSnorm2x16(uint value) +float2 spvUnpackSnorm2x16(uint value) { int SignedValue = int(value); int2 Packed = int2(SignedValue << 16, SignedValue) >> 16; @@ -79,16 +79,18 @@ float2 SPIRV_Cross_unpackSnorm2x16(uint value) void frag_main() { - FP32Out = SPIRV_Cross_unpackUnorm4x8(UNORM8); - FP32Out = SPIRV_Cross_unpackSnorm4x8(SNORM8); - float2 _21 = SPIRV_Cross_unpackUnorm2x16(UNORM16); - FP32Out = float4(_21.x, _21.y, FP32Out.z, FP32Out.w); - float2 _26 = SPIRV_Cross_unpackSnorm2x16(SNORM16); - FP32Out = float4(_26.x, _26.y, FP32Out.z, FP32Out.w); - UNORM8Out = SPIRV_Cross_packUnorm4x8(FP32); - SNORM8Out = SPIRV_Cross_packSnorm4x8(FP32); - UNORM16Out = SPIRV_Cross_packUnorm2x16(FP32.xy); - SNORM16Out = SPIRV_Cross_packSnorm2x16(FP32.zw); + FP32Out = spvUnpackUnorm4x8(UNORM8); + FP32Out = spvUnpackSnorm4x8(SNORM8); + float2 _21 = spvUnpackUnorm2x16(UNORM16); + FP32Out.x = _21.x; + FP32Out.y = _21.y; + float2 _31 = spvUnpackSnorm2x16(SNORM16); + FP32Out.x = _31.x; + FP32Out.y = _31.y; + UNORM8Out = spvPackUnorm4x8(FP32); + SNORM8Out = spvPackSnorm4x8(FP32); + UNORM16Out = spvPackUnorm2x16(FP32.xy); + SNORM16Out = spvPackSnorm2x16(FP32.zw); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git 
a/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..dad35928c7d --- /dev/null +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -0,0 +1,97 @@ +struct BlockOut +{ + float4 a; + float4 b; +}; + +struct BlockOutPrim +{ + float4 a; + float4 b; +}; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u); + +static uint3 gl_WorkGroupID; +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 vOut : TEXCOORD0; + BlockOut outputs : TEXCOORD2; + float4 gl_Position : SV_Position; + float gl_ClipDistance[1] : SV_ClipDistance; + float2 gl_CullDistance : SV_CullDistance; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 vPrim : TEXCOORD1; + BlockOutPrim prim_outputs : TEXCOORD4; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + uint gl_ViewportIndex : SV_ViewportArrayIndex; + uint gl_PrimitiveShadingRateEXT : SV_ShadingRate; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float shared_float[16]; + +void main3(inout uint2 gl_PrimitiveLineIndicesEXT[22], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22]) +{ + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uint2(0u, 1u) + gl_LocalInvocationIndex.xx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = 
(gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; +} + +void main2(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + SetMeshOutputCounts(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 6.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(float3(gl_GlobalInvocationID), 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationIndex < 22u) + { + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; + main3(gl_PrimitiveLineIndicesEXT, gl_MeshPrimitivesEXT); + } +} + +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + main2(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveLineIndicesEXT); +} + +[outputtopology("line")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_GlobalInvocationID = 
stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveLineIndicesEXT); +} diff --git a/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..e636453da4d --- /dev/null +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -0,0 +1,87 @@ +struct BlockOut +{ + float4 a; + float4 b; +}; + +struct BlockOutPrim +{ + float4 a; + float4 b; +}; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u); + +static uint3 gl_WorkGroupID; +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 vOut : TEXCOORD0; + BlockOut outputs : TEXCOORD2; + float4 gl_Position : SV_Position; + float gl_ClipDistance[1] : SV_ClipDistance; + float2 gl_CullDistance : SV_CullDistance; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 vPrim : TEXCOORD1; + BlockOutPrim prim_outputs : TEXCOORD4; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + uint gl_ViewportIndex : SV_ViewportArrayIndex; + uint gl_PrimitiveShadingRateEXT : SV_ShadingRate; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float shared_float[16]; + +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint3 gl_PrimitiveTriangleIndicesEXT[22]) +{ + SetMeshOutputCounts(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f); + 
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 3.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(float3(gl_GlobalInvocationID), 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationIndex < 22u) + { + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(0u, 1u, 2u) + gl_LocalInvocationIndex.xxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + +[outputtopology("triangle")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint3 gl_PrimitiveTriangleIndicesEXT[22]) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveTriangleIndicesEXT); +} diff 
--git a/reference/shaders-hlsl/vert/invariant.vert b/reference/shaders-hlsl/vert/invariant.vert new file mode 100644 index 00000000000..54739626865 --- /dev/null +++ b/reference/shaders-hlsl/vert/invariant.vert @@ -0,0 +1,40 @@ +static float4 gl_Position; +static float4 vInput0; +static float4 vInput1; +static float4 vInput2; +static float4 vColor; + +struct SPIRV_Cross_Input +{ + float4 vInput0 : TEXCOORD0; + float4 vInput1 : TEXCOORD1; + float4 vInput2 : TEXCOORD2; +}; + +struct SPIRV_Cross_Output +{ + precise float4 vColor : TEXCOORD0; + precise float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + float4 _20 = vInput1 * vInput2; + float4 _21 = vInput0 + _20; + gl_Position = _21; + float4 _27 = vInput0 - vInput1; + float4 _29 = _27 * vInput2; + vColor = _29; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vInput0 = stage_input.vInput0; + vInput1 = stage_input.vInput1; + vInput2 = stage_input.vInput2; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/vert/locations.vert b/reference/shaders-hlsl/vert/locations.vert index b06b204bdd1..da9fa45ede6 100644 --- a/reference/shaders-hlsl/vert/locations.vert +++ b/reference/shaders-hlsl/vert/locations.vert @@ -5,6 +5,12 @@ struct Foo float3 c; }; +struct VertexOut +{ + float3 color; + float3 foo; +}; + static float4 gl_Position; static float4 Input2; static float4 Input4; @@ -14,13 +20,6 @@ static float vLocation1; static float vLocation2[2]; static Foo vLocation4; static float vLocation9; - -struct VertexOut -{ - float3 color : TEXCOORD7; - float3 foo : TEXCOORD8; -}; - static VertexOut vout; struct SPIRV_Cross_Input @@ -36,6 +35,8 @@ struct SPIRV_Cross_Output float vLocation1 : TEXCOORD1; float vLocation2[2] : TEXCOORD2; Foo vLocation4 : TEXCOORD4; + float3 VertexOut_color : TEXCOORD7; + float3 VertexOut_foo : TEXCOORD8; float vLocation9 : TEXCOORD9; float4 
gl_Position : SV_Position; }; @@ -57,13 +58,12 @@ void vert_main() vout.foo = 4.0f.xxx; } -SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input, out VertexOut stage_outputvout) +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { Input2 = stage_input.Input2; Input4 = stage_input.Input4; Input0 = stage_input.Input0; vert_main(); - stage_outputvout = vout; SPIRV_Cross_Output stage_output; stage_output.gl_Position = gl_Position; stage_output.vLocation0 = vLocation0; @@ -71,5 +71,7 @@ SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input, out VertexOut stage_outpu stage_output.vLocation2 = vLocation2; stage_output.vLocation4 = vLocation4; stage_output.vLocation9 = vLocation9; + stage_output.VertexOut_color = vout.color; + stage_output.VertexOut_foo = vout.foo; return stage_output; } diff --git a/reference/shaders-hlsl/vert/no-contraction.vert b/reference/shaders-hlsl/vert/no-contraction.vert new file mode 100644 index 00000000000..ad37dc23f19 --- /dev/null +++ b/reference/shaders-hlsl/vert/no-contraction.vert @@ -0,0 +1,45 @@ +static float4 gl_Position; +static float4 vA; +static float4 vB; +static float4 vC; + +struct SPIRV_Cross_Input +{ + float4 vA : TEXCOORD0; + float4 vB : TEXCOORD1; + float4 vC : TEXCOORD2; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + precise float4 _15 = vA * vB; + float4 mul = _15; + precise float4 _19 = vA + vB; + float4 add = _19; + precise float4 _23 = vA - vB; + float4 sub = _23; + precise float4 _27 = vA * vB; + precise float4 _30 = _27 + vC; + float4 mad = _30; + precise float4 _34 = mul + add; + precise float4 _36 = _34 + sub; + precise float4 _38 = _36 + mad; + float4 summed = _38; + gl_Position = summed; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vA = stage_input.vA; + vB = stage_input.vB; + vC = stage_input.vC; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git 
a/reference/shaders-hlsl/vert/qualifiers.vert b/reference/shaders-hlsl/vert/qualifiers.vert index 13ee2a8c1c0..bbf7dc61e45 100644 --- a/reference/shaders-hlsl/vert/qualifiers.vert +++ b/reference/shaders-hlsl/vert/qualifiers.vert @@ -1,17 +1,16 @@ +struct Block +{ + float vFlat; + float vCentroid; + float vSample; + float vNoperspective; +}; + static float4 gl_Position; static float vFlat; static float vCentroid; static float vSample; static float vNoperspective; - -struct Block -{ - nointerpolation float vFlat : TEXCOORD4; - centroid float vCentroid : TEXCOORD5; - sample float vSample : TEXCOORD6; - noperspective float vNoperspective : TEXCOORD7; -}; - static Block vout; struct SPIRV_Cross_Output @@ -20,6 +19,10 @@ struct SPIRV_Cross_Output centroid float vCentroid : TEXCOORD1; sample float vSample : TEXCOORD2; noperspective float vNoperspective : TEXCOORD3; + nointerpolation float Block_vFlat : TEXCOORD4; + centroid float Block_vCentroid : TEXCOORD5; + sample float Block_vSample : TEXCOORD6; + noperspective float Block_vNoperspective : TEXCOORD7; float4 gl_Position : SV_Position; }; @@ -36,15 +39,18 @@ void vert_main() vout.vNoperspective = 3.0f; } -SPIRV_Cross_Output main(out Block stage_outputvout) +SPIRV_Cross_Output main() { vert_main(); - stage_outputvout = vout; SPIRV_Cross_Output stage_output; stage_output.gl_Position = gl_Position; stage_output.vFlat = vFlat; stage_output.vCentroid = vCentroid; stage_output.vSample = vSample; stage_output.vNoperspective = vNoperspective; + stage_output.Block_vFlat = vout.vFlat; + stage_output.Block_vCentroid = vout.vCentroid; + stage_output.Block_vSample = vout.vSample; + stage_output.Block_vNoperspective = vout.vNoperspective; return stage_output; } diff --git a/reference/shaders-hlsl/vert/return-array.vert b/reference/shaders-hlsl/vert/return-array.vert index 83e3a281232..3e021257bd9 100644 --- a/reference/shaders-hlsl/vert/return-array.vert +++ b/reference/shaders-hlsl/vert/return-array.vert @@ -15,17 +15,17 @@ struct 
SPIRV_Cross_Output float4 gl_Position : SV_Position; }; -void test(out float4 SPIRV_Cross_return_value[2]) +void test(out float4 spvReturnValue[2]) { - SPIRV_Cross_return_value = _20; + spvReturnValue = _20; } -void test2(out float4 SPIRV_Cross_return_value[2]) +void test2(out float4 spvReturnValue[2]) { float4 foobar[2]; foobar[0] = vInput0; foobar[1] = vInput1; - SPIRV_Cross_return_value = foobar; + spvReturnValue = foobar; } void vert_main() diff --git a/reference/shaders-msl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/reference/shaders-msl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 00000000000..d24b9666fab --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct T +{ + float a; +}; + +struct T_1 +{ + float b; +}; + +struct SSBO1 +{ + T_1 foo[1]; +}; + +struct T_2 +{ + float c; + char _m0_final_padding[12]; +}; + +struct SSBO2 +{ + T_2 bar[1]; +}; + +kernel void main0(device SSBO1& _7 [[buffer(0)]], device SSBO2& _10 [[buffer(1)]]) +{ + T v = T{ 40.0 }; + _7.foo[10].b = v.a; + _10.bar[30].c = v.a; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp b/reference/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 00000000000..1015d2a5eef --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,23 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + uint a; + uint b; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _5 [[buffer(0)]]) +{ + uint _20 = atomic_load_explicit((device atomic_uint*)&_5.b, memory_order_relaxed); + uint c = _20; + atomic_store_explicit((device atomic_uint*)&_5.a, c, memory_order_relaxed); +} + diff --git 
a/reference/shaders-msl-no-opt/asm/comp/atomic-min-max-sign.asm.comp b/reference/shaders-msl-no-opt/asm/comp/atomic-min-max-sign.asm.comp new file mode 100644 index 00000000000..3fdf46bbc5b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/atomic-min-max-sign.asm.comp @@ -0,0 +1,28 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + uint a; + int b; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _4 [[buffer(0)]]) +{ + uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&_4.a, 1u, memory_order_relaxed); + uint _27 = uint(atomic_fetch_min_explicit((device atomic_int*)&_4.a, int(1u), memory_order_relaxed)); + uint _28 = atomic_fetch_min_explicit((device atomic_uint*)&_4.a, 4294967295u, memory_order_relaxed); + uint _29 = uint(atomic_fetch_max_explicit((device atomic_int*)&_4.a, int(4294967295u), memory_order_relaxed)); + int _30 = atomic_fetch_max_explicit((device atomic_int*)&_4.b, -3, memory_order_relaxed); + int _31 = int(atomic_fetch_min_explicit((device atomic_uint*)&_4.b, uint(-3), memory_order_relaxed)); + int _32 = atomic_fetch_min_explicit((device atomic_int*)&_4.b, 4, memory_order_relaxed); + int _33 = int(atomic_fetch_max_explicit((device atomic_uint*)&_4.b, uint(4), memory_order_relaxed)); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp b/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp index 8b6694288e6..4624ef0b7c1 100644 --- a/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp +++ b/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp @@ -14,7 +14,7 @@ struct SSBO kernel void main0(device SSBO& _5 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - uint _24 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_5.count, 1u, memory_order_relaxed); + uint _24 = 
atomic_fetch_add_explicit((device atomic_uint*)&_5.count, 1u, memory_order_relaxed); if (_24 < 1024u) { _5.data[_24] = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp b/reference/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp new file mode 100644 index 00000000000..0d63f5fa75b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp @@ -0,0 +1,21 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + half2 a; + float b; + float c; + half2 d; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _4 [[buffer(0)]]) +{ + _4.b = as_type(_4.a); + _4.d = as_type(_4.c); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/reference/shaders-msl-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 00000000000..8e198a94df8 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + int4 ints; + uint4 uints; +}; + +kernel void main0(device SSBO& _3 [[buffer(0)]]) +{ + int4 _19 = _3.ints; + uint4 _20 = _3.uints; + _3.ints = popcount(_19); + _3.uints = uint4(popcount(_19)); + _3.ints = int4(popcount(_20)); + _3.uints = popcount(_20); + _3.ints = reverse_bits(_19); + _3.uints = reverse_bits(_20); + _3.ints = extract_bits(_19, uint(1), 11u); + _3.uints = uint4(extract_bits(int4(_20), 11u, uint(1))); + _3.ints = int4(extract_bits(uint4(_19), uint(1), 11u)); + _3.uints = extract_bits(_20, 11u, uint(1)); + _3.ints = insert_bits(_19, _19.wzyx, uint(1), 11u); + _3.uints = insert_bits(_20, _20.wzyx, 11u, uint(1)); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/bitscan.asm.comp b/reference/shaders-msl-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 00000000000..1be65ec7cd4 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,53 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +// Implementation of the GLSL findLSB() function +template +inline T spvFindLSB(T x) +{ + return select(ctz(x), T(-1), x == T(0)); +} + +// Implementation of the signed GLSL findMSB() function +template +inline T spvFindSMSB(T x) +{ + T v = select(x, T(-1) - x, x < T(0)); + return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); +} + +// Implementation of the unsigned GLSL findMSB() function +template +inline T spvFindUMSB(T x) +{ + return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0)); +} + +struct SSBO +{ + uint4 u; + int4 i; +}; + +kernel void main0(device SSBO& _4 [[buffer(0)]]) +{ + uint4 _19 = _4.u; + int4 _20 = _4.i; + _4.u = spvFindLSB(_19); + _4.i = int4(spvFindLSB(_19)); + _4.u = uint4(spvFindLSB(_20)); + _4.i = spvFindLSB(_20); + _4.u = spvFindUMSB(_19); + _4.i = int4(spvFindUMSB(_19)); + _4.u = spvFindUMSB(uint4(_20)); + _4.i = int4(spvFindUMSB(uint4(_20))); + _4.u = uint4(spvFindSMSB(int4(_19))); + _4.i = spvFindSMSB(int4(_19)); + _4.u = uint4(spvFindSMSB(_20)); + _4.i = spvFindSMSB(_20); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp b/reference/shaders-msl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp new file mode 100644 index 00000000000..734a66870b9 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp @@ -0,0 +1,77 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_CommonConstants +{ + uint g_count; + packed_uint3 g_padding4; +}; + +struct MyStruct +{ + float4 m_coefficients[4]; +}; + +struct type_RWStructuredBuffer_MyStruct +{ + MyStruct _m0[1]; +}; + +constant spvUnsafeArray _27 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); + +kernel void main0(constant type_CommonConstants& CommonConstants [[buffer(0)]], device type_RWStructuredBuffer_MyStruct& g_data [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + do + { + if (gl_GlobalInvocationID.x >= CommonConstants.g_count) + { + break; + } + g_data._m0[gl_GlobalInvocationID.x] = MyStruct{ { float4(0.0), float4(0.0), float4(0.0), float4(0.0) } }; + break; + } while(false); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/block-like-array-type-construct.asm.comp b/reference/shaders-msl-no-opt/asm/comp/block-like-array-type-construct.asm.comp new file mode 100644 index 00000000000..66550535350 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/block-like-array-type-construct.asm.comp @@ -0,0 +1,77 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _10 +{ + float _m0[4]; + float _m1[4]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +struct SSBO +{ + uint a; + int b; +}; + +constant spvUnsafeArray _31 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0 }); + +kernel void main0() +{ + spvUnsafeArray<_10, 2> _34 = spvUnsafeArray<_10, 2>({ _10{ { 1.0, 2.0, 3.0, 4.0 }, { 1.0, 2.0, 3.0, 4.0 } }, _10{ { 1.0, 2.0, 3.0, 4.0 }, { 1.0, 2.0, 3.0, 4.0 } } }); + + spvUnsafeArray foo; + foo[0] = 1.0; + foo = _31; + foo[1] = 2.0; + foo[2] = 3.0; + foo[3] = 4.0; + spvUnsafeArray foo2; + foo2 = foo; + _10 _37 = _10{ { foo[0], foo[1], foo[2], foo[3] }, { foo2[0], foo2[1], foo2[2], foo2[3] } }; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/buffer-device-address-ptr-casting.msl24.asm.comp b/reference/shaders-msl-no-opt/asm/comp/buffer-device-address-ptr-casting.msl24.asm.comp new file mode 100644 index 00000000000..74464092ef0 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/buffer-device-address-ptr-casting.msl24.asm.comp @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct SomeBuffer; + +struct SomeBuffer +{ + float4 v; + ulong a; + uint2 b; +}; + +struct Registers +{ + ulong address; + uint2 address2; +}; + +kernel void main0(constant Registers& registers [[buffer(0)]]) +{ + device SomeBuffer* _44 = 
reinterpret_cast(registers.address); + device SomeBuffer* _45 = reinterpret_cast(registers.address); + device SomeBuffer* _46 = reinterpret_cast(as_type(registers.address2)); + _44->v = float4(1.0, 2.0, 3.0, 4.0); + _45->v = float4(1.0, 2.0, 3.0, 4.0); + _46->v = float4(1.0, 2.0, 3.0, 4.0); + _44->a = reinterpret_cast(_44); + _45->a = reinterpret_cast(_45); + _46->b = as_type(reinterpret_cast(_46)); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/composite-construct-buffer-struct.asm.comp b/reference/shaders-msl-no-opt/asm/comp/composite-construct-buffer-struct.asm.comp new file mode 100644 index 00000000000..2fe09814bf4 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/composite-construct-buffer-struct.asm.comp @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +struct SSBO +{ + Block _m0[3]; +}; + +kernel void main0(device SSBO& ssbo [[buffer(0)]]) +{ + threadgroup uint2 _18[2]; + ssbo._m0[0u] = Block{ { ssbo._m0[0u]._m1[0], ssbo._m0[0u]._m1[1] }, { ssbo._m0[0u]._m1[0], ssbo._m0[0u]._m1[1] } }; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp b/reference/shaders-msl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 00000000000..a5b6fc32ce2 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,61 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct SSBO +{ + int values[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 4u, 1u); + +constant spvUnsafeArray indexable = spvUnsafeArray({ 0, 1, 2, 3 }); +constant spvUnsafeArray indexable_1 = spvUnsafeArray({ 4, 5, 6, 7 }); + +kernel void main0(device SSBO& _6 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +{ + _6.values[gl_GlobalInvocationID.x] = indexable[gl_LocalInvocationID.x] + indexable_1[gl_LocalInvocationID.y]; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp new file mode 100644 index 00000000000..09a31d68a85 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp @@ -0,0 +1,60 @@ +#include +#include + +using namespace metal; + +struct _11 +{ + float2x2 _m0; +}; + +struct _12 +{ + float2x4 _m0; +}; + +struct B2 +{ + float4 elem2; +}; + +struct C +{ + float4 c; + B2 b2; + B2 b2_array[4]; + _12 _m3; +}; + +struct B1 +{ + float4 elem1; +}; + +struct A +{ + float4 a; + B1 b1; + B1 b1_array[4]; + _11 _m3; +}; + +struct _8 +{ + A a_block; + C c_block; +}; + +kernel void main0(device _8& _3 [[buffer(0)]]) +{ + A _31; + _31.a = _3.c_block.c; + _31.b1.elem1 = 
_3.c_block.b2.elem2; + _31.b1_array[0].elem1 = _3.c_block.b2_array[0].elem2; + _31.b1_array[1].elem1 = _3.c_block.b2_array[1].elem2; + _31.b1_array[2].elem1 = _3.c_block.b2_array[2].elem2; + _31.b1_array[3].elem1 = _3.c_block.b2_array[3].elem2; + _31._m3._m0 = transpose(float2x2(_3.c_block._m3._m0[0].xy, _3.c_block._m3._m0[1].xy)); + _3.a_block = _31; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp new file mode 100644 index 00000000000..54087ddc511 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp @@ -0,0 +1,54 @@ +#include +#include + +using namespace metal; + +struct _8 +{ + char _m0_pad[4]; + uint _m0; +}; + +struct _9 +{ + char _m0_pad[8]; + uint _m0; +}; + +struct _4 +{ + uint _m0; + uint4 _m1[2]; + uint _m2; + char _m3_pad[12]; + _8 _m3; + float4 _m4; + float3 _m5; + float2 _m6; +}; + +struct _5 +{ + uint _m0; + uint _m1[2]; + uint _m2; + _9 _m3; + float4 _m4; + float3 _m5; + float2 _m6; +}; + +kernel void main0(device _5& _2 [[buffer(0)]], device _4& _3 [[buffer(1)]]) +{ + _4 _23; + _23._m0 = _2._m0; + (thread uint&)_23._m1[0] = _2._m1[0]; + (thread uint&)_23._m1[1] = _2._m1[1]; + _23._m2 = _2._m2; + _23._m3._m0 = _2._m3._m0; + _23._m4 = _2._m4; + _23._m5 = _2._m5; + _23._m6 = _2._m6; + _3 = _23; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/copy-logical.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/copy-logical.spv14.asm.comp new file mode 100644 index 00000000000..2225981524a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/copy-logical.spv14.asm.comp @@ -0,0 +1,47 @@ +#include +#include + +using namespace metal; + +struct B2 +{ + float4 elem2; +}; + +struct C +{ + float4 c; + B2 b2; + B2 b2_array[4]; +}; + +struct B1 +{ + float4 elem1; +}; + +struct A +{ + float4 a; + B1 b1; + B1 b1_array[4]; +}; + 
+struct _8 +{ + A a_block; + C c_block; +}; + +kernel void main0(device _8& _3 [[buffer(0)]]) +{ + A _27; + _27.a = _3.c_block.c; + _27.b1.elem1 = _3.c_block.b2.elem2; + _27.b1_array[0].elem1 = _3.c_block.b2_array[0].elem2; + _27.b1_array[1].elem1 = _3.c_block.b2_array[1].elem2; + _27.b1_array[2].elem1 = _3.c_block.b2_array[2].elem2; + _27.b1_array[3].elem1 = _3.c_block.b2_array[3].elem2; + _3.a_block = _27; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/device-array-load-temporary.asm.comp b/reference/shaders-msl-no-opt/asm/comp/device-array-load-temporary.asm.comp new file mode 100644 index 00000000000..b024b5539d7 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/device-array-load-temporary.asm.comp @@ -0,0 +1,174 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void 
spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +struct SSBO +{ + Block _m0[3]; +}; + +kernel void main0(device SSBO& ssbo [[buffer(0)]]) +{ + threadgroup uint2 _18[2]; + spvUnsafeArray _27; + spvArrayCopyFromDeviceToStack1(_27.elements, ssbo._m0[0u]._m1); + 
spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27.elements); + spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27.elements); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.comp b/reference/shaders-msl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.comp new file mode 100644 index 00000000000..a029a283d46 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.comp @@ -0,0 +1,135 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } 
+} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +struct SSBO +{ + Block _m0[3]; +}; + +kernel void main0(device SSBO& ssbo [[buffer(0)]]) +{ + threadgroup uint2 _18[2]; + uint2 _27[2]; + spvArrayCopyFromDeviceToStack1(_27, ssbo._m0[0u]._m1); + spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27); + spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.asm.comp b/reference/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.asm.comp new file mode 100644 index 00000000000..3ebc0d91284 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.asm.comp @@ -0,0 +1,179 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; 
i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +struct SSBO +{ + Block _m0[3]; +}; + +kernel void main0(device SSBO& ssbo [[buffer(0)]], constant SSBO& ubo [[buffer(1)]]) +{ + threadgroup uint2 _18[2]; + spvArrayCopyFromDeviceToDevice1(ssbo._m0[0u]._m0, ssbo._m0[0u]._m1); + spvArrayCopyFromConstantToDevice1(ssbo._m0[0u]._m0, ubo._m0[0u]._m1); + spvUnsafeArray _24; + spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _24.elements); + spvArrayCopyFromThreadGroupToDevice1(ssbo._m0[0u]._m0, _18); + spvArrayCopyFromDeviceToThreadGroup1(_18, ssbo._m0[0u]._m1); + spvArrayCopyFromDeviceToStack1(_24.elements, ssbo._m0[0u]._m1); + spvArrayCopyFromConstantToThreadGroup1(_18, ubo._m0[0u]._m1); + spvArrayCopyFromConstantToStack1(_24.elements, ubo._m0[0u]._m1); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.comp b/reference/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.comp new file mode 100644 index 00000000000..6f63d36e6da --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.comp @@ -0,0 +1,140 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace 
metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T 
(&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +struct SSBO +{ + Block _m0[3]; +}; + +kernel void main0(device SSBO& ssbo [[buffer(0)]], constant SSBO& ubo [[buffer(1)]]) +{ + threadgroup uint2 _18[2]; + spvArrayCopyFromDeviceToDevice1(ssbo._m0[0u]._m0, ssbo._m0[0u]._m1); + spvArrayCopyFromConstantToDevice1(ssbo._m0[0u]._m0, ubo._m0[0u]._m1); + uint2 _24[2]; + spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _24); + spvArrayCopyFromThreadGroupToDevice1(ssbo._m0[0u]._m0, _18); + spvArrayCopyFromDeviceToThreadGroup1(_18, ssbo._m0[0u]._m1); + spvArrayCopyFromDeviceToStack1(_24, ssbo._m0[0u]._m1); + spvArrayCopyFromConstantToThreadGroup1(_18, ubo._m0[0u]._m1); + spvArrayCopyFromConstantToStack1(_24, ubo._m0[0u]._m1); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 00000000000..e265f1bd976 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +struct UBO +{ + float v; +}; + +struct SSBO +{ + float v; +}; + +kernel void main0() +{ + threadgroup float w; + float v; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/glsl-signed-operations.asm.comp b/reference/shaders-msl-no-opt/asm/comp/glsl-signed-operations.asm.comp index e6682c513e1..ab7a3363270 100644 --- a/reference/shaders-msl-no-opt/asm/comp/glsl-signed-operations.asm.comp +++ b/reference/shaders-msl-no-opt/asm/comp/glsl-signed-operations.asm.comp @@ -5,15 +5,9 @@ using namespace metal; -struct SSBO -{ - int4 ints; - uint4 uints; -}; - // Implementation of the signed GLSL findMSB() function template -T findSMSB(T x) +inline T 
spvFindSMSB(T x) { T v = select(x, T(-1) - x, x < T(0)); return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); @@ -21,18 +15,24 @@ T findSMSB(T x) // Implementation of the unsigned GLSL findMSB() function template -T findUMSB(T x) +inline T spvFindUMSB(T x) { return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0)); } // Implementation of the GLSL sign() function for integer types template::value>::type> -T sign(T x) +inline T sign(T x) { return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0)); } +struct SSBO +{ + int4 ints; + uint4 uints; +}; + kernel void main0(device SSBO& _4 [[buffer(0)]]) { int4 _19 = _4.ints; @@ -45,10 +45,10 @@ kernel void main0(device SSBO& _4 [[buffer(0)]]) _4.uints = uint4(sign(_19)); _4.ints = sign(int4(_20)); _4.uints = uint4(sign(int4(_20))); - _4.ints = findSMSB(int4(_20)); - _4.uints = uint4(findSMSB(int4(_20))); - _4.ints = int4(findUMSB(uint4(_19))); - _4.uints = findUMSB(uint4(_19)); + _4.ints = spvFindSMSB(int4(_20)); + _4.uints = uint4(spvFindSMSB(int4(_20))); + _4.ints = int4(spvFindUMSB(uint4(_19))); + _4.uints = spvFindUMSB(uint4(_19)); _4.ints = min(_19, _19); _4.uints = uint4(min(_19, int4(_20))); _4.ints = min(int4(_20), int4(_20)); diff --git a/reference/shaders-msl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp b/reference/shaders-msl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp new file mode 100644 index 00000000000..0063faceaa0 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct _8 +{ + float _m0; + float _m1; +}; + +struct _15 +{ + float _m0; + int _m1; +}; + +struct _3 +{ + float _m0; + int _m1; +}; + +kernel void main0(device _3& _4 [[buffer(0)]]) +{ + _8 _23; + _23._m0 = modf(20.0, _23._m1); + _15 _24; + _24._m0 = frexp(40.0, _24._m1); + _4._m0 = _23._m0; + _4._m0 = _23._m1; + _4._m0 = _24._m0; + _4._m1 = _24._m1; +} + diff --git 
a/reference/shaders-msl-no-opt/asm/comp/local-size-id-override.asm.comp b/reference/shaders-msl-no-opt/asm/comp/local-size-id-override.asm.comp new file mode 100644 index 00000000000..365f89f74f5 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/local-size-id-override.asm.comp @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 values[1]; +}; + +constant uint _10_tmp [[function_constant(1)]]; +constant uint _10 = is_function_constant_defined(_10_tmp) ? _10_tmp : 11u; +constant uint _11_tmp [[function_constant(2)]]; +constant uint _11 = is_function_constant_defined(_11_tmp) ? _11_tmp : 12u; +constant uint _4_tmp [[function_constant(3)]]; +constant uint _4 = is_function_constant_defined(_4_tmp) ? _4_tmp : 13u; +constant uint _5_tmp [[function_constant(4)]]; +constant uint _5 = is_function_constant_defined(_5_tmp) ? _5_tmp : 14u; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, _10, _11); + +kernel void main0(device SSBO& _8 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _8.values[gl_GlobalInvocationID.x] += float4(2.0); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/local-size-id.asm.comp b/reference/shaders-msl-no-opt/asm/comp/local-size-id.asm.comp new file mode 100644 index 00000000000..2dcff36923c --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/local-size-id.asm.comp @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 values[1]; +}; + +constant int _10_tmp [[function_constant(1)]]; +constant int _10 = is_function_constant_defined(_10_tmp) ? _10_tmp : 11; +constant int _11_tmp [[function_constant(2)]]; +constant int _11 = is_function_constant_defined(_11_tmp) ? _11_tmp : 12; +constant int _4_tmp [[function_constant(3)]]; +constant int _4 = is_function_constant_defined(_4_tmp) ? _4_tmp : 13; +constant int _5_tmp [[function_constant(4)]]; +constant int _5 = is_function_constant_defined(_5_tmp) ? 
_5_tmp : 14; +constant uint _29 = (uint(_4) + 3u); +constant uint3 _30 = uint3(_29, _5, 2u); + +kernel void main0(device SSBO& _8 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _8.values[gl_GlobalInvocationID.x] = ((((_8.values[gl_GlobalInvocationID.x] + float4(2.0)) + float3(_30).xyzz) * float(_4)) * float(_5)) * float(int(2u)); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/modf-storage-class.asm.comp b/reference/shaders-msl-no-opt/asm/comp/modf-storage-class.asm.comp new file mode 100644 index 00000000000..3c00707f2e2 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/modf-storage-class.asm.comp @@ -0,0 +1,44 @@ +#include +#include + +using namespace metal; + +struct _17 +{ + float2 _m0; + float2 _m1; +}; + +struct _4 +{ + uint2 _m0[324]; +}; + +struct _7 +{ + float2 _m0[648]; +}; + +struct _10 +{ + float2 _m0[648]; +}; + +kernel void main0(const device _4& _5 [[buffer(0)]], device _7& _8 [[buffer(1)]], device _10& _11 [[buffer(2)]]) +{ + for (uint _39 = 0u; _39 < 648u; _39 += 2u) + { + uint2 _40 = _5._m0[_39 / 2u]; + float2 _41 = as_type(_40); + float2 _76; + float2 _61 = modf(_41, _76); + _8._m0[_39] = _76; + _8._m0[_39 + 1u] = _61; + _17 _64; + _64._m0 = modf(_41, _64._m1); + _17 _42 = _64; + _11._m0[_39] = _42._m1; + _11._m0[_39 + 1u] = _42._m0; + } +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/opptrdiff-basic.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/opptrdiff-basic.spv14.asm.comp new file mode 100644 index 00000000000..2a8b59f0b47 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/opptrdiff-basic.spv14.asm.comp @@ -0,0 +1,51 @@ +#include +#include + +using namespace metal; + +struct _7 +{ + int _m0[1][4]; +}; + +struct _9 +{ + int _m0[1][17]; +}; + +struct _11 +{ + int _m0; +}; + +kernel void main0(device _7& _2 [[buffer(0)]], device _9& _3 [[buffer(1)]], constant _11& _4 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID 
[[threadgroup_position_in_grid]]) +{ + if (int3(gl_WorkGroupID).x >= _4._m0) + { + return; + } + int _49; + if (int3(gl_LocalInvocationID).x == 1) + { + _3._m0[int3(gl_WorkGroupID).x][16] = &_2._m0[int3(gl_WorkGroupID).x] - &_2._m0[0]; + _49 = 0; + } + else + { + _49 = 0; + } + for (;;) + { + int _50 = _49 + 1; + _3._m0[int3(gl_WorkGroupID).x][(int3(gl_LocalInvocationID).x * 4) + _49] = &_2._m0[int3(gl_WorkGroupID).x][int3(gl_LocalInvocationID).x] - &_2._m0[int3(gl_WorkGroupID).x][_49]; + if (_50 == 4) + { + break; + } + else + { + _49 = _50; + } + } +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp new file mode 100644 index 00000000000..69e76f3f38b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp @@ -0,0 +1,45 @@ +#include +#include + +using namespace metal; + +struct _7 +{ + int _m0; + int _m1[1]; +}; + +struct _9 +{ + int2 _m0[1]; +}; + +kernel void main0(device _7& _2 [[buffer(0)]], device _9& _3 [[buffer(1)]]) +{ + int _28 = _2._m0; + device int* _4 = &_2._m1[0]; + device int* _5 = &_2._m1[0 + _28]; + int _34; + if (!(_28 <= 0)) + { + _34 = 0; + for (;;) + { + device int* _36 = _4; + device int* _37 = _5; + int _35 = _34 + 1; + _4 = &_36[1]; + _5 = &_37[-1]; + _3._m0[_34] = int2(_36 - _37, _37 - _36); + if (_34 >= _28) + { + break; + } + else + { + _34 = _35; + } + } + } +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp new file mode 100644 index 00000000000..52916413e55 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct _7 +{ + uint _m0[1]; +}; + +kernel void main0(device _7& _2 [[buffer(0)]], device _7& _3 [[buffer(1)]], 
device _7& _4 [[buffer(2)]], device _7& _5 [[buffer(3)]]) +{ + uint _18 = 0u; + uint _28 = _18 + 1u; + _5._m0[_18] = uint(&_2 == &_3); + uint _32 = _28 + 1u; + _5._m0[_28] = uint(&_2._m0 == &_3._m0); + uint _36 = _32 + 1u; + _5._m0[_32] = uint(&_2._m0[0u] == &_3._m0[0u]); + uint _40 = _36 + 1u; + _5._m0[_36] = uint(&_2 == &_4); + uint _44 = _40 + 1u; + _5._m0[_40] = uint(&_2._m0 == &_4._m0); + uint _48 = _44 + 1u; + _5._m0[_44] = uint(&_2._m0[0u] == &_4._m0[0u]); + uint _52 = _48 + 1u; + _5._m0[_48] = uint(&_3 == &_4); + uint _56 = _52 + 1u; + _5._m0[_52] = uint(&_3._m0 == &_4._m0); + _5._m0[_56] = uint(&_3._m0[0u] == &_4._m0[0u]); + _5._m0[_56 + 1u] = uint(&_2 == &_2); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp new file mode 100644 index 00000000000..16d29c1d6c7 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp @@ -0,0 +1,37 @@ +#include +#include + +using namespace metal; + +struct _6 +{ + float4x4 _m0; + float4x4 _m1; + float _m2; + float _m3; +}; + +struct _7 +{ + uint _m0[1]; +}; + +kernel void main0(device _6& _2 [[buffer(0)]], device _6& _3 [[buffer(1)]], device _7& _4 [[buffer(2)]]) +{ + uint _26 = 0u; + uint _39 = _26 + 1u; + _4._m0[_26] = (&_2._m2 == &_2._m3) ? 0u : 1u; + bool _40 = &_2._m2 == &_3._m2; + uint _43 = _39 + 1u; + _4._m0[_39] = _40 ? 0u : 1u; + bool _46 = (_40 ? &_2._m2 : &_2._m3) == (_40 ? &_3._m2 : &_3._m3); + uint _49 = _43 + 1u; + _4._m0[_43] = _46 ? 0u : 1u; + uint _54 = _49 + 1u; + _4._m0[_49] = ((_46 ? &_2._m2 : &_2._m3) == &((device float*)&_2._m0[0u])[0u]) ? 0u : 1u; + uint _56 = (&_2._m0 == &_2._m1) ? 
0u : 1u; + uint _58 = _54 + 1u; + _4._m0[_54] = _56; + _4._m0[_58] = _56; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp new file mode 100644 index 00000000000..d9af203553e --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct _7 +{ + uint _m0[1]; +}; + +kernel void main0(device _7& _2 [[buffer(0)]], device _7& _3 [[buffer(1)]], device _7& _4 [[buffer(2)]], device _7& _5 [[buffer(3)]]) +{ + uint _18 = 0u; + uint _28 = _18 + 1u; + _5._m0[_18] = uint(&_2 != &_3); + uint _32 = _28 + 1u; + _5._m0[_28] = uint(&_2._m0 != &_3._m0); + uint _36 = _32 + 1u; + _5._m0[_32] = uint(&_2._m0[0u] != &_3._m0[0u]); + uint _40 = _36 + 1u; + _5._m0[_36] = uint(&_2 != &_4); + uint _44 = _40 + 1u; + _5._m0[_40] = uint(&_2._m0 != &_4._m0); + uint _48 = _44 + 1u; + _5._m0[_44] = uint(&_2._m0[0u] != &_4._m0[0u]); + uint _52 = _48 + 1u; + _5._m0[_48] = uint(&_3 != &_4); + uint _56 = _52 + 1u; + _5._m0[_52] = uint(&_3._m0 != &_4._m0); + _5._m0[_56] = uint(&_3._m0[0u] != &_4._m0[0u]); + _5._m0[_56 + 1u] = uint(&_2 != &_2); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp b/reference/shaders-msl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp new file mode 100644 index 00000000000..dda85050991 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + int values[1]; +}; + +constant int A_tmp [[function_constant(0)]]; +constant int A = is_function_constant_defined(A_tmp) ? A_tmp : 0; +constant int A_1_tmp [[function_constant(1)]]; +constant int A_1 = is_function_constant_defined(A_1_tmp) ? 
A_1_tmp : 1; +constant int A_2_tmp [[function_constant(2)]]; +constant int A_2 = is_function_constant_defined(A_2_tmp) ? A_2_tmp : 2; +constant int A_3_tmp [[function_constant(3)]]; +constant int A_3 = is_function_constant_defined(A_3_tmp) ? A_3_tmp : 3; +constant int A_4_tmp [[function_constant(4)]]; +constant int A_4 = is_function_constant_defined(A_4_tmp) ? A_4_tmp : 4; +constant int A_5_tmp [[function_constant(5)]]; +constant int A_5 = is_function_constant_defined(A_5_tmp) ? A_5_tmp : 5; +constant int A_6 = (A - A_1); +constant int A_7 = (A_6 - A_2); +constant int A_8 = (A_7 - A_3); +constant int A_9 = (A_8 - A_4); +constant int A_10 = (A_9 - A_5); +constant int A_11 = (A_10 + A_5); +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _5 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _5.values[gl_GlobalInvocationID.x] = A_11; +} + diff --git a/reference/opt/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp b/reference/shaders-msl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp similarity index 100% rename from reference/opt/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp rename to reference/shaders-msl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp index 473298c2741..5b1ed8ae243 100644 --- a/reference/opt/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp +++ b/reference/shaders-msl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp @@ -16,7 +16,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(_3, 2u, _4); kernel void main0(device _6& _8 [[buffer(0)]], device _6& _9 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { - _8._m0[gl_WorkGroupID.x] = _9._m0[gl_WorkGroupID.x] + _8._m0[gl_WorkGroupID.x]; uint3 _23 = gl_WorkGroupSize; + _8._m0[gl_WorkGroupID.x] = _9._m0[gl_WorkGroupID.x] + _8._m0[gl_WorkGroupID.x]; } diff --git a/reference/shaders-msl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp 
b/reference/shaders-msl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp index ec40c6afc13..6e9768540b7 100644 --- a/reference/shaders-msl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp +++ b/reference/shaders-msl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp @@ -15,6 +15,7 @@ struct SSBORead float b; }; +static inline __attribute__((always_inline)) void copy_out(device float& A, device const float& B) { A = B; diff --git a/reference/shaders-msl-no-opt/asm/comp/variable-pointers.asm.comp b/reference/shaders-msl-no-opt/asm/comp/variable-pointers.asm.comp index 37731c720ec..7c9718d1c95 100644 --- a/reference/shaders-msl-no-opt/asm/comp/variable-pointers.asm.comp +++ b/reference/shaders-msl-no-opt/asm/comp/variable-pointers.asm.comp @@ -22,16 +22,19 @@ struct baz int e[128]; }; +static inline __attribute__((always_inline)) device int* select_buffer(device foo& buf, device baz& buf2, constant bar& cb) { return (cb.d != 0) ? &buf.a[0u] : &buf2.e[0u]; } +static inline __attribute__((always_inline)) device int* select_buffer_null(device foo& buf, constant bar& cb) { return (cb.d != 0) ? &buf.a[0u] : nullptr; } +static inline __attribute__((always_inline)) threadgroup int* select_tgsm(constant bar& cb, threadgroup int (&tgsm)[128]) { return (cb.d != 0) ? 
&tgsm[0u] : nullptr; diff --git a/reference/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.argument.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.argument.msl2.asm.frag new file mode 100644 index 00000000000..a7d3550a7aa --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.argument.msl2.asm.frag @@ -0,0 +1,27 @@ +#include +#include + +using namespace metal; + +struct UBOs +{ + float4 v; +}; + +struct spvDescriptorSetBuffer0 +{ + constant UBOs* ubos [[id(0)]][2]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) +{ + main0_out out = {}; + out.FragColor = spvDescriptorSet0.ubos[0]->v + spvDescriptorSet0.ubos[1]->v; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.asm.frag b/reference/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.asm.frag new file mode 100644 index 00000000000..b6fe72b8d1a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.asm.frag @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct UBOs +{ + float4 v; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant UBOs* ubos_0 [[buffer(0)]], constant UBOs* ubos_1 [[buffer(1)]]) +{ + constant UBOs* ubos[] = + { + ubos_0, + ubos_1, + }; + + main0_out out = {}; + out.FragColor = ubos[0]->v + ubos[1]->v; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag b/reference/shaders-msl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag new file mode 100644 index 00000000000..e4397f828fa --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag @@ -0,0 +1,85 @@ +#include +#include + +using namespace metal; + +struct anon_aa +{ + int foo; +}; + +struct anon_ab +{ + 
int foo; +}; + +struct anon_a +{ + anon_aa _aa; + anon_ab ab; +}; + +struct anon_ba +{ + int foo; +}; + +struct anon_bb +{ + int foo; +}; + +struct anon_b +{ + anon_ba _ba; + anon_bb bb; +}; + +struct VertexData +{ + anon_a _a; + anon_b b; +}; + +struct anon_ca +{ + int foo; +}; + +struct anon_c +{ + anon_ca _ca; +}; + +struct anon_da +{ + int foo; +}; + +struct anon_d +{ + anon_da da; +}; + +struct UBO +{ + anon_c _c; + anon_d d; +}; + +struct anon_e +{ + int a; +}; + +struct SSBO +{ + anon_e _m0; + anon_e _e; + anon_e f; +}; + +fragment void main0() +{ +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/component-insert-packed-expression.asm.frag b/reference/shaders-msl-no-opt/asm/frag/component-insert-packed-expression.asm.frag new file mode 100644 index 00000000000..2da91dac7de --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/component-insert-packed-expression.asm.frag @@ -0,0 +1,34 @@ +#include +#include + +using namespace metal; + +struct type_Globals +{ + float4 _BorderWidths[4]; +}; + +struct main0_out +{ + float4 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Globals& _Globals [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float2 _31 = float2(_Globals._BorderWidths[0].x, _Globals._BorderWidths[1].x); + float2 _39; + if (gl_FragCoord.x > 0.0) + { + float2 _38 = _31; + _38.x = _Globals._BorderWidths[2].x; + _39 = _38; + } + else + { + _39 = _31; + } + out.out_var_SV_Target = float4(_39, 0.0, 1.0); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/composite-insert-inheritance.asm.frag b/reference/shaders-msl-no-opt/asm/frag/composite-insert-inheritance.asm.frag new file mode 100644 index 00000000000..a4bb56283a1 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/composite-insert-inheritance.asm.frag @@ -0,0 +1,121 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using 
namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant float4 _32 = {}; + +constant spvUnsafeArray _34 = spvUnsafeArray({ float4(0.0), float4(0.0) }); + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vInput [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 _37 = in.vInput; + float4 _38 = _37; + _38.x = 1.0; + _38.y = 2.0; + _38.z = 3.0; + _38.w = 4.0; + out.FragColor = _38; + float4 _6 = _37; + _6.x = 1.0; + _6.y = 2.0; + _6.z = 3.0; + _6.w = 4.0; + out.FragColor = _6; + float4 _42 = _37; + _42.x = 1.0; + _42.y = 2.0; + _42.z = 3.0; + _42.w = 4.0; + out.FragColor = _42; + float4 _44 = _37; + _44.x = 1.0; + float4 _45 = _44; + _45.y = 2.0; + float4 _46 = _45; + _46.z = 3.0; + float4 _47 = _46; + _47.w = 4.0; + out.FragColor = _47 + _44; + out.FragColor = _47 + _45; + float4 _49; + _49.x = 1.0; + _49.y = 2.0; + _49.z = 3.0; + _49.w = 4.0; + out.FragColor = _49; + float4 _53 = float4(0.0); + _53.x = 1.0; + out.FragColor = _53; + spvUnsafeArray _54 = _34; + _54[1].z = 1.0; + _54[0].w = 2.0; + out.FragColor = _54[0]; + out.FragColor = _54[1]; + float4x4 _58 = float4x4(float4(0.0), float4(0.0), float4(0.0), float4(0.0)); + _58[1].z = 1.0; + _58[2].w = 2.0; + out.FragColor = _58[0]; + 
out.FragColor = _58[1]; + out.FragColor = _58[2]; + out.FragColor = _58[3]; + float4 PHI; + PHI = _46; + float4 _65 = PHI; + _65.w = 4.0; + out.FragColor = _65; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/constant-composite-block-no-array-stride-2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/constant-composite-block-no-array-stride-2.asm.frag new file mode 100644 index 00000000000..eb78db53672 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/constant-composite-block-no-array-stride-2.asm.frag @@ -0,0 +1,70 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _3 +{ + spvUnsafeArray _m0; + float _m1[2]; + spvUnsafeArray _m2; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ 1.0, 2.0 }); +constant spvUnsafeArray _16 = spvUnsafeArray({ 3.0, 4.0 }); +constant spvUnsafeArray _17 = spvUnsafeArray({ 5.0, 6.0 }); + +struct main0_out +{ + float m_2 [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + _3 _23 = _3{ spvUnsafeArray({ 1.0, 2.0 }), { 3.0, 4.0 }, spvUnsafeArray({ 5.0, 6.0 }) }; + out.m_2 = 1.0; + return out; +} + diff --git 
a/reference/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag b/reference/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag new file mode 100644 index 00000000000..faa528bc327 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag @@ -0,0 +1,170 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup 
const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct _3 +{ + float _m0[4]; +}; + +fragment void main0() +{ + spvUnsafeArray _20; + _20[0u] = 0.0; + _20[1u] = 0.0; + _20[2u] = 0.0; + _20[3u] = 0.0; + _3 _19; + spvArrayCopyFromStackToStack1(_19._m0, _20.elements); +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/empty-struct-in-struct.asm.frag b/reference/shaders-msl-no-opt/asm/frag/empty-struct-in-struct.asm.frag new file mode 100644 index 00000000000..fdf4a92b993 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/empty-struct-in-struct.asm.frag @@ -0,0 +1,35 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct EmptyStructTest +{ +}; +struct EmptyStruct2Test +{ + 
EmptyStructTest _m0; +}; + +static inline __attribute__((always_inline)) +float GetValue(thread const EmptyStruct2Test& self) +{ + return 0.0; +} + +static inline __attribute__((always_inline)) +float GetValue_1(EmptyStruct2Test self) +{ + return 0.0; +} + +fragment void main0() +{ + EmptyStruct2Test emptyStruct; + float value = GetValue(emptyStruct); + value = GetValue_1(EmptyStruct2Test{ EmptyStructTest{ } }); + value = GetValue_1(EmptyStruct2Test{ { } }); +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/fixup-entry-point-identifier.nomain.asm.frag b/reference/shaders-msl-no-opt/asm/frag/fixup-entry-point-identifier.nomain.asm.frag new file mode 100644 index 00000000000..9a5e195b488 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/fixup-entry-point-identifier.nomain.asm.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct _5ma_in_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment _5ma_in_out _5ma_in() +{ + _5ma_in_out out = {}; + out.FragColor = float4(1.0); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/image-gather.asm.frag b/reference/shaders-msl-no-opt/asm/frag/image-gather.asm.frag new file mode 100644 index 00000000000..47253429a84 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/image-gather.asm.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d g_texture [[texture(0)]], sampler g_sampler [[sampler(0)]], sampler g_comp [[sampler(1)]]) +{ + main0_out out = {}; + out.out_var_SV_Target0 = g_texture.gather(g_sampler, in.in_var_TEXCOORD0, int2(0), component::x) * g_texture.gather(g_sampler, in.in_var_TEXCOORD0, int2(0), component::y); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/in_block_with_aliased_struct_and_name.asm.frag 
b/reference/shaders-msl-no-opt/asm/frag/in_block_with_aliased_struct_and_name.asm.frag new file mode 100644 index 00000000000..daeccaedc6b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/in_block_with_aliased_struct_and_name.asm.frag @@ -0,0 +1,105 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Foo +{ + float a; + float b; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float ALIAS_0_a [[user(locn1)]]; + float ALIAS_0_b [[user(locn2)]]; + float ALIAS_1_a [[user(locn3)]]; + float ALIAS_1_b [[user(locn4)]]; + float ALIAS_2_a [[user(locn5)]]; + float ALIAS_2_b [[user(locn6)]]; + float ALIAS_3_a [[user(locn7)]]; + float ALIAS_3_b [[user(locn8)]]; + float ALIAS_1_0_a [[user(locn10)]]; + float ALIAS_1_0_b [[user(locn11)]]; + float ALIAS_1_1_a [[user(locn12)]]; + float ALIAS_1_1_b [[user(locn13)]]; + float ALIAS_1_2_a [[user(locn14)]]; + float ALIAS_1_2_b [[user(locn15)]]; + float ALIAS_1_3_a [[user(locn16)]]; + float ALIAS_1_3_b [[user(locn17)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray ALIAS = {}; + spvUnsafeArray ALIAS_1 = {}; + 
ALIAS[0].a = in.ALIAS_0_a; + ALIAS[0].b = in.ALIAS_0_b; + ALIAS[1].a = in.ALIAS_1_a; + ALIAS[1].b = in.ALIAS_1_b; + ALIAS[2].a = in.ALIAS_2_a; + ALIAS[2].b = in.ALIAS_2_b; + ALIAS[3].a = in.ALIAS_3_a; + ALIAS[3].b = in.ALIAS_3_b; + ALIAS_1[0].a = in.ALIAS_1_0_a; + ALIAS_1[0].b = in.ALIAS_1_0_b; + ALIAS_1[1].a = in.ALIAS_1_1_a; + ALIAS_1[1].b = in.ALIAS_1_1_b; + ALIAS_1[2].a = in.ALIAS_1_2_a; + ALIAS_1[2].b = in.ALIAS_1_2_b; + ALIAS_1[3].a = in.ALIAS_1_3_a; + ALIAS_1[3].b = in.ALIAS_1_3_b; + out.FragColor.x = ALIAS[0].a; + out.FragColor.y = ALIAS[1].b; + out.FragColor.z = ALIAS[2].a; + out.FragColor.w = ALIAS_1[3].b; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag b/reference/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag deleted file mode 100644 index 1af9edc351c..00000000000 --- a/reference/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag +++ /dev/null @@ -1,236 +0,0 @@ -#include -#include - -using namespace metal; - -struct VertexOutput -{ - float4 HPosition; - float4 Uv_EdgeDistance1; - float4 UvStuds_EdgeDistance2; - float4 Color; - float4 LightPosition_Fog; - float4 View_Depth; - float4 Normal_SpecPower; - float3 Tangent; - float4 PosLightSpace_Reflectance; - float studIndex; -}; - -struct Surface -{ - float3 albedo; - float3 normal; - float specular; - float gloss; - float reflectance; - float opacity; -}; - -struct SurfaceInput -{ - float4 Color; - float2 Uv; - float2 UvStuds; -}; - -struct Globals -{ - float4x4 ViewProjection; - float4 ViewRight; - float4 ViewUp; - float4 ViewDir; - float3 CameraPosition; - float3 AmbientColor; - float3 Lamp0Color; - float3 Lamp0Dir; - float3 Lamp1Color; - float4 FogParams; - float3 FogColor; - float4 LightBorder; - float4 LightConfig0; - float4 LightConfig1; - float4 LightConfig2; - float4 LightConfig3; - float4 RefractionBias_FadeDistance_GlowFactor; - float4 OutlineBrightness_ShadowInfo; - float4 ShadowMatrix0; - float4 
ShadowMatrix1; - float4 ShadowMatrix2; -}; - -struct CB0 -{ - Globals CB0; -}; - -struct Params -{ - float4 LqmatFarTilingFactor; -}; - -struct CB2 -{ - Params CB2; -}; - -constant VertexOutput _121 = {}; -constant SurfaceInput _122 = {}; -constant float2 _123 = {}; -constant float4 _124 = {}; -constant Surface _125 = {}; -constant float4 _192 = {}; -constant float4 _219 = {}; -constant float4 _297 = {}; - -struct main0_out -{ - float4 _entryPointOutput [[color(0)]]; -}; - -struct main0_in -{ - float4 IN_Uv_EdgeDistance1 [[user(locn0)]]; - float4 IN_UvStuds_EdgeDistance2 [[user(locn1)]]; - float4 IN_Color [[user(locn2)]]; - float4 IN_LightPosition_Fog [[user(locn3)]]; - float4 IN_View_Depth [[user(locn4)]]; - float4 IN_Normal_SpecPower [[user(locn5)]]; - float3 IN_Tangent [[user(locn6)]]; - float4 IN_PosLightSpace_Reflectance [[user(locn7)]]; - float IN_studIndex [[user(locn8)]]; -}; - -fragment main0_out main0(main0_in in [[stage_in]], constant CB0& _19 [[buffer(0)]], texture3d LightMapTexture [[texture(0)]], texture2d ShadowMapTexture [[texture(1)]], texturecube EnvironmentMapTexture [[texture(2)]], texture2d DiffuseMapTexture [[texture(3)]], texture2d NormalMapTexture [[texture(4)]], texture2d NormalDetailMapTexture [[texture(5)]], texture2d StudsMapTexture [[texture(6)]], texture2d SpecularMapTexture [[texture(7)]], sampler LightMapSampler [[sampler(0)]], sampler ShadowMapSampler [[sampler(1)]], sampler EnvironmentMapSampler [[sampler(2)]], sampler DiffuseMapSampler [[sampler(3)]], sampler NormalMapSampler [[sampler(4)]], sampler NormalDetailMapSampler [[sampler(5)]], sampler StudsMapSampler [[sampler(6)]], sampler SpecularMapSampler [[sampler(7)]], float4 gl_FragCoord [[position]]) -{ - main0_out out = {}; - VertexOutput _128 = _121; - _128.HPosition = gl_FragCoord; - VertexOutput _130 = _128; - _130.Uv_EdgeDistance1 = in.IN_Uv_EdgeDistance1; - VertexOutput _132 = _130; - _132.UvStuds_EdgeDistance2 = in.IN_UvStuds_EdgeDistance2; - VertexOutput _134 = _132; - 
_134.Color = in.IN_Color; - VertexOutput _136 = _134; - _136.LightPosition_Fog = in.IN_LightPosition_Fog; - VertexOutput _138 = _136; - _138.View_Depth = in.IN_View_Depth; - VertexOutput _140 = _138; - _140.Normal_SpecPower = in.IN_Normal_SpecPower; - VertexOutput _142 = _140; - _142.Tangent = in.IN_Tangent; - VertexOutput _144 = _142; - _144.PosLightSpace_Reflectance = in.IN_PosLightSpace_Reflectance; - VertexOutput _146 = _144; - _146.studIndex = in.IN_studIndex; - SurfaceInput _147 = _122; - _147.Color = in.IN_Color; - SurfaceInput _149 = _147; - _149.Uv = in.IN_Uv_EdgeDistance1.xy; - SurfaceInput _151 = _149; - _151.UvStuds = in.IN_UvStuds_EdgeDistance2.xy; - SurfaceInput _156 = _151; - _156.UvStuds.y = (fract(_151.UvStuds.y) + in.IN_studIndex) * 0.25; - float _163 = _146.View_Depth.w * _19.CB0.RefractionBias_FadeDistance_GlowFactor.y; - float _165 = fast::clamp(1.0 - _163, 0.0, 1.0); - float2 _166 = in.IN_Uv_EdgeDistance1.xy * 1.0; - bool _173; - float4 _193; - do - { - _173 = 0.0 == 0.0; - if (_173) - { - _193 = DiffuseMapTexture.sample(DiffuseMapSampler, _166); - break; - } - else - { - float _180 = 1.0 / (1.0 - 0.0); - _193 = mix(DiffuseMapTexture.sample(DiffuseMapSampler, (_166 * 0.25)), DiffuseMapTexture.sample(DiffuseMapSampler, _166), float4(fast::clamp((fast::clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0) * _180) - (0.0 * _180), 0.0, 1.0))); - break; - } - _193 = _192; - break; - } while (false); - float4 _194 = _193 * 1.0; - float4 _220; - do - { - if (_173) - { - _220 = NormalMapTexture.sample(NormalMapSampler, _166); - break; - } - else - { - float _207 = 1.0 / (1.0 - 0.0); - _220 = mix(NormalMapTexture.sample(NormalMapSampler, (_166 * 0.25)), NormalMapTexture.sample(NormalMapSampler, _166), float4(fast::clamp((_165 * _207) - (0.0 * _207), 0.0, 1.0))); - break; - } - _220 = _219; - break; - } while (false); - float2 _223 = float2(1.0); - float2 _224 = (_220.wy * 2.0) - _223; - float3 _232 = float3(_224, 
sqrt(fast::clamp(1.0 + dot(-_224, _224), 0.0, 1.0))); - float2 _240 = (NormalDetailMapTexture.sample(NormalDetailMapSampler, (_166 * 0.0)).wy * 2.0) - _223; - float2 _252 = _232.xy + (float3(_240, sqrt(fast::clamp(1.0 + dot(-_240, _240), 0.0, 1.0))).xy * 0.0); - float3 _253 = float3(_252.x, _252.y, _232.z); - float2 _255 = _253.xy * _165; - float3 _256 = float3(_255.x, _255.y, _253.z); - float3 _271 = ((in.IN_Color.xyz * _194.xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (StudsMapTexture.sample(StudsMapSampler, _156.UvStuds).x * 2.0); - float4 _298; - do - { - if (0.75 == 0.0) - { - _298 = SpecularMapTexture.sample(SpecularMapSampler, _166); - break; - } - else - { - float _285 = 1.0 / (1.0 - 0.75); - _298 = mix(SpecularMapTexture.sample(SpecularMapSampler, (_166 * 0.25)), SpecularMapTexture.sample(SpecularMapSampler, _166), float4(fast::clamp((_165 * _285) - (0.75 * _285), 0.0, 1.0))); - break; - } - _298 = _297; - break; - } while (false); - float2 _303 = mix(float2(0.800000011920928955078125, 120.0), (_298.xy * float2(2.0, 256.0)) + float2(0.0, 0.00999999977648258209228515625), float2(_165)); - Surface _304 = _125; - _304.albedo = _271; - Surface _305 = _304; - _305.normal = _256; - float _306 = _303.x; - Surface _307 = _305; - _307.specular = _306; - float _308 = _303.y; - Surface _309 = _307; - _309.gloss = _308; - float _312 = (_298.xy.y * _165) * 0.0; - Surface _313 = _309; - _313.reflectance = _312; - float4 _318 = float4(_271, _146.Color.w); - float3 _329 = normalize(((in.IN_Tangent * _313.normal.x) + (cross(in.IN_Normal_SpecPower.xyz, in.IN_Tangent) * _313.normal.y)) + (in.IN_Normal_SpecPower.xyz * _313.normal.z)); - float3 _332 = -_19.CB0.Lamp0Dir; - float _333 = dot(_329, _332); - float _357 = fast::clamp(dot(step(_19.CB0.LightConfig3.xyz, abs(in.IN_LightPosition_Fog.xyz - _19.CB0.LightConfig2.xyz)), float3(1.0)), 0.0, 1.0); - float4 _368 = mix(LightMapTexture.sample(LightMapSampler, (in.IN_LightPosition_Fog.xyz.yzx - 
(in.IN_LightPosition_Fog.xyz.yzx * _357))), _19.CB0.LightBorder, float4(_357)); - float2 _376 = ShadowMapTexture.sample(ShadowMapSampler, in.IN_PosLightSpace_Reflectance.xyz.xy).xy; - float _392 = (1.0 - (((step(_376.x, in.IN_PosLightSpace_Reflectance.xyz.z) * fast::clamp(9.0 - (20.0 * abs(in.IN_PosLightSpace_Reflectance.xyz.z - 0.5)), 0.0, 1.0)) * _376.y) * _19.CB0.OutlineBrightness_ShadowInfo.w)) * _368.w; - float3 _403 = mix(_318.xyz, EnvironmentMapTexture.sample(EnvironmentMapSampler, reflect(-in.IN_View_Depth.xyz, _329)).xyz, float3(_312)); - float4 _404 = float4(_403.x, _403.y, _403.z, _318.w); - float3 _422 = (((_19.CB0.AmbientColor + (((_19.CB0.Lamp0Color * fast::clamp(_333, 0.0, 1.0)) + (_19.CB0.Lamp1Color * fast::max(-_333, 0.0))) * _392)) + _368.xyz) * _404.xyz) + (_19.CB0.Lamp0Color * (((step(0.0, _333) * _306) * _392) * pow(fast::clamp(dot(_329, normalize(_332 + normalize(in.IN_View_Depth.xyz))), 0.0, 1.0), _308))); - float4 _425 = float4(_422.x, _422.y, _422.z, _124.w); - _425.w = _404.w; - float2 _435 = fast::min(in.IN_Uv_EdgeDistance1.wz, in.IN_UvStuds_EdgeDistance2.wz); - float _439 = fast::min(_435.x, _435.y) / _163; - float3 _445 = _425.xyz * fast::clamp((fast::clamp((_163 * _19.CB0.OutlineBrightness_ShadowInfo.x) + _19.CB0.OutlineBrightness_ShadowInfo.y, 0.0, 1.0) * (1.5 - _439)) + _439, 0.0, 1.0); - float4 _446 = float4(_445.x, _445.y, _445.z, _425.w); - float3 _453 = mix(_19.CB0.FogColor, _446.xyz, float3(fast::clamp(_146.LightPosition_Fog.w, 0.0, 1.0))); - out._entryPointOutput = float4(_453.x, _453.y, _453.z, _446.w); - return out; -} - diff --git a/reference/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag b/reference/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag new file mode 100644 index 00000000000..0643acfa72d --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag @@ -0,0 +1,25 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + 
+#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d uInput, thread float4& gl_FragCoord) +{ + return uInput.read(uint2(gl_FragCoord.xy)); +} + +fragment main0_out main0(texture2d uSubpass0 [[texture(0)]], texture2d uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy)) + load_subpasses(uSubpass1, gl_FragCoord); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/modf-frexp-scalar-access-chain-output.asm.frag b/reference/shaders-msl-no-opt/asm/frag/modf-frexp-scalar-access-chain-output.asm.frag new file mode 100644 index 00000000000..910c8fa734b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/modf-frexp-scalar-access-chain-output.asm.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + float3 col; + int2 _18; + float _23; + float _21 = modf(0.1500000059604644775390625, _23); + col.x = _23; + int _24; + float _22 = frexp(0.1500000059604644775390625, _24); + _18.y = _24; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag b/reference/shaders-msl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag new file mode 100644 index 00000000000..3f552ebbd04 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float gl_FragDepth [[depth(any)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.gl_FragDepth = 0.5; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/phi.zero-initialize.asm.frag b/reference/shaders-msl-no-opt/asm/frag/phi.zero-initialize.asm.frag new file mode 100644 index 00000000000..cffd0bd1afb --- /dev/null +++ 
b/reference/shaders-msl-no-opt/asm/frag/phi.zero-initialize.asm.frag @@ -0,0 +1,42 @@ +#include +#include + +using namespace metal; + +constant int uninit_int = {}; +constant int4 uninit_vector = {}; +constant float4x4 uninit_matrix = {}; + +struct Foo +{ + int a; +}; + +constant Foo uninit_foo = {}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + int _39 = {}; + if (in.vColor.x > 10.0) + { + _39 = 10; + } + else + { + _39 = 20; + } + out.FragColor = in.vColor; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag new file mode 100644 index 00000000000..8ceb9f43e72 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag @@ -0,0 +1,37 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +static inline __attribute__((always_inline)) +void callee2(thread float4& gl_FragCoord, device SSBO1& v_7) +{ + int _31 = int(gl_FragCoord.x); + v_7.values1[_31]++; +} + +static inline __attribute__((always_inline)) +void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9) +{ + int _39 = int(gl_FragCoord.x); + v_9.values0[_39]++; + callee2(gl_FragCoord, v_7); +} + +fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device SSBO0& v_9 [[buffer(1)]], float4 gl_FragCoord [[position]]) +{ + callee(gl_FragCoord, v_7, v_9); +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag new file mode 100644 index 00000000000..a3823163914 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag @@ -0,0 +1,52 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct _12 +{ + uint _m0[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +static inline __attribute__((always_inline)) +void callee2(thread float4& gl_FragCoord, device SSBO1& v_7) +{ + int _44 = int(gl_FragCoord.x); + v_7.values1[_44]++; +} + +static inline __attribute__((always_inline)) +void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9) +{ + int _52 = int(gl_FragCoord.x); + v_9.values0[_52]++; + callee2(gl_FragCoord, v_7); + if (true) + { + } +} + +static inline __attribute__((always_inline)) +void _35(thread float4& gl_FragCoord, device _12& v_13) +{ + v_13._m0[int(gl_FragCoord.x)] = 4u; +} + +fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device _12& v_13 [[buffer(1)]], device SSBO0& v_9 [[buffer(2), raster_order_group(0)]], float4 gl_FragCoord [[position]]) +{ + callee(gl_FragCoord, v_7, v_9); + _35(gl_FragCoord, v_13); +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag new file mode 100644 index 00000000000..beb21241f4e --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag @@ -0,0 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +static inline __attribute__((always_inline)) +void callee2(thread float4& gl_FragCoord, device SSBO1& v_7) +{ + int _37 = int(gl_FragCoord.x); + v_7.values1[_37]++; +} + +static inline __attribute__((always_inline)) +void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9) 
+{ + int _45 = int(gl_FragCoord.x); + v_9.values0[_45]++; + callee2(gl_FragCoord, v_7); +} + +static inline __attribute__((always_inline)) +void _29() +{ +} + +static inline __attribute__((always_inline)) +void _31() +{ +} + +fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device SSBO0& v_9 [[buffer(1), raster_order_group(0)]], float4 gl_FragCoord [[position]]) +{ + callee(gl_FragCoord, v_7, v_9); + _29(); + _31(); +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/private-initializer-direct-store.asm.frag b/reference/shaders-msl-no-opt/asm/frag/private-initializer-direct-store.asm.frag new file mode 100644 index 00000000000..5bc3c47ef97 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/private-initializer-direct-store.asm.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + float b = 10.0; + b = 20.0; + out.FragColor = b + b; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/reserved-function-identifier.asm.frag b/reference/shaders-msl-no-opt/asm/frag/reserved-function-identifier.asm.frag new file mode 100644 index 00000000000..5c8ec371e30 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/reserved-function-identifier.asm.frag @@ -0,0 +1,33 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float _mat3(thread const float& a) +{ + return a + 1.0; +} + +static inline __attribute__((always_inline)) +float _RESERVED_IDENTIFIER_FIXUP_gl_Foo(thread const int& a) +{ + return float(a) + 1.0; +} + +fragment main0_out main0() +{ + main0_out out = {}; + float param = 2.0; + int param_1 = 4; + out.FragColor = _mat3(param) + _RESERVED_IDENTIFIER_FIXUP_gl_Foo(param_1); + return out; +} + diff --git 
a/reference/shaders-msl-no-opt/asm/frag/scalar-select.spv14.asm.frag b/reference/shaders-msl-no-opt/asm/frag/scalar-select.spv14.asm.frag new file mode 100644 index 00000000000..e5b8fc5bf1a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/scalar-select.spv14.asm.frag @@ -0,0 +1,72 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _15 +{ + float _m0; +}; + +constant spvUnsafeArray _29 = spvUnsafeArray({ 0.0, 1.0 }); +constant spvUnsafeArray _30 = spvUnsafeArray({ 1.0, 0.0 }); + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = false ? float4(1.0, 1.0, 0.0, 1.0) : float4(0.0, 0.0, 0.0, 1.0); + out.FragColor = float4(false); + out.FragColor = select(float4(0.0, 0.0, 0.0, 1.0), float4(1.0, 1.0, 0.0, 1.0), bool4(false, true, false, true)); + out.FragColor = float4(bool4(false, true, false, true)); + _15 _32 = false ? (_15{ 0.0 }) : (_15{ 1.0 }); + spvUnsafeArray _33; + _33 = true ? 
_29 : _30; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/subgroup-arithmetic-cast.msl21.asm.frag b/reference/shaders-msl-no-opt/asm/frag/subgroup-arithmetic-cast.msl21.asm.frag new file mode 100644 index 00000000000..2f5cd66284f --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/subgroup-arithmetic-cast.msl21.asm.frag @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + uint FragColor [[color(0)]]; +}; + +struct main0_in +{ + int index [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + uint _17 = uint(in.index); + out.FragColor = uint(simd_min(in.index)); + out.FragColor = uint(simd_max(int(_17))); + out.FragColor = simd_min(uint(in.index)); + out.FragColor = simd_max(_17); + out.FragColor = uint(quad_min(in.index)); + out.FragColor = uint(quad_max(int(_17))); + out.FragColor = quad_min(uint(in.index)); + out.FragColor = quad_max(_17); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/reference/shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag similarity index 100% rename from reference/shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag rename to reference/shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag diff --git a/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag b/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag index fce6757b45e..6c4cc7248be 100644 --- a/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag +++ b/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag @@ -6,22 +6,12 @@ using namespace metal; // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - 
one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +24,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +73,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -109,8 +110,8 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } // Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) +template class Tex, typename... Ts> +inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... 
params) { if (sw) { @@ -149,41 +150,41 @@ fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d c = spvTextureSwizzle(texCube.sample(texCubeSamp, float3(0.0)), texCubeSwzl); c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySamp, float3(0.0).xy, uint(round(float3(0.0).z))), tex2dArraySwzl); c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w))), texCubeArraySwzl); - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), depth2dSwzl); - c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSamp, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), depthCubeSwzl); - c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySamp, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), depth2dArraySwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, 1.0), depth2dSwzl); + c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSamp, float4(0.0, 0.0, 0.0, 1.0).xyz, 1.0), depthCubeSwzl); + c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySamp, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), 1.0), depth2dArraySwzl); c.x = spvTextureSwizzle(depthCubeArray.sample_compare(depthCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), depthCubeArraySwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSamp, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), tex3dSwzl); float4 _152 = float4(0.0, 0.0, 1.0, 1.0); - _152.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, _152.xy / _152.z, float4(0.0, 0.0, 1.0, 1.0).z / _152.z), 
depth2dSwzl); + _152.z = 1.0; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, _152.xy / _152.z, 1.0 / _152.z), depth2dSwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSamp, 0.0), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float2(0.0), level(0.0)), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float3(0.0), level(0.0)), tex3dSwzl); c = spvTextureSwizzle(texCube.sample(texCubeSamp, float3(0.0), level(0.0)), texCubeSwzl); c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySamp, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), tex2dArraySwzl); c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), texCubeArraySwzl); - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), depth2dSwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, 1.0, level(0.0)), depth2dSwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSamp, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), tex3dSwzl); float4 _202 = float4(0.0, 0.0, 1.0, 1.0); - _202.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, _202.xy / _202.z, float4(0.0, 0.0, 1.0, 1.0).z / _202.z, level(0.0)), depth2dSwzl); + _202.z = 1.0; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, _202.xy / _202.z, 1.0 / _202.z, level(0.0)), depth2dSwzl); c = spvTextureSwizzle(tex1d.read(uint(0)), tex1dSwzl); c = spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), tex2dSwzl); c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl); c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl); c = 
texBuffer.read(spvTexelBufferCoord(0)); - c = spvGatherSwizzle, float2, int2>(tex2dSamp, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl); - c = spvGatherSwizzle, float3>(texCubeSamp, texCube, float3(0.0), component::y, texCubeSwzl); - c = spvGatherSwizzle, float2, uint, int2>(tex2dArraySamp, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl); - c = spvGatherSwizzle, float3, uint>(texCubeArraySamp, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl); - c = spvGatherCompareSwizzle, float2, float>(depth2dSamp, depth2d, float2(0.0), 1.0, depth2dSwzl); - c = spvGatherCompareSwizzle, float3, float>(depthCubeSamp, depthCube, float3(0.0), 1.0, depthCubeSwzl); - c = spvGatherCompareSwizzle, float2, uint, float>(depth2dArraySamp, depth2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0, depth2dArraySwzl); - c = spvGatherCompareSwizzle, float3, uint, float>(depthCubeArraySamp, depthCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0, depthCubeArraySwzl); + c = spvGatherSwizzle(tex2d, tex2dSamp, tex2dSwzl, component::x, float2(0.0), int2(0)); + c = spvGatherSwizzle(texCube, texCubeSamp, texCubeSwzl, component::y, float3(0.0)); + c = spvGatherSwizzle(tex2dArray, tex2dArraySamp, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0)); + c = spvGatherSwizzle(texCubeArray, texCubeArraySamp, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w))); + c = spvGatherCompareSwizzle(depth2d, depth2dSamp, depth2dSwzl, float2(0.0), 1.0); + c = spvGatherCompareSwizzle(depthCube, depthCubeSamp, depthCubeSwzl, float3(0.0), 1.0); + c = spvGatherCompareSwizzle(depth2dArray, depth2dArraySamp, depth2dArraySwzl, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = spvGatherCompareSwizzle(depthCubeArray, depthCubeArraySamp, depthCubeArraySwzl, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); } diff --git 
a/reference/shaders-msl-no-opt/asm/frag/usage-tracking-modf-io-pointer.asm.frag b/reference/shaders-msl-no-opt/asm/frag/usage-tracking-modf-io-pointer.asm.frag new file mode 100644 index 00000000000..5ba57b3f626 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/usage-tracking-modf-io-pointer.asm.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 _GLF_color [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + float4 _13 = modf(float4(1.0, 0.0, 0.0, 1.0), out._GLF_color); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.msl2.asm.tesc new file mode 100644 index 00000000000..3872124d5cd --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.msl2.asm.tesc @@ -0,0 +1,82 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 p_v; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + + threadgroup C c[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + c[gl_InvocationID] = _18[gl_InvocationID]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.p_v = float4(0.0); + c[gl_InvocationID].v = float4(1.0); + patchOut.p_v = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..e576472f379 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc @@ -0,0 +1,85 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 p_v; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup C spvStoragec[8][4]; + threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8]; + c[gl_GlobalInvocationID.x % 4] = _18[gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + patchOut.p_v = float4(0.0); + uint gl_InvocationID = 
gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + c[gl_InvocationID].v = float4(1.0); + patchOut.p_v = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.msl2.asm.tesc new file mode 100644 index 00000000000..5c6ad2a8bee --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.msl2.asm.tesc @@ -0,0 +1,81 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct main0_out +{ + float4 c_v; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ +}; +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device 
MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + + threadgroup P p; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].c_v = _18[gl_InvocationID].v; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + p = P{ float4(0.0) }; + gl_out[gl_InvocationID].c_v = float4(1.0); + p.v = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..12295e778e5 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc @@ -0,0 +1,84 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct main0_out +{ + float4 c_v; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ +}; +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].c_v = _18[gl_GlobalInvocationID.x % 4].v; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + threadgroup P spvStoragep[8]; + threadgroup P (&p) = spvStoragep[(gl_GlobalInvocationID.x / 4) % 8]; + p = P{ float4(0.0) }; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].c_v = float4(1.0); + p.v = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git 
a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.msl2.asm.tesc new file mode 100644 index 00000000000..d5ff9d0bd21 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.msl2.asm.tesc @@ -0,0 +1,100 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _51 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _52 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 c_v; + float4 gl_Position; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 p_v; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut 
[[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + spvUnsafeArray _33 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + threadgroup gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].c_v = _18[gl_InvocationID].v; + gl_out[gl_InvocationID].gl_Position = _33[gl_InvocationID].gl_Position; + gl_out[gl_InvocationID].gl_ClipDistance = _33[gl_InvocationID].gl_ClipDistance; + gl_out[gl_InvocationID].gl_CullDistance = _33[gl_InvocationID].gl_CullDistance; + gl_out_masked[gl_InvocationID] = _33[gl_InvocationID]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.p_v = float4(0.0); + gl_out[gl_InvocationID].c_v = float4(1.0); + patchOut.p_v = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out_masked[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..32fb6598937 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc @@ -0,0 +1,103 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _51 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _52 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 c_v; + float4 gl_Position; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 p_v; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + spvUnsafeArray _33 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + device main0_out* gl_out = 
&spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].c_v = _18[gl_GlobalInvocationID.x % 4].v; + gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _33[gl_GlobalInvocationID.x % 4].gl_Position; + gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _33[gl_GlobalInvocationID.x % 4].gl_ClipDistance; + gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _33[gl_GlobalInvocationID.x % 4].gl_CullDistance; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + gl_out_masked[gl_GlobalInvocationID.x % 4] = _33[gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + patchOut.p_v = float4(0.0); + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].c_v = float4(1.0); + patchOut.p_v = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out_masked[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.msl2.asm.tesc new file mode 100644 index 00000000000..ce16f379750 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.msl2.asm.tesc @@ -0,0 +1,100 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _51 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _52 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 c_v; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 p_v; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + spvUnsafeArray _33 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + 
threadgroup gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].c_v = _18[gl_InvocationID].v; + gl_out[gl_InvocationID].gl_PointSize = _33[gl_InvocationID].gl_PointSize; + gl_out[gl_InvocationID].gl_ClipDistance = _33[gl_InvocationID].gl_ClipDistance; + gl_out[gl_InvocationID].gl_CullDistance = _33[gl_InvocationID].gl_CullDistance; + gl_out_masked[gl_InvocationID] = _33[gl_InvocationID]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.p_v = float4(0.0); + gl_out[gl_InvocationID].c_v = float4(1.0); + patchOut.p_v = float4(2.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..671aa25a021 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc @@ -0,0 +1,103 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _51 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _52 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 c_v; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 p_v; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + spvUnsafeArray _33 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + device main0_out* gl_out = 
&spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].c_v = _18[gl_GlobalInvocationID.x % 4].v; + gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _33[gl_GlobalInvocationID.x % 4].gl_PointSize; + gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _33[gl_GlobalInvocationID.x % 4].gl_ClipDistance; + gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _33[gl_GlobalInvocationID.x % 4].gl_CullDistance; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + gl_out_masked[gl_GlobalInvocationID.x % 4] = _33[gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + patchOut.p_v = float4(0.0); + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].c_v = float4(1.0); + patchOut.p_v = float4(2.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.asm.tesc new file mode 100644 index 00000000000..25fe13bf674 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.asm.tesc @@ -0,0 +1,90 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex +{ + float4 _RESERVED_IDENTIFIER_FIXUP_gl_Position; + float _RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 foo_patch; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _29 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, 
spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + threadgroup float4 foo[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + foo[gl_InvocationID] = _15[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_Position; + gl_out[gl_InvocationID].gl_PointSize = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + gl_out[gl_InvocationID].gl_ClipDistance = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + gl_out[gl_InvocationID].gl_CullDistance = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.foo_patch = float4(0.0); + foo[gl_InvocationID] = float4(1.0); + patchOut.foo_patch = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc new file mode 100644 index 00000000000..750ef96d17b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc @@ -0,0 +1,93 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex +{ + float4 _RESERVED_IDENTIFIER_FIXUP_gl_Position; + float _RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 foo_patch; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _29 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, 
_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup float4 spvStoragefoo[8][4]; + threadgroup float4 (&foo)[4] = spvStoragefoo[(gl_GlobalInvocationID.x / 4) % 8]; + foo[gl_GlobalInvocationID.x % 4] = _15[gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_Position; + gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + patchOut.foo_patch = float4(0.0); + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + foo[gl_InvocationID] = float4(1.0); + patchOut.foo_patch = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.asm.tesc new file mode 100644 index 00000000000..e8f1146b42d --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.asm.tesc @@ -0,0 +1,89 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray 
+{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex +{ + float4 _RESERVED_IDENTIFIER_FIXUP_gl_Position; + float _RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 foo; + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ +}; +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _29 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 
0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + threadgroup float4 foo_patch; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].foo = _15[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_Position; + gl_out[gl_InvocationID].gl_PointSize = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + gl_out[gl_InvocationID].gl_ClipDistance = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + gl_out[gl_InvocationID].gl_CullDistance = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + foo_patch = float4(0.0); + gl_out[gl_InvocationID].foo = float4(1.0); + foo_patch = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc new file mode 100644 index 00000000000..a7c1e5d617b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex +{ + float4 _RESERVED_IDENTIFIER_FIXUP_gl_Position; + float _RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 foo; + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ +}; +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _29 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, 
_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].foo = _15[gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_Position; + gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + threadgroup float4 spvStoragefoo_patch[8]; + threadgroup float4 (&foo_patch) = spvStoragefoo_patch[(gl_GlobalInvocationID.x / 4) % 8]; + foo_patch = float4(0.0); + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].foo = float4(1.0); + foo_patch = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.asm.tesc new file mode 100644 index 00000000000..344751b04ce --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.asm.tesc @@ -0,0 +1,90 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template 
+struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 foo; + float4 gl_Position; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 foo_patch; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _29 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + threadgroup 
gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].foo = _15[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = _29[gl_InvocationID].gl_Position; + gl_out[gl_InvocationID].gl_ClipDistance = _29[gl_InvocationID].gl_ClipDistance; + gl_out[gl_InvocationID].gl_CullDistance = _29[gl_InvocationID].gl_CullDistance; + gl_out_masked[gl_InvocationID] = _29[gl_InvocationID]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.foo_patch = float4(0.0); + gl_out[gl_InvocationID].foo = float4(1.0); + patchOut.foo_patch = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out_masked[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc new file mode 100644 index 00000000000..92731ec03a4 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc @@ -0,0 +1,93 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 foo; + float4 gl_Position; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 foo_patch; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _29 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + 
gl_out[gl_GlobalInvocationID.x % 4].foo = _15[gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _29[gl_GlobalInvocationID.x % 4].gl_Position; + gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _29[gl_GlobalInvocationID.x % 4].gl_ClipDistance; + gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _29[gl_GlobalInvocationID.x % 4].gl_CullDistance; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + gl_out_masked[gl_GlobalInvocationID.x % 4] = _29[gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + patchOut.foo_patch = float4(0.0); + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].foo = float4(1.0); + patchOut.foo_patch = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out_masked[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.asm.tesc new file mode 100644 index 00000000000..2f11636a0cb --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.asm.tesc @@ -0,0 +1,90 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 foo; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 foo_patch; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _29 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + threadgroup gl_PerVertex gl_out_masked[4]; + device main0_out* 
gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].foo = _15[gl_InvocationID]; + gl_out[gl_InvocationID].gl_PointSize = _29[gl_InvocationID].gl_PointSize; + gl_out[gl_InvocationID].gl_ClipDistance = _29[gl_InvocationID].gl_ClipDistance; + gl_out[gl_InvocationID].gl_CullDistance = _29[gl_InvocationID].gl_CullDistance; + gl_out_masked[gl_InvocationID] = _29[gl_InvocationID]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.foo_patch = float4(0.0); + gl_out[gl_InvocationID].foo = float4(1.0); + patchOut.foo_patch = float4(2.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc new file mode 100644 index 00000000000..7283eddb1f4 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc @@ -0,0 +1,93 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 foo; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 foo_patch; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _29 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + 
gl_out[gl_GlobalInvocationID.x % 4].foo = _15[gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _29[gl_GlobalInvocationID.x % 4].gl_PointSize; + gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _29[gl_GlobalInvocationID.x % 4].gl_ClipDistance; + gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _29[gl_GlobalInvocationID.x % 4].gl_CullDistance; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + gl_out_masked[gl_GlobalInvocationID.x % 4] = _29[gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + patchOut.foo_patch = float4(0.0); + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].foo = float4(1.0); + patchOut.foo_patch = float4(2.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/composite-extract-row-major.asm.comp b/reference/shaders-msl-no-opt/asm/packing/composite-extract-row-major.asm.comp new file mode 100644 index 00000000000..d2c368b8ede --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/composite-extract-row-major.asm.comp @@ -0,0 +1,16 @@ +#include +#include + +using namespace metal; + +struct SSBORow +{ + float v; + float4x4 row_major0; +}; + +kernel void main0(device SSBORow& _4 [[buffer(0)]]) +{ + _4.v = _4.row_major0[2][1]; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-2.asm.comp b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-2.asm.comp new file mode 100644 index 00000000000..0ae12f0858a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-2.asm.comp @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; 
+ +struct SSBOScalar +{ + float3 a; + float3x3 b; + float3x3 c; +}; + +kernel void main0(device SSBOScalar& _4 [[buffer(0)]]) +{ + float3x3 _20 = transpose(_4.b); + _4.b = _4.c; + _4.a = _20 * _4.a; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-3.asm.comp b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-3.asm.comp new file mode 100644 index 00000000000..86bdd45279b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-3.asm.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct SSBOScalar +{ + packed_float3 a; + packed_float3 b; + packed_float3 c; +}; + +kernel void main0(device SSBOScalar& _4 [[buffer(0)]]) +{ + float3 _17 = float3(_4.b); + float3 _19 = float3(_4.c); + _4.c = _17; + _4.a = _17 * _19; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-4.asm.comp b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-4.asm.comp new file mode 100644 index 00000000000..669420436b1 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-4.asm.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct SSBOScalar +{ + float4 a[16]; + float4 b[16]; + float4 c[16]; +}; + +kernel void main0(device SSBOScalar& _4 [[buffer(0)]]) +{ + float2 _27 = _4.b[10].xy; + float _29 = _4.c[10].x; + (device float2&)_4.b[10] = float2(10.0, 11.0); + (device float2&)_4.a[10] = _27 * _29; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-5.asm.comp b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-5.asm.comp new file mode 100644 index 00000000000..23d25b82867 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-5.asm.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct SSBOScalar +{ + float2 a; + packed_float3 b; + packed_float3 c; +}; + +kernel void main0(device 
SSBOScalar& _4 [[buffer(0)]]) +{ + float3 _21 = float3(_4.b); + float3 _24 = float3(_4.c); + _4.b = float3(1.0); + _4.a = _21.xy * _24.yz; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding.asm.comp b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding.asm.comp new file mode 100644 index 00000000000..c21fcc7ffc2 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding.asm.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +typedef packed_float3 packed_rm_float3x3[3]; + +struct SSBOScalar +{ + packed_float3 a; + packed_rm_float3x3 b; + packed_rm_float3x3 c; +}; + +kernel void main0(device SSBOScalar& _4 [[buffer(0)]]) +{ + float3x3 _20 = transpose(float3x3(float3(_4.b[0]), float3(_4.b[1]), float3(_4.b[2]))); + _4.b[0] = float3x3(float3(_4.c[0]), float3(_4.c[1]), float3(_4.c[2]))[0]; + _4.b[1] = float3x3(float3(_4.c[0]), float3(_4.c[1]), float3(_4.c[2]))[1]; + _4.b[2] = float3x3(float3(_4.c[0]), float3(_4.c[1]), float3(_4.c[2]))[2]; + _4.a = _20 * float3(_4.a); +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/packed-vector-extract-insert.asm.comp b/reference/shaders-msl-no-opt/asm/packing/packed-vector-extract-insert.asm.comp new file mode 100644 index 00000000000..4c70aede48a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/packed-vector-extract-insert.asm.comp @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct SSBOScalar +{ + float2 a; + packed_float3 b; + packed_float3 c; + float3 d; +}; + +kernel void main0(device SSBOScalar& _4 [[buffer(0)]]) +{ + float3 _23 = float3(_4.b); + float3 _24 = _23; + _24.z = 2.0; + _4.a = _23.xy * _23.z; + _4.b = _24; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/row-major-split-access-chain.asm.comp b/reference/shaders-msl-no-opt/asm/packing/row-major-split-access-chain.asm.comp new file mode 100644 index 00000000000..f12092cf8f3 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/asm/packing/row-major-split-access-chain.asm.comp @@ -0,0 +1,16 @@ +#include +#include + +using namespace metal; + +struct SSBORow +{ + float v; + float4x4 row_major0; +}; + +kernel void main0(device SSBORow& _4 [[buffer(0)]]) +{ + _4.v = ((device float*)&_4.row_major0[2u])[1]; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-array-float2.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-array-float2.asm.frag new file mode 100644 index 00000000000..f26e35c6722 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-array-float2.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float4 a[1]; + char _m1_pad[8]; + float b; +}; + +struct main0_out +{ + float2 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (Foo.a[0].xy + Foo.a[1].xy) + float2(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-array-float3-one-element.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-array-float3-one-element.asm.frag new file mode 100644 index 00000000000..6f8546532f4 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-array-float3-one-element.asm.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + packed_float3 a[1]; + float b; +}; + +struct main0_out +{ + float3 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = float3(Foo.a[0]) + float3(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-array-float3.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-array-float3.asm.frag new file mode 100644 index 00000000000..565ee64e04b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-array-float3.asm.frag @@ -0,0 +1,24 
@@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float4 a[1]; + char _m1_pad[12]; + float b; +}; + +struct main0_out +{ + float3 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (Foo.a[0].xyz + Foo.a[1].xyz) + float3(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float2x2-col-major.invalid.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x2-col-major.invalid.asm.frag new file mode 100644 index 00000000000..8440b2f2297 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x2-col-major.invalid.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float4 a[1]; + char _m1_pad[8]; + float b; +}; + +struct main0_out +{ + float2 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (Foo.a[0u].xy + Foo.a[1u].xy) + float2(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float2x2-row-major.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x2-row-major.asm.frag new file mode 100644 index 00000000000..9b347718edc --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x2-row-major.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float4 a[1]; + char _m1_pad[8]; + float b; +}; + +struct main0_out +{ + float2 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (float2(Foo.a[0][0u], Foo.a[1][0u]) + float2(Foo.a[0][1u], Foo.a[1][1u])) + float2(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float2x3-col-major.invalid.asm.frag 
b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x3-col-major.invalid.asm.frag new file mode 100644 index 00000000000..cd40af10f23 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x3-col-major.invalid.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float4 a[1]; + char _m1_pad[12]; + float b; +}; + +struct main0_out +{ + float3 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (Foo.a[0u].xyz + Foo.a[1u].xyz) + float3(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float2x3-row-major.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x3-row-major.asm.frag new file mode 100644 index 00000000000..86dfd6054bf --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x3-row-major.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float2x4 a; + char _m1_pad[8]; + float b; +}; + +struct main0_out +{ + float3 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (float3(Foo.a[0][0u], Foo.a[1][0u], Foo.a[2][0u]) + float3(Foo.a[0][1u], Foo.a[1][1u], Foo.a[2][1u])) + float3(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float3x2-col-major.invalid.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x2-col-major.invalid.asm.frag new file mode 100644 index 00000000000..7430a551fa2 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x2-col-major.invalid.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float2x4 a; + char _m1_pad[8]; + float b; +}; + +struct main0_out +{ + float2 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo 
[[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (Foo.a[0u].xy + Foo.a[1u].xy) + float2(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float3x2-row-major.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x2-row-major.asm.frag new file mode 100644 index 00000000000..19b7f1eebb1 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x2-row-major.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float4 a[1]; + char _m1_pad[12]; + float b; +}; + +struct main0_out +{ + float2 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (float2(Foo.a[0][0u], Foo.a[1][0u]) + float2(Foo.a[0][1u], Foo.a[1][1u])) + float2(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float3x3-col-major.invalid.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x3-col-major.invalid.asm.frag new file mode 100644 index 00000000000..f8008525b64 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x3-col-major.invalid.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float2x4 a; + char _m1_pad[12]; + float b; +}; + +struct main0_out +{ + float3 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (Foo.a[0u].xyz + Foo.a[1u].xyz) + float3(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float3x3-row-major.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x3-row-major.asm.frag new file mode 100644 index 00000000000..041b6e91d69 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x3-row-major.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo 
+{ + float2x4 a; + char _m1_pad[12]; + float b; +}; + +struct main0_out +{ + float3 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (float3(Foo.a[0][0u], Foo.a[1][0u], Foo.a[2][0u]) + float3(Foo.a[0][1u], Foo.a[1][1u], Foo.a[2][1u])) + float3(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/temporary.zero-initialize.asm.frag b/reference/shaders-msl-no-opt/asm/temporary.zero-initialize.asm.frag new file mode 100644 index 00000000000..6fbf2ff70f0 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/temporary.zero-initialize.asm.frag @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + int vA [[user(locn0)]]; + int vB [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.FragColor = float4(0.0); + int _10 = {}; + int _15 = {}; + for (int _16 = 0, _17 = 0; _16 < in.vA; _17 = _15, _16 += _10) + { + if ((in.vA + _16) == 20) + { + _15 = 50; + } + else + { + _15 = ((in.vB + _16) == 40) ? 60 : _17; + } + _10 = _15 + 10; + out.FragColor += float4(1.0); + } + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/array-control-point-initializer.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/array-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..5c30e05a4b5 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/array-control-point-initializer.asm.tesc @@ -0,0 +1,65 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _49 = spvUnsafeArray({ 0.0, 0.0, 0.0 }); +constant spvUnsafeArray, 4> _27 = spvUnsafeArray, 4>({ spvUnsafeArray({ 0.0, 0.0, 0.0 }), spvUnsafeArray({ 0.0, 0.0, 0.0 }), spvUnsafeArray({ 0.0, 0.0, 0.0 }), spvUnsafeArray({ 0.0, 0.0, 0.0 }) }); + +struct main0_out +{ + spvUnsafeArray foo; + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].foo = _27[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].foo[0] = float(gl_InvocationID); + gl_out[gl_InvocationID].foo[1] = float(gl_InvocationID) + 1.0; + gl_out[gl_InvocationID].foo[2] = float(gl_InvocationID) + 2.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/block-control-point-initializer.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/block-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..71498f5b201 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/block-control-point-initializer.asm.tesc @@ -0,0 +1,70 @@ 
+#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Verts +{ + float a; + float2 b; +}; + +struct main0_out +{ + float verts_a; + float2 verts_b; + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _27 = spvUnsafeArray({ Verts{ 0.0, float2(0.0) }, Verts{ 0.0, float2(0.0) }, Verts{ 0.0, float2(0.0) }, Verts{ 0.0, float2(0.0) } }); + + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].verts_a = _27[gl_InvocationID].a; + gl_out[gl_InvocationID].verts_b = _27[gl_InvocationID].b; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].verts_a = float(gl_InvocationID); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/builtin-control-point-initializer.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/builtin-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..d8b74bfa331 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/asm/tesc/builtin-control-point-initializer.asm.tesc @@ -0,0 +1,80 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex +{ + float4 _RESERVED_IDENTIFIER_FIXUP_gl_Position; + float _RESERVED_IDENTIFIER_FIXUP_gl_PointSize; +}; + +struct Verts +{ + float a; + float2 b; +}; + +struct main0_out +{ + float verts_a; + float2 verts_b; + float4 gl_Position; + float gl_PointSize; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _17 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0 }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0 }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0 }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0 } }); + spvUnsafeArray _27 = spvUnsafeArray({ Verts{ 
0.0, float2(0.0) }, Verts{ 0.0, float2(0.0) }, Verts{ 0.0, float2(0.0) }, Verts{ 0.0, float2(0.0) } }); + + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].gl_Position = _17[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_Position; + gl_out[gl_InvocationID].gl_PointSize = _17[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + gl_out[gl_InvocationID].verts_a = _27[gl_InvocationID].a; + gl_out[gl_InvocationID].verts_b = _27[gl_InvocationID].b; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].verts_a = float(gl_InvocationID); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/composite-control-point-initializer.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/composite-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..a10731bb283 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/composite-control-point-initializer.asm.tesc @@ -0,0 +1,69 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Foo +{ + float a; + float2 b; + float4 c; +}; + +struct main0_out +{ + Foo foo; + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _27 = spvUnsafeArray({ Foo{ 0.0, float2(0.0), float4(0.0) }, Foo{ 0.0, float2(0.0), float4(0.0) }, Foo{ 0.0, float2(0.0), float4(0.0) }, Foo{ 0.0, float2(0.0), float4(0.0) } }); + + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].foo = _27[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].foo.a = float(gl_InvocationID); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/copy-memory-control-point.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/copy-memory-control-point.asm.tesc new file mode 100644 index 00000000000..b7246ac570b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/copy-memory-control-point.asm.tesc @@ -0,0 +1,125 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct 
spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct cb1_struct +{ + float4 _m0[1]; +}; + +struct main0_out +{ + float3 vocp0; + float4 vocp1; +}; + +struct main0_in +{ + float4 v0 [[attribute(0)]]; + float4 v1 [[attribute(1)]]; + float3 vicp0 [[attribute(2)]]; + float4 vicp1 [[attribute(4)]]; +}; + +static inline __attribute__((always_inline)) +void fork0_epilogue(thread const float4& _87, thread const float4& _88, thread const float4& _89, device half (&gl_TessLevelOuter)[3]) +{ + gl_TessLevelOuter[0u] = half(_87.x); + gl_TessLevelOuter[1u] = half(_88.x); + gl_TessLevelOuter[2u] = half(_89.x); +} + +static inline __attribute__((always_inline)) +void fork0(uint vForkInstanceId, device half (&gl_TessLevelOuter)[3], thread spvUnsafeArray& opc, constant cb1_struct& cb0_0, thread float4& v_48, thread float4& v_49, thread float4& v_50) +{ + float4 r0; + r0.x = as_type(vForkInstanceId); + opc[as_type(r0.x)].x = cb0_0._m0[0u].x; + v_48 = opc[0u]; + v_49 = opc[1u]; + v_50 = opc[2u]; + fork0_epilogue(v_48, v_49, v_50, gl_TessLevelOuter); +} + +static inline __attribute__((always_inline)) +void fork1_epilogue(thread const float4& _109, device half &gl_TessLevelInner) +{ + gl_TessLevelInner = half(_109.x); +} + +static inline __attribute__((always_inline)) +void fork1(device half &gl_TessLevelInner, thread spvUnsafeArray& opc, 
constant cb1_struct& cb0_0, thread float4& v_56) +{ + opc[3u].x = cb0_0._m0[0u].x; + v_56 = opc[3u]; + fork1_epilogue(v_56, gl_TessLevelInner); +} + +kernel void main0(main0_in in [[stage_in]], constant cb1_struct& cb0_0 [[buffer(0)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray, 2> vicp; + spvUnsafeArray _153 = spvUnsafeArray({ gl_in[0].v0, gl_in[1].v0, gl_in[2].v0 }); + vicp[0u] = _153; + spvUnsafeArray _154 = spvUnsafeArray({ gl_in[0].v1, gl_in[1].v1, gl_in[2].v1 }); + vicp[1u] = _154; + gl_out[gl_InvocationID].vocp0 = gl_in[gl_InvocationID].vicp0; + gl_out[gl_InvocationID].vocp1 = gl_in[gl_InvocationID].vicp1; + spvUnsafeArray opc; + float4 v_48; + float4 v_49; + float4 v_50; + fork0(0u, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor, opc, cb0_0, v_48, v_49, v_50); + fork0(1u, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor, opc, cb0_0, v_48, v_49, v_50); + fork0(2u, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor, opc, cb0_0, v_48, v_49, v_50); + float4 v_56; + fork1(spvTessLevel[gl_PrimitiveID].insideTessellationFactor, opc, cb0_0, v_56); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/copy-tess-level-tri.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/copy-tess-level-tri.asm.tesc new file mode 100644 index 00000000000..a492cb829cf --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/copy-tess-level-tri.asm.tesc @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored 
"-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _19 = spvUnsafeArray({ 1.0, 2.0 }); +constant spvUnsafeArray _25 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0 }); + +struct main0_out +{ + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1]; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_19[0]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_25[0]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_25[1]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_25[2]); + spvUnsafeArray inner; + inner = spvUnsafeArray({ float(spvTessLevel[gl_PrimitiveID].insideTessellationFactor), 0.0 }); + spvUnsafeArray outer; + outer = spvUnsafeArray({ float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), 0.0 }); + 
gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/plain-control-point-initializer.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/plain-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..0c6c1dc813a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/plain-control-point-initializer.asm.tesc @@ -0,0 +1,62 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _25 = spvUnsafeArray({ 0.0, 0.0, 0.0, 0.0 }); + +struct main0_out +{ + float v; + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].v = _25[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].v = float(gl_InvocationID); +} + diff --git 
a/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc similarity index 60% rename from reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc rename to reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc index 6a765117e4e..bdbd4bef873 100644 --- a/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc +++ b/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct VertexOutput { float4 pos; @@ -19,7 +58,7 @@ struct HSOut struct HSConstantOut { - float EdgeTess[3]; + spvUnsafeArray EdgeTess; float InsideTess; }; @@ -41,24 +80,12 @@ struct main0_out struct main0_in { - float2 VertexOutput_uv [[attribute(0)]]; + float2 p_uv [[attribute(0)]]; float4 gl_Position [[attribute(1)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -HSOut _hs_main(thread const VertexOutput (&p)[3], thread const uint& i) +static inline __attribute__((always_inline)) +HSOut _hs_main(thread const spvUnsafeArray& p, thread const uint& i) { HSOut _output; _output.pos = p[i].pos; @@ -66,7 +93,8 @@ HSOut _hs_main(thread const VertexOutput (&p)[3], thread const uint& i) return _output; } -HSConstantOut PatchHS(thread const VertexOutput (&_patch)[3]) +static inline __attribute__((always_inline)) +HSConstantOut PatchHS(thread const spvUnsafeArray& _patch) { HSConstantOut _output; _output.EdgeTess[0] = (float2(1.0) + _patch[0].uv).x; @@ -84,25 +112,25 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ threadgroup_barrier(mem_flags::mem_threadgroup); if (gl_InvocationID >= 3) return; - VertexOutput p[3]; + spvUnsafeArray p; p[0].pos = gl_in[0].gl_Position; - p[0].uv = gl_in[0].VertexOutput_uv; + p[0].uv = gl_in[0].p_uv; p[1].pos = gl_in[1].gl_Position; - p[1].uv = gl_in[1].VertexOutput_uv; + p[1].uv = gl_in[1].p_uv; p[2].pos = gl_in[2].gl_Position; - p[2].uv = gl_in[2].VertexOutput_uv; + p[2].uv = gl_in[2].p_uv; uint i = gl_InvocationID; - VertexOutput param[3]; - spvArrayCopyFromStack1(param, p); + spvUnsafeArray param; + param = p; uint param_1 = i; HSOut flattenTemp = _hs_main(param, param_1); gl_out[gl_InvocationID].gl_Position = flattenTemp.pos; gl_out[gl_InvocationID]._entryPointOutput.uv = flattenTemp.uv; - threadgroup_barrier(mem_flags::mem_device); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); if (int(gl_InvocationID) == 0) { - VertexOutput param_2[3]; - spvArrayCopyFromStack1(param_2, p); + spvUnsafeArray param_2; + param_2 = p; HSConstantOut _patchConstantResult = 
PatchHS(param_2); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_patchConstantResult.EdgeTess[0]); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_patchConstantResult.EdgeTess[1]); diff --git a/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc new file mode 100644 index 00000000000..dabe1b3857a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc @@ -0,0 +1,140 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct VertexOutput +{ + float4 pos; + float2 uv; +}; + +struct HSOut +{ + float4 pos; + float2 uv; +}; + +struct HSConstantOut +{ + spvUnsafeArray EdgeTess; + float InsideTess; +}; + +struct VertexOutput_1 +{ + float2 uv; +}; + +struct HSOut_1 +{ + float2 uv; +}; + +struct main0_out +{ + HSOut_1 _entryPointOutput; + float4 gl_Position; +}; + +struct main0_in +{ + VertexOutput_1 p; + ushort2 m_171; + float4 gl_Position; +}; + +static inline __attribute__((always_inline)) +HSOut 
_hs_main(thread const spvUnsafeArray& p, thread const uint& i) +{ + HSOut _output; + _output.pos = p[i].pos; + _output.uv = p[i].uv; + return _output; +} + +static inline __attribute__((always_inline)) +HSConstantOut PatchHS(thread const spvUnsafeArray& _patch) +{ + HSConstantOut _output; + _output.EdgeTess[0] = (float2(1.0) + _patch[0].uv).x; + _output.EdgeTess[1] = (float2(1.0) + _patch[0].uv).x; + _output.EdgeTess[2] = (float2(1.0) + _patch[0].uv).x; + _output.InsideTess = (float2(1.0) + _patch[0].uv).x; + return _output; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 3]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 3; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1); + spvUnsafeArray p; + p[0].pos = gl_in[0].gl_Position; + p[0].uv = gl_in[0].p.uv; + p[1].pos = gl_in[1].gl_Position; + p[1].uv = gl_in[1].p.uv; + p[2].pos = gl_in[2].gl_Position; + p[2].uv = gl_in[2].p.uv; + uint i = gl_InvocationID; + spvUnsafeArray param; + param = p; + uint param_1 = i; + HSOut flattenTemp = _hs_main(param, param_1); + gl_out[gl_InvocationID].gl_Position = flattenTemp.pos; + gl_out[gl_InvocationID]._entryPointOutput.uv = flattenTemp.uv; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (int(gl_InvocationID) == 0) + { + spvUnsafeArray param_2; + param_2 = p; + HSConstantOut _patchConstantResult = PatchHS(param_2); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_patchConstantResult.EdgeTess[0]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = 
half(_patchConstantResult.EdgeTess[1]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_patchConstantResult.EdgeTess[2]); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_patchConstantResult.InsideTess); + } +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/tess-level-initializer-quad.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/tess-level-initializer-quad.asm.tesc new file mode 100644 index 00000000000..d6d0bc01496 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/tess-level-initializer-quad.asm.tesc @@ -0,0 +1,78 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _27 = spvUnsafeArray({ 0.0, 0.0 }); +constant spvUnsafeArray _33 = spvUnsafeArray({ 0.0, 0.0, 0.0, 0.0 }); + +struct main0_out +{ + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(0.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(0.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(0.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(0.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(0.0); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(2.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(4.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(5.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(6.0); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/tess-level-initializer-triangle.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/tess-level-initializer-triangle.asm.tesc new file mode 100644 index 00000000000..979f4329f5f --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/tess-level-initializer-triangle.asm.tesc @@ -0,0 +1,72 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _27 = spvUnsafeArray({ 0.0, 0.0 }); +constant spvUnsafeArray _33 = spvUnsafeArray({ 0.0, 0.0, 0.0, 0.0 }); + +struct main0_out +{ + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(0.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(0.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(0.0); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(3.0); + 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(4.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(5.0); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/tess-level-read-write-in-function-tri.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/tess-level-read-write-in-function-tri.asm.tesc new file mode 100644 index 00000000000..b6e40bc835d --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/tess-level-read-write-in-function-tri.asm.tesc @@ -0,0 +1,35 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +static inline __attribute__((always_inline)) +void store_tess_level_in_func(device half &gl_TessLevelInner, device half (&gl_TessLevelOuter)[3]) +{ + gl_TessLevelInner = half(1.0); + gl_TessLevelOuter[0] = half(3.0); + gl_TessLevelOuter[1] = half(4.0); + gl_TessLevelOuter[2] = half(5.0); +} + +static inline __attribute__((always_inline)) +float load_tess_level_in_func(device half &gl_TessLevelInner, device half (&gl_TessLevelOuter)[3]) +{ + return float(gl_TessLevelInner) + float(gl_TessLevelOuter[1]); +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1]; + store_tess_level_in_func(spvTessLevel[gl_PrimitiveID].insideTessellationFactor, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor); + float v = load_tess_level_in_func(spvTessLevel[gl_PrimitiveID].insideTessellationFactor, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor); + gl_out[gl_InvocationID].gl_Position = float4(v); +} + diff --git a/reference/shaders-msl-no-opt/asm/tese/copy-tess-level.asm.msl2.tese 
b/reference/shaders-msl-no-opt/asm/tese/copy-tess-level.asm.msl2.tese new file mode 100644 index 00000000000..7fa0f5a4e45 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tese/copy-tess-level.asm.msl2.tese @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 out_var_CUSTOM_VALUE [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter[3]; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + out.out_var_CUSTOM_VALUE = float4(gl_TessLevelOuter[0] + gl_TessLevelInner[0], gl_TessLevelOuter[1] + gl_TessLevelInner[1], 
gl_TessLevelOuter[2], gl_TessLevelOuter[3]); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/tese/split-access-chain.asm.tese b/reference/shaders-msl-no-opt/asm/tese/split-access-chain.asm.tese new file mode 100644 index 00000000000..05a81133310 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tese/split-access-chain.asm.tese @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float o0 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in0 [[attribute(0)]]; +}; + +struct main0_patchIn +{ + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + out.o0 = patchIn.gl_in[0u].in0.z; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/vert/block-struct-initializer.asm.vert b/reference/shaders-msl-no-opt/asm/vert/block-struct-initializer.asm.vert new file mode 100644 index 00000000000..6c8f9382701 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/block-struct-initializer.asm.vert @@ -0,0 +1,39 @@ +#include +#include + +using namespace metal; + +struct Vert +{ + float a; + float b; +}; + +struct Foo +{ + float c; + float d; +}; + +struct main0_out +{ + float m_3_a [[user(locn0)]]; + float m_3_b [[user(locn1)]]; + float foo_c [[user(locn2)]]; + float foo_d [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + Vert _3 = Vert{ 0.0, 0.0 }; + Foo foo = Foo{ 0.0, 0.0 }; + out.gl_Position = float4(0.0); + out.m_3_a = _3.a; + out.m_3_b = _3.b; + out.foo_c = foo.c; + out.foo_d = foo.d; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/vert/builtin-output-initializer.asm.vert b/reference/shaders-msl-no-opt/asm/vert/builtin-output-initializer.asm.vert new file mode 100644 index 00000000000..54b88ba9c6b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/builtin-output-initializer.asm.vert @@ -0,0 +1,20 @@ 
+#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.gl_Position = float4(0.0); + out.gl_PointSize = 0.0; + out.gl_Position = float4(1.0); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/vert/composite-extract-physical-type-id.asm.vert b/reference/shaders-msl-no-opt/asm/vert/composite-extract-physical-type-id.asm.vert new file mode 100644 index 00000000000..ea89378b10a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/composite-extract-physical-type-id.asm.vert @@ -0,0 +1,31 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct type_Float2Array +{ + float4 arr[3]; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +static inline __attribute__((always_inline)) +float4 src_VSMain(thread const uint& i, constant type_Float2Array& Float2Array) +{ + return float4(Float2Array.arr[i].x, Float2Array.arr[i].y, 0.0, 1.0); +} + +vertex main0_out main0(constant type_Float2Array& Float2Array [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + uint param_var_i = gl_VertexIndex; + out.gl_Position = src_VSMain(param_var_i, Float2Array); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/vert/constant-composite-block-no-array-stride.asm.vert b/reference/shaders-msl-no-opt/asm/vert/constant-composite-block-no-array-stride.asm.vert new file mode 100644 index 00000000000..07bcb9f1019 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/constant-composite-block-no-array-stride.asm.vert @@ -0,0 +1,89 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _14 +{ + float _m0[3]; +}; + +struct _15 +{ + float _m0[3]; +}; + +constant spvUnsafeArray _93 = spvUnsafeArray({ 1.0, 2.0, 1.0 }); +constant spvUnsafeArray _94 = spvUnsafeArray({ -1.0, -2.0, -1.0 }); + +struct main0_out +{ + float4 m_4 [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 m_3 [[attribute(0)]]; + float4 m_5 [[attribute(1)]]; +}; + +static inline __attribute__((always_inline)) +float4 _102(float4 _107) +{ + float4 _109 = _107; + _14 _110 = _14{ { 1.0, 2.0, 1.0 } }; + _15 _111 = _15{ { -1.0, -2.0, -1.0 } }; + _109.y = (_110._m0[2] + _111._m0[2]) + _109.y; + return _109; +} + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = in.m_3; + out.m_4 = _102(in.m_5); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/vert/duplicate-view-index.asm.vert b/reference/shaders-msl-no-opt/asm/vert/duplicate-view-index.asm.vert new file mode 100644 index 00000000000..f007a67f226 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/duplicate-view-index.asm.vert @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + uint gl_Layer [[render_target_array_index]]; +}; + +vertex main0_out main0(uint gl_InstanceIndex [[instance_id]], uint 
gl_BaseInstance [[base_instance]]) +{ + main0_out out = {}; + const uint gl_ViewIndex = 0; + out.gl_Position = float4(float(int(gl_ViewIndex))); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert b/reference/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert index e9cd6a540aa..95b61a40dbe 100644 --- a/reference/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert +++ b/reference/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert @@ -5,12 +5,9 @@ using namespace metal; struct Test { - int empty_struct_member; }; - vertex void main0() { - Test _14 = Test{ 0 }; - Test t = _14; + Test t = Test{ }; } diff --git a/reference/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag b/reference/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag index e4f09e890e7..18d98993e5f 100644 --- a/reference/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag +++ b/reference/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + constant float _21 = {}; struct main0_out @@ -12,35 +51,20 @@ struct main0_out float4 gl_Position [[position]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - vertex main0_out main0() { main0_out out = {}; - float _23[2]; + spvUnsafeArray _23; for (int _25 = 0; _25 < 2; ) { _23[_25] = 0.0; _25++; continue; } - float _31[2]; - spvArrayCopyFromStack1(_31, _23); float _37; if (as_type(3.0) != 0u) { - _37 = _31[0]; + _37 = _23[0]; } else { diff --git a/reference/shaders-msl-no-opt/asm/vert/pointer-to-pointer.asm.vert b/reference/shaders-msl-no-opt/asm/vert/pointer-to-pointer.asm.vert new file mode 100644 index 00000000000..750afcf25bf --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/pointer-to-pointer.asm.vert @@ -0,0 +1,20 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +device float* thread * constant _9 = {}; + +static inline __attribute__((always_inline)) +void _10(device float* thread * const thread & 
_11) +{ +} + +vertex void main0() +{ + device float* thread * _14 = _9; + _10(_14); +} + diff --git a/reference/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp b/reference/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp new file mode 100644 index 00000000000..eab1df4fce9 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp @@ -0,0 +1,172 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void 
spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 1u, 1u); + +kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) +{ + threadgroup float shared_group[8][8]; + threadgroup float shared_group_alt[8][8]; + spvUnsafeArray blob; + for (int i = 0; i < 8; i++) + { + blob[i] = float(i); + } + spvArrayCopyFromStackToThreadGroup1(shared_group[gl_LocalInvocationIndex], blob.elements); + threadgroup_barrier(mem_flags::mem_threadgroup); + spvUnsafeArray copied_blob; + spvArrayCopyFromThreadGroupToStack1(copied_blob.elements, shared_group[gl_LocalInvocationIndex ^ 1u]); + 
spvArrayCopyFromThreadGroupToThreadGroup1(shared_group_alt[gl_LocalInvocationIndex], shared_group[gl_LocalInvocationIndex]); +} + diff --git a/reference/shaders-msl-no-opt/comp/basic.dynamic-buffer.msl2.invalid.comp b/reference/shaders-msl-no-opt/comp/basic.dynamic-buffer.msl2.invalid.comp new file mode 100644 index 00000000000..ae8c5b02953 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/basic.dynamic-buffer.msl2.invalid.comp @@ -0,0 +1,90 @@ +#include +#include + +using namespace metal; + +struct Baz +{ + int e; + int f; +}; + +struct Foo +{ + int a; + int b; +}; + +struct Bar +{ + int c; + int d; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u); + +struct spvDescriptorSetBuffer0 +{ + constant Foo* m_34 [[id(0)]]; + constant Bar* m_40 [[id(1)]]; +}; + +struct spvDescriptorSetBuffer1 +{ + device Baz* baz [[id(0)]][3][3][2]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvDynamicOffsets [[buffer(23)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + constant auto& _34 = *(constant Foo* )((constant char* )spvDescriptorSet0.m_34 + spvDynamicOffsets[0]); + device Baz* baz[3][3][2] = + { + { + { + (device Baz* )((device char* )spvDescriptorSet1.baz[0][0][0] + spvDynamicOffsets[1]), + (device Baz* )((device char* )spvDescriptorSet1.baz[0][0][1] + spvDynamicOffsets[2]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[0][1][0] + spvDynamicOffsets[3]), + (device Baz* )((device char* )spvDescriptorSet1.baz[0][1][1] + spvDynamicOffsets[4]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[0][2][0] + spvDynamicOffsets[5]), + (device Baz* )((device char* )spvDescriptorSet1.baz[0][2][1] + spvDynamicOffsets[6]), + }, + }, + { + { + (device Baz* )((device char* )spvDescriptorSet1.baz[1][0][0] + spvDynamicOffsets[7]), + (device Baz* )((device char* )spvDescriptorSet1.baz[1][0][1] 
+ spvDynamicOffsets[8]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[1][1][0] + spvDynamicOffsets[9]), + (device Baz* )((device char* )spvDescriptorSet1.baz[1][1][1] + spvDynamicOffsets[10]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[1][2][0] + spvDynamicOffsets[11]), + (device Baz* )((device char* )spvDescriptorSet1.baz[1][2][1] + spvDynamicOffsets[12]), + }, + }, + { + { + (device Baz* )((device char* )spvDescriptorSet1.baz[2][0][0] + spvDynamicOffsets[13]), + (device Baz* )((device char* )spvDescriptorSet1.baz[2][0][1] + spvDynamicOffsets[14]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[2][1][0] + spvDynamicOffsets[15]), + (device Baz* )((device char* )spvDescriptorSet1.baz[2][1][1] + spvDynamicOffsets[16]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[2][2][0] + spvDynamicOffsets[17]), + (device Baz* )((device char* )spvDescriptorSet1.baz[2][2][1] + spvDynamicOffsets[18]), + }, + }, + }; + + uint3 coords = gl_GlobalInvocationID; + baz[coords.x][coords.y][coords.z]->e = _34.a + (*spvDescriptorSet0.m_40).c; + baz[coords.x][coords.y][coords.z]->f = _34.b * (*spvDescriptorSet0.m_40).d; +} + diff --git a/reference/shaders-msl-no-opt/comp/bda-restrict-pointer-variable.msl2.comp b/reference/shaders-msl-no-opt/comp/bda-restrict-pointer-variable.msl2.comp new file mode 100644 index 00000000000..bf26b3b280b --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/bda-restrict-pointer-variable.msl2.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct Ref; + +struct Ref +{ + float4 v; +}; + +struct Registers +{ + device Ref* foo; +}; + +kernel void main0(constant Registers& _14 [[buffer(0)]]) +{ + device Ref* __restrict ref = _14.foo; + ref->v = float4(1.0); +} + diff --git a/reference/shaders-msl/comp/bitcast-16bit-1.invalid.comp b/reference/shaders-msl-no-opt/comp/bitcast-16bit-1.invalid.comp similarity index 54% rename from 
reference/shaders-msl/comp/bitcast-16bit-1.invalid.comp rename to reference/shaders-msl-no-opt/comp/bitcast-16bit-1.invalid.comp index 170e4920e7b..2e86f996c9b 100644 --- a/reference/shaders-msl/comp/bitcast-16bit-1.invalid.comp +++ b/reference/shaders-msl-no-opt/comp/bitcast-16bit-1.invalid.comp @@ -13,12 +13,14 @@ struct SSBO1 int4 outputs[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO0& _25 [[buffer(0)]], device SSBO1& _39 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; half2 a = as_type(_25.inputs[ident].xy); - _39.outputs[ident].x = int(as_type(a + half2(half(1.0)))); - _39.outputs[ident].y = as_type(_25.inputs[ident].zw); - _39.outputs[ident].z = int(as_type(ushort2(_25.inputs[ident].xy))); + ((device int*)&_39.outputs[ident])[0u] = int(as_type(a + half2(half(1.0)))); + ((device int*)&_39.outputs[ident])[1u] = as_type(_25.inputs[ident].zw); + ((device int*)&_39.outputs[ident])[2u] = int(as_type(ushort2(_25.inputs[ident].xy))); } diff --git a/reference/shaders-msl-no-opt/comp/bitcast-16bit-2.invalid.comp b/reference/shaders-msl-no-opt/comp/bitcast-16bit-2.invalid.comp new file mode 100644 index 00000000000..fa65e3bb4de --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/bitcast-16bit-2.invalid.comp @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct SSBO1 +{ + short4 outputs[1]; +}; + +struct SSBO0 +{ + int4 inputs[1]; +}; + +struct UBO +{ + half4 const0; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO1& _21 [[buffer(0)]], device SSBO0& _29 [[buffer(1)]], constant UBO& _40 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint ident = gl_GlobalInvocationID.x; + int _33 = ((device int*)&_29.inputs[ident])[0u]; + short2 _47 = as_type(_33) + as_type(_40.const0.xy); + ((device short*)&_21.outputs[ident])[0u] = _47.x; + ((device 
short*)&_21.outputs[ident])[1u] = _47.y; + int _57 = ((device int*)&_29.inputs[ident])[1u]; + short2 _67 = short2(as_type(uint(_57)) - as_type(_40.const0.zw)); + ((device short*)&_21.outputs[ident])[2u] = _67.x; + ((device short*)&_21.outputs[ident])[3u] = _67.y; +} + diff --git a/reference/shaders-msl-no-opt/comp/bitfield.comp b/reference/shaders-msl-no-opt/comp/bitfield.comp index dbc27f02be9..7f797add160 100644 --- a/reference/shaders-msl-no-opt/comp/bitfield.comp +++ b/reference/shaders-msl-no-opt/comp/bitfield.comp @@ -7,14 +7,14 @@ using namespace metal; // Implementation of the GLSL findLSB() function template -T findLSB(T x) +inline T spvFindLSB(T x) { return select(ctz(x), T(-1), x == T(0)); } // Implementation of the signed GLSL findMSB() function template -T findSMSB(T x) +inline T spvFindSMSB(T x) { T v = select(x, T(-1) - x, x < T(0)); return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); @@ -22,7 +22,7 @@ T findSMSB(T x) // Implementation of the unsigned GLSL findMSB() function template -T findUMSB(T x) +inline T spvFindUMSB(T x) { return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0)); } @@ -31,17 +31,17 @@ kernel void main0() { int signed_value = 0; uint unsigned_value = 0u; - int s = extract_bits(signed_value, 5, 20); - uint u = extract_bits(unsigned_value, 6, 21); - s = insert_bits(s, 40, 5, 4); - u = insert_bits(u, 60u, 5, 4); + int s = extract_bits(signed_value, uint(5), uint(20)); + uint u = extract_bits(unsigned_value, uint(6), uint(21)); + s = insert_bits(s, 40, uint(5), uint(4)); + u = insert_bits(u, 60u, uint(5), uint(4)); u = reverse_bits(u); s = reverse_bits(s); - int v0 = popcount(u); + int v0 = int(popcount(u)); int v1 = popcount(s); - int v2 = int(findUMSB(u)); - int v3 = findSMSB(s); - int v4 = findLSB(u); - int v5 = findLSB(s); + int v2 = int(spvFindUMSB(u)); + int v3 = spvFindSMSB(s); + int v4 = int(spvFindLSB(u)); + int v5 = spvFindLSB(s); } diff --git 
a/reference/shaders-msl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.msl23.comp b/reference/shaders-msl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.msl23.comp new file mode 100644 index 00000000000..7864e0fb768 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.msl23.comp @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct SSBO; + +struct S +{ + float3 v; +}; + +struct SSBO +{ + S s[1]; +}; + +struct PC +{ + uint2 ptr; +}; + +kernel void main0(constant PC& pc [[buffer(0)]]) +{ + device SSBO* ssbo = reinterpret_cast(as_type(pc.ptr)); + ssbo->s[0].v = float3(1.0); +} + diff --git a/reference/shaders-msl-no-opt/comp/glsl.std450.comp b/reference/shaders-msl-no-opt/comp/glsl.std450.comp new file mode 100644 index 00000000000..b1790b23df6 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/glsl.std450.comp @@ -0,0 +1,289 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +// Implementation of the GLSL radians() function +template +inline T radians(T d) +{ + return d * T(0.01745329251); +} + +// Implementation of the GLSL degrees() function +template +inline T degrees(T r) +{ + return r * T(57.2957795131); +} + +// Implementation of the GLSL findLSB() function +template +inline T spvFindLSB(T x) +{ + return select(ctz(x), T(-1), x == T(0)); +} + +// Implementation of the signed GLSL findMSB() function +template +inline T spvFindSMSB(T x) +{ + T v = select(x, T(-1) - x, x < T(0)); + return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); +} + +// Implementation of the unsigned GLSL findMSB() function +template +inline T spvFindUMSB(T x) +{ + return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0)); +} + +// Implementation of the GLSL sign() function for integer types +template::value>::type> +inline T sign(T x) +{ + return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0)); 
+} + +// Returns the determinant of a 2x2 matrix. +static inline __attribute__((always_inline)) +float spvDet2x2(float a1, float a2, float b1, float b2) +{ + return a1 * b2 - b1 * a2; +} + +// Returns the determinant of a 3x3 matrix. +static inline __attribute__((always_inline)) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +{ + return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); +} + +// Returns the inverse of a matrix, by using the algorithm of calculating the classical +// adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) +float4x4 spvInverse4x4(float4x4 m) +{ + float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) + + // Create the transpose of the cofactors, as the classical adjoint of the matrix. + adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]); + adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]); + + adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]); + adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]); + + adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + 
adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]); + adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]); + + adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]); + adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); + + // Calculate the determinant as a combination of the cofactors of the first row. + float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]); + + // Divide the classical adjoint matrix by the determinant. + // If determinant is zero, matrix is not invertable, so leave it unchanged. + return (det != 0.0f) ? (adj * (1.0f / det)) : m; +} + +// Returns the inverse of a matrix, by using the algorithm of calculating the classical +// adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) +float3x3 spvInverse3x3(float3x3 m) +{ + float3x3 adj; // The adjoint matrix (inverse after dividing by determinant) + + // Create the transpose of the cofactors, as the classical adjoint of the matrix. 
+ adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]); + adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]); + adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]); + + adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]); + adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]); + adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]); + + adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]); + adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]); + adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]); + + // Calculate the determinant as a combination of the cofactors of the first row. + float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]); + + // Divide the classical adjoint matrix by the determinant. + // If determinant is zero, matrix is not invertable, so leave it unchanged. + return (det != 0.0f) ? (adj * (1.0f / det)) : m; +} + +// Returns the inverse of a matrix, by using the algorithm of calculating the classical +// adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) +float2x2 spvInverse2x2(float2x2 m) +{ + float2x2 adj; // The adjoint matrix (inverse after dividing by determinant) + + // Create the transpose of the cofactors, as the classical adjoint of the matrix. + adj[0][0] = m[1][1]; + adj[0][1] = -m[0][1]; + + adj[1][0] = -m[1][0]; + adj[1][1] = m[0][0]; + + // Calculate the determinant as a combination of the cofactors of the first row. + float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]); + + // Divide the classical adjoint matrix by the determinant. + // If determinant is zero, matrix is not invertable, so leave it unchanged. + return (det != 0.0f) ? 
(adj * (1.0f / det)) : m; +} + +template +[[clang::optnone]] T spvReflect(T i, T n) +{ + return i - T(2) * i * n * n; +} + +template +inline T spvRefract(T i, T n, T eta) +{ + T NoI = n * i; + T NoI2 = NoI * NoI; + T k = T(1) - eta * eta * (T(1) - NoI2); + if (k < T(0)) + { + return T(0); + } + else + { + return eta * i - (eta * NoI + sqrt(k)) * n; + } +} + +template +inline T spvFaceForward(T n, T i, T nref) +{ + return i * nref < T(0) ? n : -n; +} + +struct SSBO +{ + float res; + int ires; + uint ures; + float4 f32; + int4 s32; + uint4 u32; + float2x2 m2; + float3x3 m3; + float4x4 m4; +}; + +struct ResType +{ + float _m0; + int _m1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _19 [[buffer(0)]]) +{ + _19.res = round(((device float*)&_19.f32)[0u]); + _19.res = rint(((device float*)&_19.f32)[0u]); + _19.res = trunc(((device float*)&_19.f32)[0u]); + _19.res = abs(((device float*)&_19.f32)[0u]); + _19.ires = abs(((device int*)&_19.s32)[0u]); + _19.res = sign(((device float*)&_19.f32)[0u]); + _19.ires = sign(((device int*)&_19.s32)[0u]); + _19.res = floor(((device float*)&_19.f32)[0u]); + _19.res = ceil(((device float*)&_19.f32)[0u]); + _19.res = fract(((device float*)&_19.f32)[0u]); + _19.res = radians(((device float*)&_19.f32)[0u]); + _19.res = degrees(((device float*)&_19.f32)[0u]); + _19.res = sin(((device float*)&_19.f32)[0u]); + _19.res = cos(((device float*)&_19.f32)[0u]); + _19.res = tan(((device float*)&_19.f32)[0u]); + _19.res = asin(((device float*)&_19.f32)[0u]); + _19.res = acos(((device float*)&_19.f32)[0u]); + _19.res = atan(((device float*)&_19.f32)[0u]); + _19.res = fast::sinh(((device float*)&_19.f32)[0u]); + _19.res = fast::cosh(((device float*)&_19.f32)[0u]); + _19.res = precise::tanh(((device float*)&_19.f32)[0u]); + _19.res = asinh(((device float*)&_19.f32)[0u]); + _19.res = acosh(((device float*)&_19.f32)[0u]); + _19.res = atanh(((device float*)&_19.f32)[0u]); + _19.res = 
precise::atan2(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.res = pow(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.res = exp(((device float*)&_19.f32)[0u]); + _19.res = log(((device float*)&_19.f32)[0u]); + _19.res = exp2(((device float*)&_19.f32)[0u]); + _19.res = log2(((device float*)&_19.f32)[0u]); + _19.res = sqrt(((device float*)&_19.f32)[0u]); + _19.res = rsqrt(((device float*)&_19.f32)[0u]); + _19.res = abs(((device float*)&_19.f32)[0u]); + _19.res = abs(((device float*)&_19.f32)[0u] - ((device float*)&_19.f32)[1u]); + _19.res = sign(((device float*)&_19.f32)[0u]); + _19.res = spvFaceForward(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); + _19.res = spvReflect(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.res = spvRefract(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); + _19.res = length(_19.f32.xy); + _19.res = distance(_19.f32.xy, _19.f32.zw); + float2 v2 = fast::normalize(_19.f32.xy); + v2 = faceforward(_19.f32.xy, _19.f32.yz, _19.f32.zw); + v2 = reflect(_19.f32.xy, _19.f32.zw); + v2 = refract(_19.f32.xy, _19.f32.yz, ((device float*)&_19.f32)[3u]); + float3 v3 = cross(_19.f32.xyz, _19.f32.yzw); + _19.res = determinant(_19.m2); + _19.res = determinant(_19.m3); + _19.res = determinant(_19.m4); + _19.m2 = spvInverse2x2(_19.m2); + _19.m3 = spvInverse3x3(_19.m3); + _19.m4 = spvInverse4x4(_19.m4); + float tmp; + float _287 = modf(((device float*)&_19.f32)[0u], tmp); + _19.res = _287; + _19.res = fast::min(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.ures = min(((device uint*)&_19.u32)[0u], ((device uint*)&_19.u32)[1u]); + _19.ires = min(((device int*)&_19.s32)[0u], ((device int*)&_19.s32)[1u]); + _19.res = fast::max(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.ures = max(((device uint*)&_19.u32)[0u], ((device uint*)&_19.u32)[1u]); + _19.ires = 
max(((device int*)&_19.s32)[0u], ((device int*)&_19.s32)[1u]); + _19.res = fast::clamp(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); + _19.ures = clamp(((device uint*)&_19.u32)[0u], ((device uint*)&_19.u32)[1u], ((device uint*)&_19.u32)[2u]); + _19.ires = clamp(((device int*)&_19.s32)[0u], ((device int*)&_19.s32)[1u], ((device int*)&_19.s32)[2u]); + _19.res = mix(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); + _19.res = step(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.res = smoothstep(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); + _19.res = fma(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); + ResType _387; + _387._m0 = frexp(((device float*)&_19.f32)[0u], _387._m1); + int itmp = _387._m1; + _19.res = _387._m0; + _19.res = ldexp(((device float*)&_19.f32)[0u], itmp); + _19.ures = pack_float_to_snorm4x8(_19.f32); + _19.ures = pack_float_to_unorm4x8(_19.f32); + _19.ures = pack_float_to_snorm2x16(_19.f32.xy); + _19.ures = pack_float_to_unorm2x16(_19.f32.xy); + _19.ures = as_type(half2(_19.f32.xy)); + v2 = unpack_snorm2x16_to_float(((device uint*)&_19.u32)[0u]); + v2 = unpack_unorm2x16_to_float(((device uint*)&_19.u32)[0u]); + v2 = float2(as_type(((device uint*)&_19.u32)[0u])); + float4 v4 = unpack_snorm4x8_to_float(((device uint*)&_19.u32)[0u]); + v4 = unpack_unorm4x8_to_float(((device uint*)&_19.u32)[0u]); + _19.s32 = spvFindLSB(_19.s32); + _19.s32 = int4(spvFindLSB(_19.u32)); + _19.s32 = spvFindSMSB(_19.s32); + _19.s32 = int4(spvFindUMSB(_19.u32)); +} + diff --git a/reference/shaders-msl-no-opt/comp/illegal-struct-name.asm.comp b/reference/shaders-msl-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 00000000000..de1695b0684 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,29 @@ +#include 
+#include + +using namespace metal; + +struct Foo +{ + float _abs; +}; + +struct Foo_1 +{ + float _abs; +}; + +struct SSBO +{ + Foo_1 foo; + Foo_1 foo2; +}; + +kernel void main0(device SSBO& _7 [[buffer(0)]]) +{ + Foo f; + f._abs = _7.foo._abs; + int _abs = 10; + _7.foo2._abs = f._abs; +} + diff --git a/reference/shaders-msl-no-opt/comp/implicit-integer-promotion.comp b/reference/shaders-msl-no-opt/comp/implicit-integer-promotion.comp new file mode 100644 index 00000000000..5c3ce49eb9d --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/implicit-integer-promotion.comp @@ -0,0 +1,93 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct BUF0 +{ + half2 f16s; + ushort2 u16; + short2 i16; + ushort4 u16s; + short4 i16s; + half f16; +}; + +static inline __attribute__((always_inline)) +void test_u16(device BUF0& v_24) +{ + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] + ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] - ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] * ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] / ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] % ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] << ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] >> ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(~((device ushort*)&v_24.u16)[0u])); + v_24.f16 += as_type(ushort(-((device ushort*)&v_24.u16)[0u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] ^ ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] & ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] | ((device 
ushort*)&v_24.u16)[1u])); +} + +static inline __attribute__((always_inline)) +void test_i16(device BUF0& v_24) +{ + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] + ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] - ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] * ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] / ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] % ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] << ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] >> ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(~((device short*)&v_24.i16)[0u])); + v_24.f16 += as_type(short(-((device short*)&v_24.i16)[0u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] ^ ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] & ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] | ((device short*)&v_24.i16)[1u])); +} + +static inline __attribute__((always_inline)) +void test_u16s(device BUF0& v_24) +{ + v_24.f16s += as_type(v_24.u16s.xy + v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy - v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy * v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy / v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy % v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy << v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy >> v_24.u16s.zw); + v_24.f16s += as_type(~v_24.u16s.xy); + v_24.f16s += as_type(-v_24.u16s.xy); + v_24.f16s += as_type(v_24.u16s.xy ^ v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy & v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy | v_24.u16s.zw); +} + +static inline __attribute__((always_inline)) +void test_i16s(device BUF0& v_24) +{ + v_24.f16s += 
as_type(v_24.i16s.xy + v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy - v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy * v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy / v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy % v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy << v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy >> v_24.i16s.zw); + v_24.f16s += as_type(~v_24.i16s.xy); + v_24.f16s += as_type(-v_24.i16s.xy); + v_24.f16s += as_type(v_24.i16s.xy ^ v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy & v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy | v_24.i16s.zw); +} + +kernel void main0(device BUF0& v_24 [[buffer(0)]]) +{ + test_u16(v_24); + test_i16(v_24); + test_u16s(v_24); + test_i16s(v_24); +} + diff --git a/reference/shaders-msl-no-opt/comp/int16min-literal.comp b/reference/shaders-msl-no-opt/comp/int16min-literal.comp new file mode 100644 index 00000000000..d73768c3436 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/int16min-literal.comp @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + half b; +}; + +struct SSBO +{ + half a; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant UBO& _12 [[buffer(0)]], device SSBO& _24 [[buffer(1)]]) +{ + short v = as_type(_12.b); + v = short(v ^ short(-32768)); + _24.a = as_type(v); +} + diff --git a/reference/shaders-msl-no-opt/comp/int64.invalid.msl22.comp b/reference/shaders-msl-no-opt/comp/int64.invalid.msl22.comp new file mode 100644 index 00000000000..d5bbbb47fc6 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/int64.invalid.msl22.comp @@ -0,0 +1,106 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct M0 +{ + long v; + spvUnsafeArray b; + ulong c; + spvUnsafeArray d; +}; + +struct SSBO0_Type +{ + long4 a; + M0 m0; +}; + +struct SSBO1_Type +{ + ulong4 b; + M0 m0; +}; + +struct SSBO2_Type +{ + spvUnsafeArray a; + spvUnsafeArray b; +}; + +struct SSBO3_Type +{ + spvUnsafeArray a; + spvUnsafeArray b; +}; + +struct SSBO +{ + int s32; + uint u32; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _96 [[buffer(0)]]) +{ + SSBO0_Type ssbo_0; + ssbo_0.a += long4(10l, 20l, 30l, 40l); + SSBO1_Type ssbo_1; + ssbo_1.b += ulong4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul); + ssbo_0.a += long4(20l); + ssbo_0.a = abs(ssbo_0.a + long4(ssbo_1.b)); + ssbo_0.a += long4(1l); + ssbo_1.b += ulong4(long4(1l)); + ssbo_0.a -= long4(1l); + ssbo_1.b -= ulong4(long4(1l)); + SSBO2_Type ssbo_2; + ssbo_2.a[0] += 1l; + SSBO3_Type ssbo_3; + ssbo_3.a[0] += 2l; + _96.s32 = int(uint(((ulong(ssbo_0.a.x) + ssbo_1.b.y) + ulong(ssbo_2.a[1])) + ulong(ssbo_3.a[2]))); + _96.u32 = uint(((ulong(ssbo_0.a.y) + ssbo_1.b.z) + ulong(ssbo_2.a[0])) + ulong(ssbo_3.a[1])); +} + diff --git a/reference/shaders-msl-no-opt/comp/int64min-literal.msl22.comp b/reference/shaders-msl-no-opt/comp/int64min-literal.msl22.comp new file mode 100644 index 00000000000..a8f2b0e270c 
--- /dev/null +++ b/reference/shaders-msl-no-opt/comp/int64min-literal.msl22.comp @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float b; +}; + +struct SSBO +{ + float a; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant UBO& _12 [[buffer(0)]], device SSBO& _25 [[buffer(1)]]) +{ + long v = long(as_type(_12.b)); + v ^= long(0x8000000000000000ul); + _25.a = as_type(int(v)); +} + diff --git a/reference/shaders-msl-no-opt/comp/intmin-literal.comp b/reference/shaders-msl-no-opt/comp/intmin-literal.comp new file mode 100644 index 00000000000..db2294fe6c0 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/intmin-literal.comp @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float a; +}; + +struct UBO +{ + float b; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], constant UBO& _14 [[buffer(1)]]) +{ + _9.a = as_type(as_type(_14.b) ^ int(0x80000000)); +} + diff --git a/reference/shaders-msl-no-opt/comp/loop.comp b/reference/shaders-msl-no-opt/comp/loop.comp index d7677fb4363..34fe64b0a2b 100644 --- a/reference/shaders-msl-no-opt/comp/loop.comp +++ b/reference/shaders-msl-no-opt/comp/loop.comp @@ -14,7 +14,9 @@ struct SSBO2 float4 out_data[1]; }; -kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _177 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(const device SSBO& _24 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; float4 idat = _24.in_data[ident]; @@ -85,23 +87,5 @@ kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _177 [[buf k += 10; continue; } - k = 0; - do - { - k++; - } while (k > 10); - int l = 0; - for (;;) - { - if (l == 5) - { - l++; - continue; - } - idat += 
float4(1.0); - l++; - continue; - } - _177.out_data[ident] = idat; } diff --git a/reference/shaders-msl-no-opt/comp/return.comp b/reference/shaders-msl-no-opt/comp/return.comp index 71fcfbe3911..04cacea9d53 100644 --- a/reference/shaders-msl-no-opt/comp/return.comp +++ b/reference/shaders-msl-no-opt/comp/return.comp @@ -8,6 +8,8 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; @@ -23,7 +25,8 @@ kernel void main0(device SSBO2& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [ return; } } - for (int i = 0; i < 20; i++) + int i = 0; + while (i < 20) { if (i == 10) { diff --git a/reference/shaders-msl-no-opt/comp/std140-array-load-composite-construct.comp b/reference/shaders-msl-no-opt/comp/std140-array-load-composite-construct.comp new file mode 100644 index 00000000000..ba278ccde76 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/std140-array-load-composite-construct.comp @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 a[16]; + float4 b[16]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _14 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _14.b[gl_GlobalInvocationID.x] = float4(_14.a[gl_GlobalInvocationID.x].x); +} + diff --git a/reference/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/reference/shaders-msl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp similarity index 64% rename from reference/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp rename to reference/shaders-msl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp index 278a8bb2ee8..49758ca3e17 100644 --- a/reference/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp +++ 
b/reference/shaders-msl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp @@ -4,7 +4,6 @@ using namespace metal; typedef packed_float2 packed_float2x2[2]; -typedef packed_float2 packed_rm_float2x3[3]; typedef packed_float3 packed_float2x3[2]; typedef packed_float3 packed_rm_float3x2[2]; @@ -32,11 +31,6 @@ struct S3 float b; }; -struct S4 -{ - float2 c; -}; - struct Content { S0 m0s[1]; @@ -47,7 +41,6 @@ struct Content S2 m2; S3 m3; float m4; - S4 m3s[8]; }; struct SSBO1 @@ -61,15 +54,17 @@ struct SSBO1 float3x2 m3; float2x2 m4; float2x2 m5[9]; - packed_rm_float2x3 m6[4][2]; - float3x2 m7; + float3x2 m6[4][2]; + packed_rm_float3x2 m7; float array[1]; }; struct S0_1 { - float4 a[1]; + float2 a[1]; + char _m1_pad[8]; float b; + char _m0_final_padding[12]; }; struct S1_1 @@ -82,6 +77,7 @@ struct S2_1 { float3 a[1]; float b; + char _m0_final_padding[12]; }; struct S3_1 @@ -90,11 +86,6 @@ struct S3_1 float b; }; -struct S4_1 -{ - float2 c; -}; - struct Content_1 { S0_1 m0s[1]; @@ -105,8 +96,7 @@ struct Content_1 S2_1 m2; S3_1 m3; float m4; - char _m8_pad[12]; - /* FIXME: A padded struct is needed here. If you see this message, file a bug! 
*/ S4_1 m3s[8]; + char _m0_final_padding[12]; }; struct SSBO0 @@ -114,18 +104,14 @@ struct SSBO0 Content_1 content; Content_1 content1[2]; Content_1 content2; - float2x2 m0; - char _m4_pad[16]; - float2x2 m1; - char _m5_pad[16]; + float2x4 m0; + float2x4 m1; float2x3 m2[4]; - float3x2 m3; - char _m7_pad[24]; - float2x2 m4; - char _m8_pad[16]; - float2x2 m5[9]; - float2x3 m6[4][2]; - float3x2 m7; + float3x4 m3; + float2x4 m4; + float2x4 m5[9]; + float3x4 m6[4][2]; + float2x3 m7; float4 array[1]; }; @@ -136,15 +122,17 @@ struct SSBO2 packed_rm_float3x2 m2; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO1& ssbo_scalar [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]], device SSBO2& ssbo_scalar2 [[buffer(2)]]) { - ssbo_scalar.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0].xy; + ssbo_scalar.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0]; ssbo_scalar.content.m0s[0].b = ssbo_140.content.m0s[0].b; ssbo_scalar.content.m1s[0].a = float3(ssbo_140.content.m1s[0].a); ssbo_scalar.content.m1s[0].b = ssbo_140.content.m1s[0].b; ssbo_scalar.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0]; ssbo_scalar.content.m2s[0].b = ssbo_140.content.m2s[0].b; - ssbo_scalar.content.m0.a[0] = ssbo_140.content.m0.a[0].xy; + ssbo_scalar.content.m0.a[0] = ssbo_140.content.m0.a[0]; ssbo_scalar.content.m0.b = ssbo_140.content.m0.b; ssbo_scalar.content.m1.a = float3(ssbo_140.content.m1.a); ssbo_scalar.content.m1.b = ssbo_140.content.m1.b; @@ -153,19 +141,11 @@ kernel void main0(device SSBO1& ssbo_scalar [[buffer(0)]], device SSBO0& ssbo_14 ssbo_scalar.content.m3.a = ssbo_140.content.m3.a; ssbo_scalar.content.m3.b = ssbo_140.content.m3.b; ssbo_scalar.content.m4 = ssbo_140.content.m4; - ssbo_scalar.content.m3s[0].c = ssbo_140.content.m3s[0].c; - ssbo_scalar.content.m3s[1].c = ssbo_140.content.m3s[1].c; - ssbo_scalar.content.m3s[2].c = ssbo_140.content.m3s[2].c; - ssbo_scalar.content.m3s[3].c = ssbo_140.content.m3s[3].c; - 
ssbo_scalar.content.m3s[4].c = ssbo_140.content.m3s[4].c; - ssbo_scalar.content.m3s[5].c = ssbo_140.content.m3s[5].c; - ssbo_scalar.content.m3s[6].c = ssbo_140.content.m3s[6].c; - ssbo_scalar.content.m3s[7].c = ssbo_140.content.m3s[7].c; ssbo_scalar.content.m1.a = float2x3(float3(ssbo_scalar.m2[1][0]), float3(ssbo_scalar.m2[1][1])) * float2(ssbo_scalar.content.m0.a[0]); ssbo_scalar.m0 = float2x2(float2(ssbo_scalar2.m1[0]), float2(ssbo_scalar2.m1[1])); - ssbo_scalar2.m1[0] = transpose(ssbo_scalar.m4)[0]; - ssbo_scalar2.m1[1] = transpose(ssbo_scalar.m4)[1]; - ssbo_scalar2.m2[0] = spvConvertFromRowMajor3x2(ssbo_scalar.m3)[0]; - ssbo_scalar2.m2[1] = spvConvertFromRowMajor3x2(ssbo_scalar.m3)[1]; + ssbo_scalar2.m1[0] = float2(ssbo_scalar.m4[0][0], ssbo_scalar.m4[1][0]); + ssbo_scalar2.m1[1] = float2(ssbo_scalar.m4[0][1], ssbo_scalar.m4[1][1]); + ssbo_scalar2.m2[0] = float3(ssbo_scalar.m3[0][0], ssbo_scalar.m3[1][0], ssbo_scalar.m3[2][0]); + ssbo_scalar2.m2[1] = float3(ssbo_scalar.m3[0][1], ssbo_scalar.m3[1][1], ssbo_scalar.m3[2][1]); } diff --git a/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl12.emulate-subgroup.comp b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl12.emulate-subgroup.comp new file mode 100644 index 00000000000..651991e3513 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl12.emulate-subgroup.comp @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float FragColor; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], unsupported-built-in-type gl_WorkGroupSize [[unsupported-built-in]]) +{ + uint gl_NumSubgroups = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z; + uint gl_SubgroupID = gl_LocalInvocationIndex; + uint gl_SubgroupSize = 1; + uint gl_SubgroupInvocationID = 0; + _9.FragColor = 
float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + threadgroup_barrier(mem_flags::mem_device); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_texture); + bool elected = true; +} + diff --git a/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.comp b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.comp new file mode 100644 index 00000000000..b1337e2532b --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.comp @@ -0,0 +1,327 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline T spvSubgroupBroadcast(T value, ushort lane) +{ + return simd_broadcast(value, lane); +} + +template<> +inline bool spvSubgroupBroadcast(bool value, ushort lane) +{ + return !!simd_broadcast((ushort)value, lane); +} + +template +inline vec spvSubgroupBroadcast(vec value, ushort lane) +{ + return (vec)simd_broadcast((vec)value, lane); +} + +template +inline T spvSubgroupBroadcastFirst(T value) +{ + return simd_broadcast_first(value); +} + +template<> +inline bool spvSubgroupBroadcastFirst(bool value) +{ + return !!simd_broadcast_first((ushort)value); +} + +template +inline vec spvSubgroupBroadcastFirst(vec value) +{ + return (vec)simd_broadcast_first((vec)value); +} + +inline uint4 spvSubgroupBallot(bool value) +{ + simd_vote vote = simd_ballot(value); + // simd_ballot() returns a 64-bit integer-like object, but + // SPIR-V callers expect a uint4. We must convert. + // FIXME: This won't include higher bits if Apple ever supports + // 128 lanes in an SIMD-group. 
+ return uint4(as_type((simd_vote::vote_t)vote), 0, 0); +} + +inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) +{ + return !!extract_bits(ballot[bit / 32], bit % 32, 1); +} + +inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, 
min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +template +inline bool spvSubgroupAllEqual(T value) +{ + return simd_all(all(value == simd_broadcast_first(value))); +} + +template<> +inline bool spvSubgroupAllEqual(bool value) +{ + return simd_all(value) || !simd_any(value); +} + +template +inline bool spvSubgroupAllEqual(vec value) +{ + return simd_all(all(value == (vec)simd_broadcast_first((vec)value))); +} + +template +inline T spvSubgroupShuffle(T value, ushort lane) +{ + return simd_shuffle(value, lane); +} + +template<> +inline bool spvSubgroupShuffle(bool value, ushort lane) +{ + return !!simd_shuffle((ushort)value, lane); +} + +template +inline vec spvSubgroupShuffle(vec value, ushort lane) +{ + return (vec)simd_shuffle((vec)value, lane); +} + +template +inline T spvSubgroupShuffleXor(T value, ushort mask) +{ + return simd_shuffle_xor(value, mask); +} + +template<> +inline bool spvSubgroupShuffleXor(bool value, ushort mask) +{ + return !!simd_shuffle_xor((ushort)value, mask); +} + +template +inline vec spvSubgroupShuffleXor(vec value, ushort mask) +{ + return (vec)simd_shuffle_xor((vec)value, mask); +} + +template +inline T spvSubgroupShuffleUp(T value, ushort delta) +{ + return simd_shuffle_up(value, delta); +} + +template<> +inline bool spvSubgroupShuffleUp(bool value, ushort delta) +{ + return !!simd_shuffle_up((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleUp(vec value, ushort delta) +{ + return (vec)simd_shuffle_up((vec)value, delta); +} + +template +inline T spvSubgroupShuffleDown(T value, ushort delta) +{ + return simd_shuffle_down(value, delta); +} + +template<> +inline bool spvSubgroupShuffleDown(bool value, ushort delta) +{ + return !!simd_shuffle_down((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleDown(vec value, ushort delta) +{ + return (vec)simd_shuffle_down((vec)value, delta); +} + 
+template +inline T spvQuadBroadcast(T value, uint lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvQuadBroadcast(bool value, uint lane) +{ + return !!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvQuadBroadcast(vec value, uint lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvQuadSwap(T value, uint dir) +{ + return quad_shuffle_xor(value, dir + 1); +} + +template<> +inline bool spvQuadSwap(bool value, uint dir) +{ + return !!quad_shuffle_xor((ushort)value, dir + 1); +} + +template +inline vec spvQuadSwap(vec value, uint dir) +{ + return (vec)quad_shuffle_xor((vec)value, dir + 1); +} + +struct SSBO +{ + float FragColor; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) +{ + uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID >= 32 ? 
uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); + uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); + uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + _9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device); + simdgroup_barrier(mem_flags::mem_threadgroup); + simdgroup_barrier(mem_flags::mem_texture); + bool _39 = simd_is_first(); + bool elected = _39; + _9.FragColor = float4(gl_SubgroupEqMask).x; + _9.FragColor = float4(gl_SubgroupGeMask).x; + _9.FragColor = float4(gl_SubgroupGtMask).x; + _9.FragColor = float4(gl_SubgroupLeMask).x; + _9.FragColor = float4(gl_SubgroupLtMask).x; + float4 broadcasted = 
spvSubgroupBroadcast(float4(10.0), 8u); + bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u); + float3 first = spvSubgroupBroadcastFirst(float3(20.0)); + bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false)); + uint4 ballot_value = spvSubgroupBallot(true); + bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); + bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); + uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize); + uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize); + uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize); + uint shuffled = spvSubgroupShuffle(10u, 8u); + bool shuffled_bool = spvSubgroupShuffle(true, 9u); + uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); + uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); + uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); + bool has_all = simd_all(true); + bool has_any = simd_any(true); + bool has_equal = spvSubgroupAllEqual(0); + has_equal = spvSubgroupAllEqual(true); + has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0)); + has_equal = spvSubgroupAllEqual(bool4(true, true, false, true)); + float4 added = simd_sum(float4(20.0)); + int4 iadded = simd_sum(int4(20)); + float4 multiplied = simd_product(float4(20.0)); + int4 imultiplied = simd_product(int4(20)); + float4 lo = simd_min(float4(20.0)); + float4 hi = simd_max(float4(20.0)); + int4 slo = simd_min(int4(20)); + int4 shi = simd_max(int4(20)); + uint4 ulo = simd_min(uint4(20u)); + uint4 uhi = simd_max(uint4(20u)); + uint4 anded = 
simd_and(ballot_value); + uint4 ored = simd_or(ballot_value); + uint4 xored = simd_xor(ballot_value); + bool4 anded_b = simd_and(ballot_value == uint4(42u)); + bool4 ored_b = simd_or(ballot_value == uint4(42u)); + bool4 xored_b = simd_xor(ballot_value == uint4(42u)); + added = simd_prefix_inclusive_sum(added); + iadded = simd_prefix_inclusive_sum(iadded); + multiplied = simd_prefix_inclusive_product(multiplied); + imultiplied = simd_prefix_inclusive_product(imultiplied); + added = simd_prefix_exclusive_sum(multiplied); + multiplied = simd_prefix_exclusive_product(multiplied); + iadded = simd_prefix_exclusive_sum(imultiplied); + imultiplied = simd_prefix_exclusive_product(imultiplied); + added = quad_sum(added); + multiplied = quad_product(multiplied); + iadded = quad_sum(iadded); + imultiplied = quad_product(imultiplied); + lo = quad_min(lo); + hi = quad_max(hi); + ulo = quad_min(ulo); + uhi = quad_max(uhi); + slo = quad_min(slo); + shi = quad_max(shi); + anded = quad_and(anded); + ored = quad_or(ored); + xored = quad_xor(xored); + anded_b = quad_and(anded == uint4(2u)); + ored_b = quad_or(ored == uint4(3u)); + xored_b = quad_xor(xored == uint4(4u)); + float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); + bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); + float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); + bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); + float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); + bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); + float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); + bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); +} + diff --git a/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.fixed-subgroup.comp b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.fixed-subgroup.comp new file mode 100644 index 00000000000..462c78fb70c --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.fixed-subgroup.comp 
@@ -0,0 +1,322 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline T spvSubgroupBroadcast(T value, ushort lane) +{ + return simd_broadcast(value, lane); +} + +template<> +inline bool spvSubgroupBroadcast(bool value, ushort lane) +{ + return !!simd_broadcast((ushort)value, lane); +} + +template +inline vec spvSubgroupBroadcast(vec value, ushort lane) +{ + return (vec)simd_broadcast((vec)value, lane); +} + +template +inline T spvSubgroupBroadcastFirst(T value) +{ + return simd_broadcast_first(value); +} + +template<> +inline bool spvSubgroupBroadcastFirst(bool value) +{ + return !!simd_broadcast_first((ushort)value); +} + +template +inline vec spvSubgroupBroadcastFirst(vec value) +{ + return (vec)simd_broadcast_first((vec)value); +} + +inline uint4 spvSubgroupBallot(bool value) +{ + simd_vote vote = simd_ballot(value); + // simd_ballot() returns a 64-bit integer-like object, but + // SPIR-V callers expect a uint4. We must convert. + // FIXME: This won't include higher bits if Apple ever supports + // 128 lanes in an SIMD-group. 
+ return uint4(as_type((simd_vote::vote_t)vote), 0, 0); +} + +inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) +{ + return !!extract_bits(ballot[bit / 32], bit % 32, 1); +} + +inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, 
min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +template +inline bool spvSubgroupAllEqual(T value) +{ + return simd_all(all(value == simd_broadcast_first(value))); +} + +template<> +inline bool spvSubgroupAllEqual(bool value) +{ + return simd_all(value) || !simd_any(value); +} + +template +inline bool spvSubgroupAllEqual(vec value) +{ + return simd_all(all(value == (vec)simd_broadcast_first((vec)value))); +} + +template +inline T spvSubgroupShuffle(T value, ushort lane) +{ + return simd_shuffle(value, lane); +} + +template<> +inline bool spvSubgroupShuffle(bool value, ushort lane) +{ + return !!simd_shuffle((ushort)value, lane); +} + +template +inline vec spvSubgroupShuffle(vec value, ushort lane) +{ + return (vec)simd_shuffle((vec)value, lane); +} + +template +inline T spvSubgroupShuffleXor(T value, ushort mask) +{ + return simd_shuffle_xor(value, mask); +} + +template<> +inline bool spvSubgroupShuffleXor(bool value, ushort mask) +{ + return !!simd_shuffle_xor((ushort)value, mask); +} + +template +inline vec spvSubgroupShuffleXor(vec value, ushort mask) +{ + return (vec)simd_shuffle_xor((vec)value, mask); +} + +template +inline T spvSubgroupShuffleUp(T value, ushort delta) +{ + return simd_shuffle_up(value, delta); +} + +template<> +inline bool spvSubgroupShuffleUp(bool value, ushort delta) +{ + return !!simd_shuffle_up((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleUp(vec value, ushort delta) +{ + return (vec)simd_shuffle_up((vec)value, delta); +} + +template +inline T spvSubgroupShuffleDown(T value, ushort delta) +{ + return simd_shuffle_down(value, delta); +} + +template<> +inline bool spvSubgroupShuffleDown(bool value, ushort delta) +{ + return !!simd_shuffle_down((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleDown(vec value, ushort delta) +{ + return (vec)simd_shuffle_down((vec)value, delta); +} + 
+template +inline T spvQuadBroadcast(T value, uint lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvQuadBroadcast(bool value, uint lane) +{ + return !!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvQuadBroadcast(vec value, uint lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvQuadSwap(T value, uint dir) +{ + return quad_shuffle_xor(value, dir + 1); +} + +template<> +inline bool spvQuadSwap(bool value, uint dir) +{ + return !!quad_shuffle_xor((ushort)value, dir + 1); +} + +template +inline vec spvQuadSwap(vec value, uint dir) +{ + return (vec)quad_shuffle_xor((vec)value, dir + 1); +} + +struct SSBO +{ + float FragColor; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) +{ + uint gl_SubgroupSize = 32; + uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID >= 32 ? 
uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, 32 - gl_SubgroupInvocationID), uint3(0)); + uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, 32 - gl_SubgroupInvocationID - 1), uint3(0)); + uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + _9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device); + simdgroup_barrier(mem_flags::mem_threadgroup); + simdgroup_barrier(mem_flags::mem_texture); + bool _39 = simd_is_first(); + bool elected = _39; + _9.FragColor = float4(gl_SubgroupEqMask).x; + _9.FragColor = float4(gl_SubgroupGeMask).x; + _9.FragColor = float4(gl_SubgroupGtMask).x; + _9.FragColor = float4(gl_SubgroupLeMask).x; + _9.FragColor = float4(gl_SubgroupLtMask).x; + float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u); + bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u); + float3 first = spvSubgroupBroadcastFirst(float3(20.0)); + bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false)); + uint4 ballot_value = spvSubgroupBallot(true); + bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); + bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 
8u); + uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize); + uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize); + uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize); + uint shuffled = spvSubgroupShuffle(10u, 8u); + bool shuffled_bool = spvSubgroupShuffle(true, 9u); + uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); + uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); + uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); + bool has_all = simd_all(true); + bool has_any = simd_any(true); + bool has_equal = spvSubgroupAllEqual(0); + has_equal = spvSubgroupAllEqual(true); + has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0)); + has_equal = spvSubgroupAllEqual(bool4(true, true, false, true)); + float4 added = simd_sum(float4(20.0)); + int4 iadded = simd_sum(int4(20)); + float4 multiplied = simd_product(float4(20.0)); + int4 imultiplied = simd_product(int4(20)); + float4 lo = simd_min(float4(20.0)); + float4 hi = simd_max(float4(20.0)); + int4 slo = simd_min(int4(20)); + int4 shi = simd_max(int4(20)); + uint4 ulo = simd_min(uint4(20u)); + uint4 uhi = simd_max(uint4(20u)); + uint4 anded = simd_and(ballot_value); + uint4 ored = simd_or(ballot_value); + uint4 xored = simd_xor(ballot_value); + added = simd_prefix_inclusive_sum(added); + iadded = simd_prefix_inclusive_sum(iadded); + multiplied = simd_prefix_inclusive_product(multiplied); + imultiplied = simd_prefix_inclusive_product(imultiplied); + added = simd_prefix_exclusive_sum(multiplied); + multiplied = simd_prefix_exclusive_product(multiplied); + iadded = 
simd_prefix_exclusive_sum(imultiplied); + imultiplied = simd_prefix_exclusive_product(imultiplied); + added = quad_sum(added); + multiplied = quad_product(multiplied); + iadded = quad_sum(iadded); + imultiplied = quad_product(imultiplied); + lo = quad_min(lo); + hi = quad_max(hi); + ulo = quad_min(ulo); + uhi = quad_max(uhi); + slo = quad_min(slo); + shi = quad_max(shi); + anded = quad_and(anded); + ored = quad_or(ored); + xored = quad_xor(xored); + float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); + bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); + float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); + bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); + float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); + bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); + float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); + bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); +} + diff --git a/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp new file mode 100644 index 00000000000..1791ceca1ad --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp @@ -0,0 +1,151 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline T spvSubgroupShuffle(T value, ushort lane) +{ + return quad_shuffle(value, lane); +} + +template<> +inline bool spvSubgroupShuffle(bool value, ushort lane) +{ + return !!quad_shuffle((ushort)value, lane); +} + +template +inline vec spvSubgroupShuffle(vec value, ushort lane) +{ + return (vec)quad_shuffle((vec)value, lane); +} + +template +inline T spvSubgroupShuffleXor(T value, ushort mask) +{ + return quad_shuffle_xor(value, mask); +} + +template<> +inline bool spvSubgroupShuffleXor(bool value, ushort mask) +{ + return !!quad_shuffle_xor((ushort)value, mask); +} + +template +inline vec 
spvSubgroupShuffleXor(vec value, ushort mask) +{ + return (vec)quad_shuffle_xor((vec)value, mask); +} + +template +inline T spvSubgroupShuffleUp(T value, ushort delta) +{ + return quad_shuffle_up(value, delta); +} + +template<> +inline bool spvSubgroupShuffleUp(bool value, ushort delta) +{ + return !!quad_shuffle_up((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleUp(vec value, ushort delta) +{ + return (vec)quad_shuffle_up((vec)value, delta); +} + +template +inline T spvSubgroupShuffleDown(T value, ushort delta) +{ + return quad_shuffle_down(value, delta); +} + +template<> +inline bool spvSubgroupShuffleDown(bool value, ushort delta) +{ + return !!quad_shuffle_down((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleDown(vec value, ushort delta) +{ + return (vec)quad_shuffle_down((vec)value, delta); +} + +template +inline T spvQuadBroadcast(T value, uint lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvQuadBroadcast(bool value, uint lane) +{ + return !!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvQuadBroadcast(vec value, uint lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvQuadSwap(T value, uint dir) +{ + return quad_shuffle_xor(value, dir + 1); +} + +template<> +inline bool spvQuadSwap(bool value, uint dir) +{ + return !!quad_shuffle_xor((ushort)value, dir + 1); +} + +template +inline vec spvQuadSwap(vec value, uint dir) +{ + return (vec)quad_shuffle_xor((vec)value, dir + 1); +} + +struct SSBO +{ + float FragColor; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]]) +{ + _9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + 
_9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device); + simdgroup_barrier(mem_flags::mem_threadgroup); + simdgroup_barrier(mem_flags::mem_texture); + uint shuffled = spvSubgroupShuffle(10u, 8u); + bool shuffled_bool = spvSubgroupShuffle(true, 9u); + uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); + uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); + uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); + float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); + bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); + float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); + bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); + float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); + bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); + float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); + bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); +} + diff --git a/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl22.ios.comp b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl22.ios.comp new file mode 100644 index 00000000000..3910e824405 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl22.ios.comp @@ -0,0 +1,282 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline T spvSubgroupBroadcast(T value, ushort lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvSubgroupBroadcast(bool value, ushort lane) +{ + return 
!!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvSubgroupBroadcast(vec value, ushort lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvSubgroupBroadcastFirst(T value) +{ + return quad_broadcast_first(value); +} + +template<> +inline bool spvSubgroupBroadcastFirst(bool value) +{ + return !!quad_broadcast_first((ushort)value); +} + +template +inline vec spvSubgroupBroadcastFirst(vec value) +{ + return (vec)quad_broadcast_first((vec)value); +} + +inline uint4 spvSubgroupBallot(bool value) +{ + return uint4((quad_vote::vote_t)quad_ballot(value), 0, 0, 0); +} + +inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) +{ + return !!extract_bits(ballot[bit / 32], bit % 32, 1); +} + +inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, 
gl_SubgroupInvocationID + 1), uint3(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0)); + return spvPopCount4(ballot & mask); +} + +template +inline bool spvSubgroupAllEqual(T value) +{ + return quad_all(all(value == quad_broadcast_first(value))); +} + +template<> +inline bool spvSubgroupAllEqual(bool value) +{ + return quad_all(value) || !quad_any(value); +} + +template +inline bool spvSubgroupAllEqual(vec value) +{ + return quad_all(all(value == (vec)quad_broadcast_first((vec)value))); +} + +template +inline T spvSubgroupShuffle(T value, ushort lane) +{ + return quad_shuffle(value, lane); +} + +template<> +inline bool spvSubgroupShuffle(bool value, ushort lane) +{ + return !!quad_shuffle((ushort)value, lane); +} + +template +inline vec spvSubgroupShuffle(vec value, ushort lane) +{ + return (vec)quad_shuffle((vec)value, lane); +} + +template +inline T spvSubgroupShuffleXor(T value, ushort mask) +{ + return quad_shuffle_xor(value, mask); +} + +template<> +inline bool spvSubgroupShuffleXor(bool value, ushort mask) +{ + return !!quad_shuffle_xor((ushort)value, mask); +} + +template +inline vec spvSubgroupShuffleXor(vec value, ushort mask) +{ + return (vec)quad_shuffle_xor((vec)value, mask); +} + +template +inline T spvSubgroupShuffleUp(T value, ushort delta) +{ + return quad_shuffle_up(value, delta); +} + +template<> +inline bool spvSubgroupShuffleUp(bool value, ushort delta) +{ + return !!quad_shuffle_up((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleUp(vec value, ushort delta) +{ + return (vec)quad_shuffle_up((vec)value, delta); +} + +template +inline T spvSubgroupShuffleDown(T value, ushort delta) +{ + return quad_shuffle_down(value, delta); +} + +template<> +inline bool spvSubgroupShuffleDown(bool value, ushort delta) +{ + return !!quad_shuffle_down((ushort)value, 
delta); +} + +template +inline vec spvSubgroupShuffleDown(vec value, ushort delta) +{ + return (vec)quad_shuffle_down((vec)value, delta); +} + +template +inline T spvQuadBroadcast(T value, uint lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvQuadBroadcast(bool value, uint lane) +{ + return !!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvQuadBroadcast(vec value, uint lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvQuadSwap(T value, uint dir) +{ + return quad_shuffle_xor(value, dir + 1); +} + +template<> +inline bool spvQuadSwap(bool value, uint dir) +{ + return !!quad_shuffle_xor((ushort)value, dir + 1); +} + +template +inline vec spvQuadSwap(vec value, uint dir) +{ + return (vec)quad_shuffle_xor((vec)value, dir + 1); +} + +struct SSBO +{ + float FragColor; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]]) +{ + uint4 gl_SubgroupEqMask = uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, gl_SubgroupSize - gl_SubgroupInvocationID), uint3(0)); + uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, gl_SubgroupSize - gl_SubgroupInvocationID - 1), uint3(0)); + uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0)); + uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint3(0)); + _9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + simdgroup_barrier(mem_flags::mem_device | 
mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device); + simdgroup_barrier(mem_flags::mem_threadgroup); + simdgroup_barrier(mem_flags::mem_texture); + bool _39 = quad_is_first(); + bool elected = _39; + _9.FragColor = float4(gl_SubgroupEqMask).x; + _9.FragColor = float4(gl_SubgroupGeMask).x; + _9.FragColor = float4(gl_SubgroupGtMask).x; + _9.FragColor = float4(gl_SubgroupLeMask).x; + _9.FragColor = float4(gl_SubgroupLtMask).x; + float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u); + bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u); + float3 first = spvSubgroupBroadcastFirst(float3(20.0)); + bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false)); + uint4 ballot_value = spvSubgroupBallot(true); + bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); + bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); + uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize); + uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize); + uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize); + uint shuffled = spvSubgroupShuffle(10u, 8u); + bool shuffled_bool = spvSubgroupShuffle(true, 9u); + uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); + uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); + uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); + bool has_all = quad_all(true); + bool has_any = quad_any(true); + bool has_equal = 
spvSubgroupAllEqual(0); + has_equal = spvSubgroupAllEqual(true); + has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0)); + has_equal = spvSubgroupAllEqual(bool4(true, true, false, true)); + float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); + bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); + float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); + bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); + float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); + bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); + float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); + bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); +} + diff --git a/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl23.ios.simd.comp b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl23.ios.simd.comp new file mode 100644 index 00000000000..71916ebb988 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl23.ios.simd.comp @@ -0,0 +1,316 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline T spvSubgroupBroadcast(T value, ushort lane) +{ + return simd_broadcast(value, lane); +} + +template<> +inline bool spvSubgroupBroadcast(bool value, ushort lane) +{ + return !!simd_broadcast((ushort)value, lane); +} + +template +inline vec spvSubgroupBroadcast(vec value, ushort lane) +{ + return (vec)simd_broadcast((vec)value, lane); +} + +template +inline T spvSubgroupBroadcastFirst(T value) +{ + return simd_broadcast_first(value); +} + +template<> +inline bool spvSubgroupBroadcastFirst(bool value) +{ + return !!simd_broadcast_first((ushort)value); +} + +template +inline vec spvSubgroupBroadcastFirst(vec value) +{ + return (vec)simd_broadcast_first((vec)value); +} + +inline uint4 spvSubgroupBallot(bool value) +{ + return uint4((simd_vote::vote_t)simd_ballot(value), 0, 0, 0); +} + +inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint 
bit) +{ + return !!extract_bits(ballot[bit / 32], bit % 32, 1); +} + +inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0)); + return spvPopCount4(ballot & mask); +} + +template +inline bool spvSubgroupAllEqual(T value) +{ + return simd_all(all(value == simd_broadcast_first(value))); +} + +template<> +inline bool spvSubgroupAllEqual(bool value) +{ + return simd_all(value) || !simd_any(value); +} + +template +inline bool spvSubgroupAllEqual(vec value) +{ + return simd_all(all(value == (vec)simd_broadcast_first((vec)value))); +} + +template 
+inline T spvSubgroupShuffle(T value, ushort lane) +{ + return simd_shuffle(value, lane); +} + +template<> +inline bool spvSubgroupShuffle(bool value, ushort lane) +{ + return !!simd_shuffle((ushort)value, lane); +} + +template +inline vec spvSubgroupShuffle(vec value, ushort lane) +{ + return (vec)simd_shuffle((vec)value, lane); +} + +template +inline T spvSubgroupShuffleXor(T value, ushort mask) +{ + return simd_shuffle_xor(value, mask); +} + +template<> +inline bool spvSubgroupShuffleXor(bool value, ushort mask) +{ + return !!simd_shuffle_xor((ushort)value, mask); +} + +template +inline vec spvSubgroupShuffleXor(vec value, ushort mask) +{ + return (vec)simd_shuffle_xor((vec)value, mask); +} + +template +inline T spvSubgroupShuffleUp(T value, ushort delta) +{ + return simd_shuffle_up(value, delta); +} + +template<> +inline bool spvSubgroupShuffleUp(bool value, ushort delta) +{ + return !!simd_shuffle_up((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleUp(vec value, ushort delta) +{ + return (vec)simd_shuffle_up((vec)value, delta); +} + +template +inline T spvSubgroupShuffleDown(T value, ushort delta) +{ + return simd_shuffle_down(value, delta); +} + +template<> +inline bool spvSubgroupShuffleDown(bool value, ushort delta) +{ + return !!simd_shuffle_down((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleDown(vec value, ushort delta) +{ + return (vec)simd_shuffle_down((vec)value, delta); +} + +template +inline T spvQuadBroadcast(T value, uint lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvQuadBroadcast(bool value, uint lane) +{ + return !!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvQuadBroadcast(vec value, uint lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvQuadSwap(T value, uint dir) +{ + return quad_shuffle_xor(value, dir + 1); +} + +template<> +inline bool spvQuadSwap(bool value, uint dir) +{ + return 
!!quad_shuffle_xor((ushort)value, dir + 1); +} + +template +inline vec spvQuadSwap(vec value, uint dir) +{ + return (vec)quad_shuffle_xor((vec)value, dir + 1); +} + +struct SSBO +{ + float FragColor; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) +{ + uint4 gl_SubgroupEqMask = uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, gl_SubgroupSize - gl_SubgroupInvocationID), uint3(0)); + uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, gl_SubgroupSize - gl_SubgroupInvocationID - 1), uint3(0)); + uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0)); + uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint3(0)); + _9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device); + simdgroup_barrier(mem_flags::mem_threadgroup); + simdgroup_barrier(mem_flags::mem_texture); + bool _39 = simd_is_first(); + bool elected = _39; + _9.FragColor = float4(gl_SubgroupEqMask).x; + _9.FragColor = float4(gl_SubgroupGeMask).x; + _9.FragColor = float4(gl_SubgroupGtMask).x; + _9.FragColor = float4(gl_SubgroupLeMask).x; + _9.FragColor = float4(gl_SubgroupLtMask).x; + float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u); + bool2 broadcasted_bool = 
spvSubgroupBroadcast(bool2(true), 8u); + float3 first = spvSubgroupBroadcastFirst(float3(20.0)); + bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false)); + uint4 ballot_value = spvSubgroupBallot(true); + bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); + bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); + uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize); + uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize); + uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize); + uint shuffled = spvSubgroupShuffle(10u, 8u); + bool shuffled_bool = spvSubgroupShuffle(true, 9u); + uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); + uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); + uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); + bool has_all = simd_all(true); + bool has_any = simd_any(true); + bool has_equal = spvSubgroupAllEqual(0); + has_equal = spvSubgroupAllEqual(true); + has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0)); + has_equal = spvSubgroupAllEqual(bool4(true, true, false, true)); + float4 added = simd_sum(float4(20.0)); + int4 iadded = simd_sum(int4(20)); + float4 multiplied = simd_product(float4(20.0)); + int4 imultiplied = simd_product(int4(20)); + float4 lo = simd_min(float4(20.0)); + float4 hi = simd_max(float4(20.0)); + int4 slo = simd_min(int4(20)); + int4 shi = simd_max(int4(20)); + uint4 ulo = simd_min(uint4(20u)); + uint4 uhi = simd_max(uint4(20u)); + uint4 anded = simd_and(ballot_value); + uint4 ored = simd_or(ballot_value); + uint4 xored = 
simd_xor(ballot_value); + added = simd_prefix_inclusive_sum(added); + iadded = simd_prefix_inclusive_sum(iadded); + multiplied = simd_prefix_inclusive_product(multiplied); + imultiplied = simd_prefix_inclusive_product(imultiplied); + added = simd_prefix_exclusive_sum(multiplied); + multiplied = simd_prefix_exclusive_product(multiplied); + iadded = simd_prefix_exclusive_sum(imultiplied); + imultiplied = simd_prefix_exclusive_product(imultiplied); + added = quad_sum(added); + multiplied = quad_product(multiplied); + iadded = quad_sum(iadded); + imultiplied = quad_product(imultiplied); + lo = quad_min(lo); + hi = quad_max(hi); + ulo = quad_min(ulo); + uhi = quad_max(uhi); + slo = quad_min(slo); + shi = quad_max(shi); + anded = quad_and(anded); + ored = quad_or(ored); + xored = quad_xor(xored); + float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); + bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); + float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); + bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); + float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); + bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); + float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); + bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); +} + diff --git a/reference/shaders-msl-no-opt/comp/trivial-select-cast-vector.comp b/reference/shaders-msl-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 00000000000..328b42ce9fe --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct A +{ + float3 a; + float3 b; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device A& _14 [[buffer(0)]]) +{ + bool3 c = _14.b < float3(1.0); + _14.a = select(float3(1.0, 0.0, 0.0), float3(0.0, 0.0, 1.0), c); +} + diff --git a/reference/shaders-msl-no-opt/comp/trivial-select-matrix.spv14.comp 
b/reference/shaders-msl-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 00000000000..2e37a326532 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct A +{ + float3x3 a; + float b; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device A& _14 [[buffer(0)]]) +{ + bool c = _14.b < 1.0; + _14.a = c ? float3x3(float3(1.0), float3(1.0), float3(1.0)) : float3x3(float3(0.0), float3(0.0), float3(0.0)); + _14.a = c ? float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)) : float3x3(float3(0.0), float3(0.0), float3(0.0)); +} + diff --git a/reference/shaders-msl-no-opt/components/fragment-input-component.frag b/reference/shaders-msl-no-opt/components/fragment-input-component.frag new file mode 100644 index 00000000000..9a65918a7d9 --- /dev/null +++ b/reference/shaders-msl-no-opt/components/fragment-input-component.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 Foo3 [[user(locn0)]]; + float Foo1 [[user(locn0_3)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.FragColor = float4(in.Foo3, in.Foo1); + return out; +} + diff --git a/reference/shaders-msl-no-opt/components/fragment-output-component.frag b/reference/shaders-msl-no-opt/components/fragment-output-component.frag new file mode 100644 index 00000000000..45b05b9dba0 --- /dev/null +++ b/reference/shaders-msl-no-opt/components/fragment-output-component.frag @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 m_location_0 [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + float FragColor0 = {}; + float2 FragColor1 = {}; + float FragColor3 = {}; + FragColor0 = 1.0; + FragColor1 = float2(2.0, 3.0); + FragColor3 = 4.0; 
+ out.m_location_0.x = FragColor0; + out.m_location_0.yz = FragColor1; + out.m_location_0.w = FragColor3; + return out; +} + diff --git a/reference/shaders-msl-no-opt/components/fragment-output-component.pad-fragment.frag b/reference/shaders-msl-no-opt/components/fragment-output-component.pad-fragment.frag new file mode 100644 index 00000000000..0e4bee12f6e --- /dev/null +++ b/reference/shaders-msl-no-opt/components/fragment-output-component.pad-fragment.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float3 m_location_0 [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + float FragColor0 = {}; + float2 FragColor1 = {}; + FragColor0 = 1.0; + FragColor1 = float2(2.0, 3.0); + out.m_location_0.x = FragColor0; + out.m_location_0.yz = FragColor1; + return out; +} + diff --git a/reference/shaders-msl-no-opt/components/vertex-input-component.vert b/reference/shaders-msl-no-opt/components/vertex-input-component.vert new file mode 100644 index 00000000000..7a099f503b1 --- /dev/null +++ b/reference/shaders-msl-no-opt/components/vertex-input-component.vert @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float3 Foo [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 m_location_0 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float3 Foo3 = {}; + float Foo1 = {}; + Foo3 = in.m_location_0.xyz; + Foo1 = in.m_location_0.w; + out.gl_Position = float4(Foo3, Foo1); + out.Foo = Foo3 + float3(Foo1); + return out; +} + diff --git a/reference/shaders-msl/vert/viewport-index.msl2.invalid.vert b/reference/shaders-msl-no-opt/components/vertex-output-component.vert similarity index 57% rename from reference/shaders-msl/vert/viewport-index.msl2.invalid.vert rename to reference/shaders-msl-no-opt/components/vertex-output-component.vert index e5316c072ac..cf135b51288 100644 --- 
a/reference/shaders-msl/vert/viewport-index.msl2.invalid.vert +++ b/reference/shaders-msl-no-opt/components/vertex-output-component.vert @@ -5,20 +5,22 @@ using namespace metal; struct main0_out { + float3 Foo3 [[user(locn0)]]; + float Foo1 [[user(locn0_3)]]; float4 gl_Position [[position]]; - uint gl_ViewportIndex [[viewport_array_index]]; }; struct main0_in { - float4 coord [[attribute(0)]]; + float4 vFoo [[attribute(0)]]; }; vertex main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - out.gl_Position = in.coord; - out.gl_ViewportIndex = uint(int(in.coord.z)); + out.gl_Position = in.vFoo; + out.Foo3 = in.vFoo.xyz; + out.Foo1 = in.vFoo.w; return out; } diff --git a/reference/opt/shaders-msl/frag/16bit-constants.frag b/reference/shaders-msl-no-opt/frag/16bit-constants.invalid.frag similarity index 84% rename from reference/opt/shaders-msl/frag/16bit-constants.frag rename to reference/shaders-msl-no-opt/frag/16bit-constants.invalid.frag index 56c7ea5df4a..542beb31898 100644 --- a/reference/opt/shaders-msl/frag/16bit-constants.frag +++ b/reference/shaders-msl-no-opt/frag/16bit-constants.invalid.frag @@ -14,8 +14,8 @@ fragment main0_out main0() { main0_out out = {}; out.foo = half(1.0); - out.bar = 2; - out.baz = 3u; + out.bar = short(2); + out.baz = ushort(3); return out; } diff --git a/reference/shaders-msl-no-opt/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag b/reference/shaders-msl-no-opt/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag new file mode 100644 index 00000000000..0e0348bf851 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag @@ -0,0 +1,11 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + bool _9 = simd_is_helper_thread(); + bool helper = _9; +} + diff --git a/reference/shaders-msl-no-opt/frag/depth-image-gather.asm.frag b/reference/shaders-msl-no-opt/frag/depth-image-gather.asm.frag new file mode 100644 index 00000000000..025e2258561 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/frag/depth-image-gather.asm.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], depth2d g_depthTexture [[texture(0)]], sampler g_sampler [[sampler(0)]], sampler g_comp [[sampler(1)]]) +{ + main0_out out = {}; + out.out_var_SV_Target0 = g_depthTexture.gather_compare(g_comp, in.in_var_TEXCOORD0, 0.5) * g_depthTexture.gather(g_sampler, in.in_var_TEXCOORD0, int2(0)); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/force-active-resources.msl2.argument..force-active.discrete.frag b/reference/shaders-msl-no-opt/frag/force-active-resources.msl2.argument..force-active.discrete.frag new file mode 100644 index 00000000000..5f8dc7203e5 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/force-active-resources.msl2.argument..force-active.discrete.frag @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct spvDescriptorSetBuffer0 +{ + texture2d uTexture2 [[id(0)]]; + sampler uTexture2Smplr [[id(1)]]; + texture2d uTexture1 [[id(2)]]; + sampler uTexture1Smplr [[id(3)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], texture2d uTextureDiscrete2 [[texture(0)]], sampler uTextureDiscrete2Smplr [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = spvDescriptorSet0.uTexture2.sample(spvDescriptorSet0.uTexture2Smplr, in.vUV); + out.FragColor += uTextureDiscrete2.sample(uTextureDiscrete2Smplr, in.vUV); + return out; +} + diff --git a/reference/shaders-msl/frag/fp16.desktop.invalid.frag b/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag similarity index 88% rename from 
reference/shaders-msl/frag/fp16.desktop.invalid.frag rename to reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag index 001944fcba7..16182ae2e14 100644 --- a/reference/shaders-msl/frag/fp16.desktop.invalid.frag +++ b/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag @@ -5,51 +5,54 @@ using namespace metal; -struct ResType -{ - half4 _m0; - int4 _m1; -}; - -struct main0_in -{ - half v1 [[user(locn0)]]; - half2 v2 [[user(locn1)]]; - half3 v3 [[user(locn2)]]; - half4 v4 [[user(locn3)]]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } // Implementation of the GLSL radians() function template -T radians(T d) +inline T radians(T d) { return d * T(0.01745329251); } // Implementation of the GLSL degrees() function template -T degrees(T r) +inline T degrees(T r) { return r * T(57.2957795131); } +struct ResType +{ + half4 _m0; + int4 _m1; +}; + +struct main0_in +{ + half v1 [[user(locn0)]]; + half2 v2 [[user(locn1)]]; + half3 v3 [[user(locn2)]]; + half4 v4 [[user(locn3)]]; +}; + +static inline __attribute__((always_inline)) half2x2 test_mat2(thread const half2& a, thread const half2& b, thread const half2& c, thread const half2& d) { return half2x2(half2(a), half2(b)) * half2x2(half2(c), half2(d)); } +static inline __attribute__((always_inline)) half3x3 test_mat3(thread const half3& a, thread const half3& b, thread const half3& c, thread const half3& d, thread const half3& e, thread const half3& f) { return half3x3(half3(a), half3(b), half3(c)) * half3x3(half3(d), half3(e), half3(f)); } +static inline __attribute__((always_inline)) void test_constants() { half a = half(1.0); @@ -62,11 +65,13 @@ void test_constants() half h = half(9.5367431640625e-07); } +static inline __attribute__((always_inline)) half test_result() { return half(1.0); } +static inline __attribute__((always_inline)) void test_conversions() { half one = 
test_result(); @@ -80,6 +85,7 @@ void test_conversions() half d2 = half(d); } +static inline __attribute__((always_inline)) void test_builtins(thread half4& v4, thread half3& v3, thread half& v1) { half4 res = radians(v4); @@ -88,11 +94,11 @@ void test_builtins(thread half4& v4, thread half3& v3, thread half& v1) res = cos(v4); res = tan(v4); res = asin(v4); - res = atan2(v4, v3.xyzz); + res = precise::atan2(v4, v3.xyzz); res = atan(v4); - res = sinh(v4); - res = cosh(v4); - res = tanh(v4); + res = fast::sinh(v4); + res = fast::cosh(v4); + res = precise::tanh(v4); res = asinh(v4); res = acosh(v4); res = atanh(v4); @@ -119,8 +125,7 @@ void test_builtins(thread half4& v4, thread half3& v3, thread half& v1) res = max(v4, v4); res = clamp(v4, v4, v4); res = mix(v4, v4, v4); - bool4 _243 = v4 < v4; - res = half4(_243.x ? v4.x : v4.x, _243.y ? v4.y : v4.y, _243.z ? v4.z : v4.z, _243.w ? v4.w : v4.w); + res = select(v4, v4, v4 < v4); res = step(v4, v4); res = smoothstep(v4, v4, v4); bool4 btmp = isnan(v4); @@ -138,7 +143,7 @@ void test_builtins(thread half4& v4, thread half3& v3, thread half& v1) t0 = distance(v4, v4); t0 = dot(v4, v4); half3 res3 = cross(v3, v3); - res = normalize(v4); + res = fast::normalize(v4); res = faceforward(v4, v4, v4); res = reflect(v4, v4); res = refract(v4, v4, v1); diff --git a/reference/shaders-msl-no-opt/frag/image-gather.frag b/reference/shaders-msl-no-opt/frag/image-gather.frag new file mode 100644 index 00000000000..db793c14eea --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/image-gather.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uSamp [[texture(0)]], depth2d uSampShadow [[texture(1)]], sampler uSampSmplr [[sampler(0)]], sampler uSampShadowSmplr [[sampler(1)]]) +{ + main0_out out = {}; + out.FragColor = uSamp.gather(uSampSmplr, 
in.vUV.xy, int2(0), component::x); + out.FragColor += uSamp.gather(uSampSmplr, in.vUV.xy, int2(0), component::y); + out.FragColor += uSampShadow.gather_compare(uSampShadowSmplr, in.vUV.xy, in.vUV.z); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/in_block_assign.frag b/reference/shaders-msl-no-opt/frag/in_block_assign.frag index 427c689c49c..6b7afc4d2c5 100644 --- a/reference/shaders-msl-no-opt/frag/in_block_assign.frag +++ b/reference/shaders-msl-no-opt/frag/in_block_assign.frag @@ -15,14 +15,14 @@ struct main0_out struct main0_in { - float4 VOUT_a [[user(locn0)]]; + float4 Clip_a [[user(locn0)]]; }; fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; VOUT Clip = {}; - Clip.a = in.VOUT_a; + Clip.a = in.Clip_a; VOUT tmp = Clip; tmp.a += float4(1.0); out.FragColor = tmp.a; diff --git a/reference/shaders-msl/asm/frag/min-max-clamp.invalid.asm.frag b/reference/shaders-msl-no-opt/frag/min-max-clamp.invalid.asm.frag similarity index 100% rename from reference/shaders-msl/asm/frag/min-max-clamp.invalid.asm.frag rename to reference/shaders-msl-no-opt/frag/min-max-clamp.invalid.asm.frag diff --git a/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag b/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag new file mode 100644 index 00000000000..7835e013076 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag @@ -0,0 +1,69 @@ +#include +#include + +using namespace metal; + +struct main0_in +{ + float v1 [[user(locn0)]]; + float2 v2 [[user(locn1)]]; + float3 v3 [[user(locn2)]]; + float4 v4 [[user(locn3)]]; + half h1 [[user(locn4)]]; + half2 h2 [[user(locn5)]]; + half3 h3 [[user(locn6)]]; + half4 h4 [[user(locn7)]]; +}; + +fragment void main0(main0_in in [[stage_in]]) +{ + float res = fast::min(in.v1, in.v1); + res = fast::max(in.v1, in.v1); + res = fast::clamp(in.v1, in.v1, in.v1); + res = fast::min(in.v1, in.v1); + res = fast::max(in.v1, in.v1); + 
res = fast::clamp(in.v1, in.v1, in.v1); + float2 res2 = fast::min(in.v2, in.v2); + res2 = fast::max(in.v2, in.v2); + res2 = fast::clamp(in.v2, in.v2, in.v2); + res2 = fast::min(in.v2, in.v2); + res2 = fast::max(in.v2, in.v2); + res2 = fast::clamp(in.v2, in.v2, in.v2); + float3 res3 = fast::min(in.v3, in.v3); + res3 = fast::max(in.v3, in.v3); + res3 = fast::clamp(in.v3, in.v3, in.v3); + res3 = fast::min(in.v3, in.v3); + res3 = fast::max(in.v3, in.v3); + res3 = fast::clamp(in.v3, in.v3, in.v3); + float4 res4 = fast::min(in.v4, in.v4); + res4 = fast::max(in.v4, in.v4); + res4 = fast::clamp(in.v4, in.v4, in.v4); + res4 = fast::min(in.v4, in.v4); + res4 = fast::max(in.v4, in.v4); + res4 = fast::clamp(in.v4, in.v4, in.v4); + half hres = min(in.h1, in.h1); + hres = max(in.h1, in.h1); + hres = clamp(in.h1, in.h1, in.h1); + hres = min(in.h1, in.h1); + hres = max(in.h1, in.h1); + hres = clamp(in.h1, in.h1, in.h1); + half2 hres2 = min(in.h2, in.h2); + hres2 = max(in.h2, in.h2); + hres2 = clamp(in.h2, in.h2, in.h2); + hres2 = min(in.h2, in.h2); + hres2 = max(in.h2, in.h2); + hres2 = clamp(in.h2, in.h2, in.h2); + half3 hres3 = min(in.h3, in.h3); + hres3 = max(in.h3, in.h3); + hres3 = clamp(in.h3, in.h3, in.h3); + hres3 = min(in.h3, in.h3); + hres3 = max(in.h3, in.h3); + hres3 = clamp(in.h3, in.h3, in.h3); + half4 hres4 = min(in.h4, in.h4); + hres4 = max(in.h4, in.h4); + hres4 = clamp(in.h4, in.h4, in.h4); + hres4 = min(in.h4, in.h4); + hres4 = max(in.h4, in.h4); + hres4 = clamp(in.h4, in.h4, in.h4); +} + diff --git a/reference/shaders-msl-no-opt/frag/nonuniform-constructor.msl2.frag b/reference/shaders-msl-no-opt/frag/nonuniform-constructor.msl2.frag new file mode 100644 index 00000000000..f1ad5c5fb25 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/nonuniform-constructor.msl2.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; + int vIndex 
[[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], array, 10> uTex [[texture(0)]], sampler Immut [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = uTex[in.vIndex].sample(Immut, in.vUV); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag b/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag new file mode 100644 index 00000000000..cb01950d221 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag @@ -0,0 +1,37 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +static inline __attribute__((always_inline)) +void callee2(device SSBO1& v_14, thread float4& gl_FragCoord) +{ + int _25 = int(gl_FragCoord.x); + v_14.values1[_25]++; +} + +static inline __attribute__((always_inline)) +void callee(device SSBO1& v_14, thread float4& gl_FragCoord, device SSBO0& v_35) +{ + int _38 = int(gl_FragCoord.x); + v_35.values0[_38]++; + callee2(v_14, gl_FragCoord); +} + +fragment void main0(device SSBO1& v_14 [[buffer(0), raster_order_group(0)]], device SSBO0& v_35 [[buffer(1), raster_order_group(0)]], float4 gl_FragCoord [[position]]) +{ + callee(v_14, gl_FragCoord, v_35); +} + diff --git a/reference/shaders-msl-no-opt/frag/pull-interpolant-access-chain.msl23.frag b/reference/shaders-msl-no-opt/frag/pull-interpolant-access-chain.msl23.frag new file mode 100644 index 00000000000..b5ffd11f40f --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/pull-interpolant-access-chain.msl23.frag @@ -0,0 +1,75 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + interpolant a_0 [[user(locn0)]]; + interpolant a_1 [[user(locn1)]]; + interpolant b_0 [[user(locn2)]]; + interpolant b_1 [[user(locn3)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray a = {}; + spvUnsafeArray b = {}; + a[0] = in.a_0.interpolate_at_centroid(); + a[1] = in.a_1.interpolate_at_centroid(); + b[0] = in.b_0.interpolate_at_centroid(); + b[1] = in.b_1.interpolate_at_centroid(); + out.FragColor.x = in.a_0.interpolate_at_offset(float2(0.5) + 0.4375).x; + out.FragColor.y = in.a_1.interpolate_at_offset(float2(0.5) + 0.4375).y; + out.FragColor.z = in.b_0.interpolate_at_offset(float2(0.5) + 0.4375).z; + out.FragColor.w = in.b_1.interpolate_at_offset(float2(0.5) + 0.4375).w; + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag b/reference/shaders-msl-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag similarity index 100% rename from reference/opt/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag rename to reference/shaders-msl-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag diff --git 
a/reference/shaders-msl/frag/shadow-compare-global-alias.invalid.frag b/reference/shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag similarity index 69% rename from reference/shaders-msl/frag/shadow-compare-global-alias.invalid.frag rename to reference/shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag index 4bd5d32091f..58985c63541 100644 --- a/reference/shaders-msl/frag/shadow-compare-global-alias.invalid.frag +++ b/reference/shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag @@ -15,22 +15,26 @@ struct main0_in float3 vUV [[user(locn0)]]; }; -float Samp(thread const float3& uv, thread depth2d uTex, thread sampler uSamp) +static inline __attribute__((always_inline)) +float Samp(thread const float3& uv, depth2d uTex, sampler uSamp) { return uTex.sample_compare(uSamp, uv.xy, uv.z); } -float Samp2(thread const float3& uv, thread depth2d uSampler, thread const sampler uSamplerSmplr, thread float3& vUV) +static inline __attribute__((always_inline)) +float Samp2(thread const float3& uv, depth2d uSampler, sampler uSamplerSmplr, thread float3& vUV) { return uSampler.sample_compare(uSamplerSmplr, vUV.xy, vUV.z); } -float Samp3(thread const depth2d uT, thread const sampler uS, thread const float3& uv, thread float3& vUV) +static inline __attribute__((always_inline)) +float Samp3(depth2d uT, sampler uS, thread const float3& uv, thread float3& vUV) { return uT.sample_compare(uS, vUV.xy, vUV.z); } -float Samp4(thread const depth2d uS, thread const sampler uSSmplr, thread const float3& uv, thread float3& vUV) +static inline __attribute__((always_inline)) +float Samp4(depth2d uS, sampler uSSmplr, thread const float3& uv, thread float3& vUV) { return uS.sample_compare(uSSmplr, vUV.xy, vUV.z); } diff --git a/reference/shaders-msl-no-opt/frag/subgroups.nocompat.invalid.vk.msl22.frag b/reference/shaders-msl-no-opt/frag/subgroups.nocompat.invalid.vk.msl22.frag new file mode 100644 index 00000000000..7680908a448 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/frag/subgroups.nocompat.invalid.vk.msl22.frag @@ -0,0 +1,314 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline T spvSubgroupBroadcast(T value, ushort lane) +{ + return simd_broadcast(value, lane); +} + +template<> +inline bool spvSubgroupBroadcast(bool value, ushort lane) +{ + return !!simd_broadcast((ushort)value, lane); +} + +template +inline vec spvSubgroupBroadcast(vec value, ushort lane) +{ + return (vec)simd_broadcast((vec)value, lane); +} + +template +inline T spvSubgroupBroadcastFirst(T value) +{ + return simd_broadcast_first(value); +} + +template<> +inline bool spvSubgroupBroadcastFirst(bool value) +{ + return !!simd_broadcast_first((ushort)value); +} + +template +inline vec spvSubgroupBroadcastFirst(vec value) +{ + return (vec)simd_broadcast_first((vec)value); +} + +inline uint4 spvSubgroupBallot(bool value) +{ + simd_vote vote = simd_ballot(value); + // simd_ballot() returns a 64-bit integer-like object, but + // SPIR-V callers expect a uint4. We must convert. + // FIXME: This won't include higher bits if Apple ever supports + // 128 lanes in an SIMD-group. 
+ return uint4(as_type((simd_vote::vote_t)vote), 0, 0); +} + +inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) +{ + return !!extract_bits(ballot[bit / 32], bit % 32, 1); +} + +inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, 
min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +template +inline bool spvSubgroupAllEqual(T value) +{ + return simd_all(all(value == simd_broadcast_first(value))); +} + +template<> +inline bool spvSubgroupAllEqual(bool value) +{ + return simd_all(value) || !simd_any(value); +} + +template +inline bool spvSubgroupAllEqual(vec value) +{ + return simd_all(all(value == (vec)simd_broadcast_first((vec)value))); +} + +template +inline T spvSubgroupShuffle(T value, ushort lane) +{ + return simd_shuffle(value, lane); +} + +template<> +inline bool spvSubgroupShuffle(bool value, ushort lane) +{ + return !!simd_shuffle((ushort)value, lane); +} + +template +inline vec spvSubgroupShuffle(vec value, ushort lane) +{ + return (vec)simd_shuffle((vec)value, lane); +} + +template +inline T spvSubgroupShuffleXor(T value, ushort mask) +{ + return simd_shuffle_xor(value, mask); +} + +template<> +inline bool spvSubgroupShuffleXor(bool value, ushort mask) +{ + return !!simd_shuffle_xor((ushort)value, mask); +} + +template +inline vec spvSubgroupShuffleXor(vec value, ushort mask) +{ + return (vec)simd_shuffle_xor((vec)value, mask); +} + +template +inline T spvSubgroupShuffleUp(T value, ushort delta) +{ + return simd_shuffle_up(value, delta); +} + +template<> +inline bool spvSubgroupShuffleUp(bool value, ushort delta) +{ + return !!simd_shuffle_up((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleUp(vec value, ushort delta) +{ + return (vec)simd_shuffle_up((vec)value, delta); +} + +template +inline T spvSubgroupShuffleDown(T value, ushort delta) +{ + return simd_shuffle_down(value, delta); +} + +template<> +inline bool spvSubgroupShuffleDown(bool value, ushort delta) +{ + return !!simd_shuffle_down((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleDown(vec value, ushort delta) +{ + return (vec)simd_shuffle_down((vec)value, delta); +} + 
+template +inline T spvQuadBroadcast(T value, uint lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvQuadBroadcast(bool value, uint lane) +{ + return !!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvQuadBroadcast(vec value, uint lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvQuadSwap(T value, uint dir) +{ + return quad_shuffle_xor(value, dir + 1); +} + +template<> +inline bool spvQuadSwap(bool value, uint dir) +{ + return !!quad_shuffle_xor((ushort)value, dir + 1); +} + +template +inline vec spvQuadSwap(vec value, uint dir) +{ + return (vec)quad_shuffle_xor((vec)value, dir + 1); +} + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +fragment main0_out main0(uint gl_SubgroupSize [[threads_per_simdgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) +{ + main0_out out = {}; + uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID >= 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); + uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); + uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + uint4 gl_SubgroupLtMask = 
uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + out.FragColor = float(gl_SubgroupSize); + out.FragColor = float(gl_SubgroupInvocationID); + bool _24 = simd_is_first(); + bool elected = _24; + out.FragColor = float4(gl_SubgroupEqMask).x; + out.FragColor = float4(gl_SubgroupGeMask).x; + out.FragColor = float4(gl_SubgroupGtMask).x; + out.FragColor = float4(gl_SubgroupLeMask).x; + out.FragColor = float4(gl_SubgroupLtMask).x; + float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u); + bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u); + float3 first = spvSubgroupBroadcastFirst(float3(20.0)); + bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false)); + uint4 ballot_value = spvSubgroupBallot(true); + bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); + bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); + uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize); + uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize); + uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize); + uint shuffled = spvSubgroupShuffle(10u, 8u); + bool shuffled_bool = spvSubgroupShuffle(true, 9u); + uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); + uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); + uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); + bool has_all = simd_all(true); + bool has_any = simd_any(true); + bool has_equal = spvSubgroupAllEqual(0); + has_equal = 
spvSubgroupAllEqual(true); + has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0)); + has_equal = spvSubgroupAllEqual(bool4(true, true, false, true)); + float4 added = simd_sum(float4(20.0)); + int4 iadded = simd_sum(int4(20)); + float4 multiplied = simd_product(float4(20.0)); + int4 imultiplied = simd_product(int4(20)); + float4 lo = simd_min(float4(20.0)); + float4 hi = simd_max(float4(20.0)); + int4 slo = simd_min(int4(20)); + int4 shi = simd_max(int4(20)); + uint4 ulo = simd_min(uint4(20u)); + uint4 uhi = simd_max(uint4(20u)); + uint4 anded = simd_and(ballot_value); + uint4 ored = simd_or(ballot_value); + uint4 xored = simd_xor(ballot_value); + added = simd_prefix_inclusive_sum(added); + iadded = simd_prefix_inclusive_sum(iadded); + multiplied = simd_prefix_inclusive_product(multiplied); + imultiplied = simd_prefix_inclusive_product(imultiplied); + added = simd_prefix_exclusive_sum(multiplied); + multiplied = simd_prefix_exclusive_product(multiplied); + iadded = simd_prefix_exclusive_sum(imultiplied); + imultiplied = simd_prefix_exclusive_product(imultiplied); + added = quad_sum(added); + multiplied = quad_product(multiplied); + iadded = quad_sum(iadded); + imultiplied = quad_product(imultiplied); + lo = quad_min(lo); + hi = quad_max(hi); + ulo = quad_min(ulo); + uhi = quad_max(uhi); + slo = quad_min(slo); + shi = quad_max(shi); + anded = quad_and(anded); + ored = quad_or(ored); + xored = quad_xor(xored); + float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); + bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); + float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); + bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); + float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); + bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); + float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); + bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); + return out; +} + diff --git 
a/reference/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl20.ios.framebuffer-fetch.frag b/reference/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl20.ios.framebuffer-fetch.frag new file mode 100644 index 00000000000..c67984892dd --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl20.ios.framebuffer-fetch.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(1)]]; +}; + +fragment main0_out main0(float4 uInput [[color(1)]]) +{ + main0_out out = {}; + out.FragColor = uInput; + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl23.framebuffer-fetch.frag b/reference/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl23.framebuffer-fetch.frag new file mode 100644 index 00000000000..c67984892dd --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl23.framebuffer-fetch.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(1)]]; +}; + +fragment main0_out main0(float4 uInput [[color(1)]]) +{ + main0_out out = {}; + out.FragColor = uInput; + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.ios.frag b/reference/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.ios.frag new file mode 100644 index 00000000000..950895d088e --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.ios.frag @@ -0,0 +1,37 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 samp3(float4 uS) +{ + return uS; +} + +static inline __attribute__((always_inline)) +float4 samp(float4 
uSub) +{ + return uSub + samp3(uSub); +} + +static inline __attribute__((always_inline)) +float4 samp2(float4 uS) +{ + return uS + samp3(uS); +} + +fragment main0_out main0(float4 uSub [[color(0)]]) +{ + main0_out out = {}; + out.FragColor = samp(uSub) + samp2(uSub); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.msl23.frag b/reference/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.msl23.frag new file mode 100644 index 00000000000..950895d088e --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.msl23.frag @@ -0,0 +1,37 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 samp3(float4 uS) +{ + return uS; +} + +static inline __attribute__((always_inline)) +float4 samp(float4 uSub) +{ + return uSub + samp3(uSub); +} + +static inline __attribute__((always_inline)) +float4 samp2(float4 uS) +{ + return uS + samp3(uS); +} + +fragment main0_out main0(float4 uSub [[color(0)]]) +{ + main0_out out = {}; + out.FragColor = samp(uSub) + samp2(uSub); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.argument.frag b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.argument.frag new file mode 100644 index 00000000000..8c7f67b68b0 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.argument.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct spvDescriptorSetBuffer0 +{ + sampler uSampler [[id(8)]]; + texture2d uTex [[id(9)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 
[[buffer(0)]], float4 uSub [[color(1)]]) +{ + main0_out out = {}; + out.FragColor = uSub + spvDescriptorSet0.uTex.sample(spvDescriptorSet0.uSampler, float2(0.5)); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.frag b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.frag new file mode 100644 index 00000000000..9108927ee41 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(float4 uSub [[color(1)]], texture2d uTex [[texture(9)]], sampler uSampler [[sampler(8)]]) +{ + main0_out out = {}; + out.FragColor = uSub + uTex.sample(uSampler, float2(0.5)); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.argument.frag b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.argument.frag new file mode 100644 index 00000000000..8c7f67b68b0 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.argument.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct spvDescriptorSetBuffer0 +{ + sampler uSampler [[id(8)]]; + texture2d uTex [[id(9)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], float4 uSub [[color(1)]]) +{ + main0_out out = {}; + out.FragColor = uSub + spvDescriptorSet0.uTex.sample(spvDescriptorSet0.uSampler, float2(0.5)); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.frag 
b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.frag new file mode 100644 index 00000000000..9108927ee41 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(float4 uSub [[color(1)]], texture2d uTex [[texture(9)]], sampler uSampler [[sampler(8)]]) +{ + main0_out out = {}; + out.FragColor = uSub + uTex.sample(uSampler, float2(0.5)); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag index 4cb0fdd59bc..ff4b8a91943 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag @@ -6,22 +6,12 @@ using namespace metal; // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +24,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +73,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... 
Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -108,29 +109,6 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } } -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]]) { constant uint& tex1dSwzl = spvSwizzleConstants[0]; @@ -162,9 +140,9 @@ fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d c = float4(spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl)); c = float4(spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl)); c = float4(texBuffer.read(spvTexelBufferCoord(0))); - c = float4(spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl)); - c = float4(spvGatherSwizzle, float3>(texCubeSmplr, texCube, float3(0.0), component::y, texCubeSwzl)); - c = float4(spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, 
uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl)); - c = float4(spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl)); + c = float4(spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0))); + c = float4(spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, component::y, float3(0.0))); + c = float4(spvGatherSwizzle(tex2dArray, tex2dArraySmplr, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0))); + c = float4(spvGatherSwizzle(texCubeArray, texCubeArraySmplr, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w)))); } diff --git a/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag index 581f290941d..9e5dba8c568 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag @@ -6,22 +6,12 @@ using namespace metal; // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +24,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +73,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -109,8 +110,8 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } // Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) +template class Tex, typename... Ts> +inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... params) { if (sw) { @@ -131,7 +132,8 @@ inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... p return t.gather_compare(s, spvForward(params)...); } -float4 doSwizzle(thread texture1d tex1d, thread const sampler tex1dSmplr, constant uint& tex1dSwzl, thread texture2d tex2d, thread const sampler tex2dSmplr, constant uint& tex2dSwzl, thread texture3d tex3d, thread const sampler tex3dSmplr, constant uint& tex3dSwzl, thread texturecube texCube, thread const sampler texCubeSmplr, constant uint& texCubeSwzl, thread texture2d_array tex2dArray, thread const sampler tex2dArraySmplr, constant uint& tex2dArraySwzl, thread texturecube_array texCubeArray, thread const sampler texCubeArraySmplr, constant uint& texCubeArraySwzl, thread depth2d depth2d, thread const sampler depth2dSmplr, constant uint& depth2dSwzl, thread depthcube depthCube, thread const sampler depthCubeSmplr, constant uint& depthCubeSwzl, thread depth2d_array depth2dArray, thread const sampler depth2dArraySmplr, constant uint& depth2dArraySwzl, thread depthcube_array depthCubeArray, thread const sampler depthCubeArraySmplr, constant uint& depthCubeArraySwzl, thread texture2d texBuffer) +static inline __attribute__((always_inline)) +float4 doSwizzle(texture1d tex1d, sampler tex1dSmplr, constant uint& tex1dSwzl, texture2d tex2d, sampler tex2dSmplr, constant uint& tex2dSwzl, texture3d tex3d, sampler tex3dSmplr, constant 
uint& tex3dSwzl, texturecube texCube, sampler texCubeSmplr, constant uint& texCubeSwzl, texture2d_array tex2dArray, sampler tex2dArraySmplr, constant uint& tex2dArraySwzl, texturecube_array texCubeArray, sampler texCubeArraySmplr, constant uint& texCubeArraySwzl, depth2d depth2d, sampler depth2dSmplr, constant uint& depth2dSwzl, depthcube depthCube, sampler depthCubeSmplr, constant uint& depthCubeSwzl, depth2d_array depth2dArray, sampler depth2dArraySmplr, constant uint& depth2dArraySwzl, depthcube_array depthCubeArray, sampler depthCubeArraySmplr, constant uint& depthCubeArraySwzl, texture2d texBuffer) { float4 c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0)), tex2dSwzl); @@ -139,42 +141,42 @@ float4 doSwizzle(thread texture1d tex1d, thread const sampler tex1dSmplr, c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0)), texCubeSwzl); c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z))), tex2dArraySwzl); c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w))), texCubeArraySwzl); - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), depth2dSwzl); - c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), depthCubeSwzl); - c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), depth2dArraySwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, 1.0), depth2dSwzl); + c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, 1.0), depthCubeSwzl); + c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 1.0).xy, 
uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), 1.0), depth2dArraySwzl); c.x = spvTextureSwizzle(depthCubeArray.sample_compare(depthCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), depthCubeArraySwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), tex3dSwzl); float4 _103 = float4(0.0, 0.0, 1.0, 1.0); - _103.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _103.xy / _103.z, float4(0.0, 0.0, 1.0, 1.0).z / _103.z), depth2dSwzl); + _103.z = 1.0; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _103.xy / _103.z, 1.0 / _103.z), depth2dSwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0), level(0.0)), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0), level(0.0)), tex3dSwzl); c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0), level(0.0)), texCubeSwzl); c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), tex2dArraySwzl); c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), texCubeArraySwzl); - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), depth2dSwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, 1.0, level(0.0)), depth2dSwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), tex2dSwzl); c = 
spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), tex3dSwzl); float4 _131 = float4(0.0, 0.0, 1.0, 1.0); - _131.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _131.xy / _131.z, float4(0.0, 0.0, 1.0, 1.0).z / _131.z, level(0.0)), depth2dSwzl); + _131.z = 1.0; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _131.xy / _131.z, 1.0 / _131.z, level(0.0)), depth2dSwzl); c = spvTextureSwizzle(tex1d.read(uint(0)), tex1dSwzl); c = spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), tex2dSwzl); c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl); c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl); c = texBuffer.read(spvTexelBufferCoord(0)); - c = spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl); - c = spvGatherSwizzle, float3>(texCubeSmplr, texCube, float3(0.0), component::y, texCubeSwzl); - c = spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl); - c = spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl); - c = spvGatherCompareSwizzle, float2, float>(depth2dSmplr, depth2d, float2(0.0), 1.0, depth2dSwzl); - c = spvGatherCompareSwizzle, float3, float>(depthCubeSmplr, depthCube, float3(0.0), 1.0, depthCubeSwzl); - c = spvGatherCompareSwizzle, float2, uint, float>(depth2dArraySmplr, depth2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0, depth2dArraySwzl); - c = spvGatherCompareSwizzle, float3, uint, float>(depthCubeArraySmplr, depthCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0, depthCubeArraySwzl); + c = spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0)); + c = spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, 
component::y, float3(0.0)); + c = spvGatherSwizzle(tex2dArray, tex2dArraySmplr, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0)); + c = spvGatherSwizzle(texCubeArray, texCubeArraySmplr, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w))); + c = spvGatherCompareSwizzle(depth2d, depth2dSmplr, depth2dSwzl, float2(0.0), 1.0); + c = spvGatherCompareSwizzle(depthCube, depthCubeSmplr, depthCubeSwzl, float3(0.0), 1.0); + c = spvGatherCompareSwizzle(depth2dArray, depth2dArraySmplr, depth2dArraySwzl, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = spvGatherCompareSwizzle(depthCubeArray, depthCubeArraySmplr, depthCubeArraySwzl, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); return c; } diff --git a/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag index 86b712536e8..0ec278f977c 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag @@ -6,22 +6,12 @@ using namespace metal; // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +24,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +73,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. 
-template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -108,29 +109,6 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } } -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]]) { constant uint& tex1dSwzl = spvSwizzleConstants[0]; @@ -162,9 +140,9 @@ fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d c = float4(spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl)); c = float4(spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl)); c = float4(texBuffer.read(spvTexelBufferCoord(0))); - c = float4(spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl)); - c = float4(spvGatherSwizzle, 
float3>(texCubeSmplr, texCube, float3(0.0), component::y, texCubeSwzl)); - c = float4(spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl)); - c = float4(spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl)); + c = float4(spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0))); + c = float4(spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, component::y, float3(0.0))); + c = float4(spvGatherSwizzle(tex2dArray, tex2dArraySmplr, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0))); + c = float4(spvGatherSwizzle(texCubeArray, texCubeArraySmplr, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w)))); } diff --git a/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag index fb9865bcf08..9366eeab585 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag @@ -6,22 +6,12 @@ using namespace metal; // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +24,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +73,8 @@ inline T 
spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -109,8 +110,8 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } // Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) +template class Tex, typename... Ts> +inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... params) { if (sw) { @@ -149,41 +150,41 @@ fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0)), texCubeSwzl); c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z))), tex2dArraySwzl); c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w))), texCubeArraySwzl); - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), depth2dSwzl); - c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), depthCubeSwzl); - c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), depth2dArraySwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, 1.0), depth2dSwzl); + c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, 1.0), depthCubeSwzl); + c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 
1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), 1.0), depth2dArraySwzl); c.x = spvTextureSwizzle(depthCubeArray.sample_compare(depthCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), depthCubeArraySwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), tex3dSwzl); float4 _100 = float4(0.0, 0.0, 1.0, 1.0); - _100.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _100.xy / _100.z, float4(0.0, 0.0, 1.0, 1.0).z / _100.z), depth2dSwzl); + _100.z = 1.0; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _100.xy / _100.z, 1.0 / _100.z), depth2dSwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0), level(0.0)), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0), level(0.0)), tex3dSwzl); c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0), level(0.0)), texCubeSwzl); c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), tex2dArraySwzl); c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), texCubeArraySwzl); - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), depth2dSwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, 1.0, level(0.0)), depth2dSwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), tex2dSwzl); c = 
spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), tex3dSwzl); float4 _128 = float4(0.0, 0.0, 1.0, 1.0); - _128.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _128.xy / _128.z, float4(0.0, 0.0, 1.0, 1.0).z / _128.z, level(0.0)), depth2dSwzl); + _128.z = 1.0; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _128.xy / _128.z, 1.0 / _128.z, level(0.0)), depth2dSwzl); c = spvTextureSwizzle(tex1d.read(uint(0)), tex1dSwzl); c = spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), tex2dSwzl); c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl); c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl); c = texBuffer.read(spvTexelBufferCoord(0)); - c = spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl); - c = spvGatherSwizzle, float3>(texCubeSmplr, texCube, float3(0.0), component::y, texCubeSwzl); - c = spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl); - c = spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl); - c = spvGatherCompareSwizzle, float2, float>(depth2dSmplr, depth2d, float2(0.0), 1.0, depth2dSwzl); - c = spvGatherCompareSwizzle, float3, float>(depthCubeSmplr, depthCube, float3(0.0), 1.0, depthCubeSwzl); - c = spvGatherCompareSwizzle, float2, uint, float>(depth2dArraySmplr, depth2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0, depth2dArraySwzl); - c = spvGatherCompareSwizzle, float3, uint, float>(depthCubeArraySmplr, depthCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0, depthCubeArraySwzl); + c = spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0)); + c = spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, 
component::y, float3(0.0)); + c = spvGatherSwizzle(tex2dArray, tex2dArraySmplr, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0)); + c = spvGatherSwizzle(texCubeArray, texCubeArraySmplr, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w))); + c = spvGatherCompareSwizzle(depth2d, depth2dSmplr, depth2dSwzl, float2(0.0), 1.0); + c = spvGatherCompareSwizzle(depthCube, depthCubeSmplr, depthCubeSwzl, float3(0.0), 1.0); + c = spvGatherCompareSwizzle(depth2dArray, depth2dArraySmplr, depth2dArraySwzl, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = spvGatherCompareSwizzle(depthCubeArray, depthCubeArraySmplr, depthCubeArraySwzl, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); } diff --git a/reference/shaders-msl-no-opt/frag/texture-gather-uint-component.asm.frag b/reference/shaders-msl-no-opt/frag/texture-gather-uint-component.asm.frag new file mode 100644 index 00000000000..8fcb19a8505 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/texture-gather-uint-component.asm.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uSamp [[texture(0)]], sampler uSampSmplr [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = uSamp.gather(uSampSmplr, in.vUV, int2(0), component::y); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.argument.msl2.frag b/reference/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.argument.msl2.frag new file mode 100644 index 00000000000..7c601820789 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.argument.msl2.frag @@ -0,0 +1,32 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + float4 v; +}; + +struct UBO +{ + Foo foo; +}; + +struct spvDescriptorSetBuffer0 
+{ + constant UBO* ubos [[id(0)]][2]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) +{ + main0_out out = {}; + out.FragColor = spvDescriptorSet0.ubos[1]->foo.v; + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.frag b/reference/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.frag new file mode 100644 index 00000000000..0b1ca91f547 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.frag @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + float4 v; +}; + +struct UBO +{ + Foo foo; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant UBO* ubos_0 [[buffer(0)]], constant UBO* ubos_1 [[buffer(1)]]) +{ + constant UBO* ubos[] = + { + ubos_0, + ubos_1, + }; + + main0_out out = {}; + out.FragColor = ubos[1]->foo.v; + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/ubo-offset-out-of-order.frag b/reference/shaders-msl-no-opt/frag/ubo-offset-out-of-order.frag new file mode 100644 index 00000000000..ce3291a28ea --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/ubo-offset-out-of-order.frag @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4 v; + float4x4 m; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _13 [[buffer(0)]]) +{ + main0_out out = {}; + out.FragColor = (_13.m * in.vColor) + _13.v; + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/variables.zero-initialize.frag b/reference/shaders-msl-no-opt/frag/variables.zero-initialize.frag new file mode 100644 index 00000000000..0720087a637 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/frag/variables.zero-initialize.frag @@ -0,0 +1,40 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + int a; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + int uninit_function_int = {}; + int uninit_int = {}; + int4 uninit_vector = {}; + float4x4 uninit_matrix = {}; + Foo uninit_foo = {}; + if (in.vColor.x > 10.0) + { + uninit_function_int = 10; + } + else + { + uninit_function_int = 20; + } + out.FragColor = in.vColor; + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/volatile-helper-invocation.msl23.spv16.frag b/reference/shaders-msl-no-opt/frag/volatile-helper-invocation.msl23.spv16.frag new file mode 100644 index 00000000000..f42aeb876a7 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/volatile-helper-invocation.msl23.spv16.frag @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + bool _12 = gl_HelperInvocation; + float _15 = float(_12); + out.FragColor = _15; + gl_HelperInvocation = true, discard_fragment(); + bool _16 = gl_HelperInvocation; + float _17 = float(_16); + out.FragColor = _17; + return out; +} + diff --git a/reference/shaders-msl-no-opt/packing/array-of-vec3.comp b/reference/shaders-msl-no-opt/packing/array-of-vec3.comp new file mode 100644 index 00000000000..0dd52ab36d9 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/array-of-vec3.comp @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + packed_float3 v[16]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _13 [[buffer(0)]]) +{ + _13.v[1] = float3(_13.v[0]); +} + diff --git 
a/reference/shaders-msl-no-opt/packing/array-of-vec4.comp b/reference/shaders-msl-no-opt/packing/array-of-vec4.comp new file mode 100644 index 00000000000..025cd425469 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/array-of-vec4.comp @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 v[16]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _13 [[buffer(0)]]) +{ + _13.v[1] = _13.v[0]; +} + diff --git a/reference/shaders-msl-no-opt/packing/isolated-scalar-access.comp b/reference/shaders-msl-no-opt/packing/isolated-scalar-access.comp new file mode 100644 index 00000000000..f1a3719158f --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/isolated-scalar-access.comp @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 v; + float4x4 cm; + float4x4 rm; + packed_float3 v3; + float f; +}; + +kernel void main0(device SSBO& _12 [[buffer(0)]]) +{ + threadgroup float4 shared_vec4; + threadgroup float3 shared_vec3; + ((device float*)&_12.v)[0u] = 10.0; + _12.v3[1u] = 40.0; + ((device float*)&_12.cm[1])[2u] = 20.0; + ((device float*)&_12.rm[1u])[3] = 30.0; + ((threadgroup float*)&shared_vec4)[2u] = 40.0; + ((threadgroup float*)&shared_vec3)[1u] = 1.0; +} + diff --git a/reference/shaders-msl-no-opt/packing/load-store-col-rows.comp b/reference/shaders-msl-no-opt/packing/load-store-col-rows.comp new file mode 100644 index 00000000000..020ccae0dae --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/load-store-col-rows.comp @@ -0,0 +1,76 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +typedef packed_float3 packed_float2x3[2]; +typedef packed_float3 packed_rm_float3x2[2]; + +struct SSBO1 +{ + float2x4 a; + float2x4 a2; +}; + +struct SSBO2 +{ + packed_float2x3 b; + packed_rm_float3x2 b2; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline 
__attribute__((always_inline)) +void load_store_column(device SSBO1& v_21) +{ + float2 u = v_21.a[0].xy; + float2 v = v_21.a[1].xy; + u += v; + (device float2&)v_21.a[0] = u; + (device float2&)v_21.a[1] = v; +} + +static inline __attribute__((always_inline)) +void load_store_row(device SSBO1& v_21) +{ + float2 u = float2(v_21.a2[0][0], v_21.a2[1][0]); + float2 v = float2(v_21.a2[0][1], v_21.a2[1][1]); + u += v; + ((device float*)&v_21.a2[0])[0] = u.x; + ((device float*)&v_21.a2[1])[0] = u.y; + ((device float*)&v_21.a2[0])[1] = v.x; + ((device float*)&v_21.a2[1])[1] = v.y; +} + +static inline __attribute__((always_inline)) +void load_store_packed_column(device SSBO2& v_58) +{ + float3 u = float3(v_58.b[0]); + float3 v = float3(v_58.b[1]); + u += v; + v_58.b[0] = u; + v_58.b[1] = v; +} + +static inline __attribute__((always_inline)) +void load_store_packed_row(device SSBO2& v_58) +{ + float2 u = float2(v_58.b2[0][0], v_58.b2[1][0]); + float2 v = float2(v_58.b2[0][1], v_58.b2[1][1]); + u += v; + ((device float*)&v_58.b2[0])[0] = u.x; + ((device float*)&v_58.b2[1])[0] = u.y; + ((device float*)&v_58.b2[0])[1] = v.x; + ((device float*)&v_58.b2[1])[1] = v.y; +} + +kernel void main0(device SSBO1& v_21 [[buffer(0)]], device SSBO2& v_58 [[buffer(1)]]) +{ + load_store_column(v_21); + load_store_row(v_21); + load_store_packed_column(v_58); + load_store_packed_row(v_58); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp new file mode 100644 index 00000000000..a00a679b64c --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x2 col_major0; + float2x2 col_major1; +}; + +struct SSBORow +{ + float2x2 row_major0; + float2x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline 
__attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x2 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); 
+} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x2-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-2x2-std140.comp new file mode 100644 index 00000000000..fd81f3a9aca --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x2-std140.comp @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x4 col_major0; + float2x4 col_major1; +}; + +struct SSBORow +{ + float2x4 row_major0; + float2x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x2 loaded = float2x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy); + (device float2&)v_29.col_major1[0] = loaded[0]; + (device float2&)v_29.col_major1[1] = loaded[1]; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x2 loaded = transpose(float2x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy)); + (device float2&)v_41.row_major0[0] = float2(loaded[0][0], loaded[1][0]); + (device float2&)v_41.row_major0[1] = float2(loaded[0][1], loaded[1][1]); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + (device float2&)v_29.col_major0[0] = float2x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy)[0]; + (device float2&)v_29.col_major0[1] = float2x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy)[1]; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_41.row_major0[0] = float2(float2x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy)[0][0], float2x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy)[1][0]); + (device float2&)v_41.row_major0[1] = float2(float2x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy)[0][1], float2x2(v_29.col_major0[0].xy, 
v_29.col_major0[1].xy)[1][1]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_29.col_major0[0] = float2(float2x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy)[0][0], float2x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy)[1][0]); + (device float2&)v_29.col_major0[1] = float2(float2x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy)[0][1], float2x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy)[1][1]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + (device float2&)v_41.row_major0[0] = float2x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy)[0]; + (device float2&)v_41.row_major0[1] = float2x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy)[1]; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + ((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1].x; + ((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x2-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-2x2-std430.comp new file mode 
100644 index 00000000000..a00a679b64c --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x2-std430.comp @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x2 col_major0; + float2x2 col_major1; +}; + +struct SSBORow +{ + float2x2 row_major0; + float2x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x2 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device 
float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp new file mode 100644 index 00000000000..963ec39dc88 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +typedef packed_float3 packed_float2x3[2]; + +struct SSBOCol +{ + packed_float2x3 col_major0; + packed_float2x3 col_major1; +}; + +struct SSBORow +{ + float3x2 row_major0; + float3x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x3 loaded = float2x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1])); + v_29.col_major1[0] = loaded[0]; + v_29.col_major1[1] = loaded[1]; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0[0] = float2x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]))[0]; + v_29.col_major0[1] = float2x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]))[1]; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& 
v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(float2x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]))); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[0] = float3(v_41.row_major0[0][0], v_41.row_major0[1][0], v_41.row_major0[2][0]); + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1][0]; + v_41.row_major0[1][1] = v_29.col_major0[1][1]; + v_41.row_major0[2][1] = v_29.col_major0[1][2]; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[0][1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = v_29.col_major0[0][1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x3-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-2x3-std140.comp new file mode 100644 index 00000000000..d20a4a7da8e --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x3-std140.comp @@ -0,0 +1,93 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + 
+using namespace metal; + +struct SSBOCol +{ + float2x3 col_major0; + float2x3 col_major1; +}; + +struct SSBORow +{ + float3x4 row_major0; + float3x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x3 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x3 loaded = transpose(float3x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy, v_41.row_major0[2].xy)); + (device float2&)v_41.row_major0[0] = float2(loaded[0][0], loaded[1][0]); + (device float2&)v_41.row_major0[1] = float2(loaded[0][1], loaded[1][1]); + (device float2&)v_41.row_major0[2] = float2(loaded[0][2], loaded[1][2]); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_41.row_major0[0] = float2(v_29.col_major0[0][0], v_29.col_major0[1][0]); + (device float2&)v_41.row_major0[1] = float2(v_29.col_major0[0][1], v_29.col_major0[1][1]); + (device float2&)v_41.row_major0[2] = float2(v_29.col_major0[0][2], v_29.col_major0[1][2]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(float3x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy, v_41.row_major0[2].xy)); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + (device float2&)v_41.row_major0[0] = float3x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, v_41.row_major1[2].xy)[0]; + (device float2&)v_41.row_major0[1] = float3x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, 
v_41.row_major1[2].xy)[1]; + (device float2&)v_41.row_major0[2] = float3x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, v_41.row_major1[2].xy)[2]; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + ((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1].x; + ((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1].y; + ((device float*)&v_41.row_major0[2])[1] = v_29.col_major0[1].z; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x3-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-2x3-std430.comp new file mode 100644 index 00000000000..240111b9d23 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x3-std430.comp @@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x3 col_major0; + float2x3 col_major1; +}; + +struct SSBORow +{ + float3x2 row_major0; + float3x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x3 loaded = 
v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git 
a/reference/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp new file mode 100644 index 00000000000..d9e8cca9277 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x4 col_major0; + float2x4 col_major1; +}; + +struct SSBORow +{ + float4x2 row_major0; + float4x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = 
v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x4-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-2x4-std140.comp new file mode 100644 index 00000000000..e1adc222a04 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x4-std140.comp @@ -0,0 +1,97 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x4 col_major0; + float2x4 col_major1; +}; + +struct SSBORow +{ + float4x4 row_major0; + float4x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x4 loaded = transpose(float4x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy, v_41.row_major0[2].xy, v_41.row_major0[3].xy)); + (device float2&)v_41.row_major0[0] = float2(loaded[0][0], loaded[1][0]); + (device float2&)v_41.row_major0[1] = float2(loaded[0][1], loaded[1][1]); + (device float2&)v_41.row_major0[2] = float2(loaded[0][2], 
loaded[1][2]); + (device float2&)v_41.row_major0[3] = float2(loaded[0][3], loaded[1][3]); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_41.row_major0[0] = float2(v_29.col_major0[0][0], v_29.col_major0[1][0]); + (device float2&)v_41.row_major0[1] = float2(v_29.col_major0[0][1], v_29.col_major0[1][1]); + (device float2&)v_41.row_major0[2] = float2(v_29.col_major0[0][2], v_29.col_major0[1][2]); + (device float2&)v_41.row_major0[3] = float2(v_29.col_major0[0][3], v_29.col_major0[1][3]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(float4x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy, v_41.row_major0[2].xy, v_41.row_major0[3].xy)); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + (device float2&)v_41.row_major0[0] = float4x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, v_41.row_major1[2].xy, v_41.row_major1[3].xy)[0]; + (device float2&)v_41.row_major0[1] = float4x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, v_41.row_major1[2].xy, v_41.row_major1[3].xy)[1]; + (device float2&)v_41.row_major0[2] = float4x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, v_41.row_major1[2].xy, v_41.row_major1[3].xy)[2]; + (device float2&)v_41.row_major0[3] = float4x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, v_41.row_major1[2].xy, v_41.row_major1[3].xy)[3]; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + ((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1].x; + 
((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1].y; + ((device float*)&v_41.row_major0[2])[1] = v_29.col_major0[1].z; + ((device float*)&v_41.row_major0[3])[1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x4-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-2x4-std430.comp new file mode 100644 index 00000000000..d9e8cca9277 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x4-std430.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x4 col_major0; + float2x4 col_major1; +}; + +struct SSBORow +{ + float4x2 row_major0; + float4x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = 
v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp new file mode 100644 index 00000000000..86be094fbe7 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp @@ -0,0 +1,91 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + 
+#include +#include + +using namespace metal; + +typedef packed_float3 packed_rm_float3x2[2]; + +struct SSBOCol +{ + float3x2 col_major0; + float3x2 col_major1; +}; + +struct SSBORow +{ + packed_rm_float3x2 row_major0; + packed_rm_float3x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x2 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x2 loaded = transpose(float2x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]))); + v_41.row_major0[0] = float3(loaded[0][0], loaded[1][0], loaded[2][0]); + v_41.row_major0[1] = float3(loaded[0][1], loaded[1][1], loaded[2][1]); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0[0] = float3(v_29.col_major0[0][0], v_29.col_major0[1][0], v_29.col_major0[2][0]); + v_41.row_major0[1] = float3(v_29.col_major0[0][1], v_29.col_major0[1][1], v_29.col_major0[2][1]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(float2x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]))); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0[0] = float2x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]))[0]; + v_41.row_major0[1] = float2x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]))[1]; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = 
float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + ((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1].x; + ((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = v_41.row_major0[1u][0]; + v_41.row_major0[1u][0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x2-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-3x2-std140.comp new file mode 100644 index 00000000000..9144272f6de --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x2-std140.comp @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float3x4 col_major0; + float3x4 col_major1; +}; + +struct SSBORow +{ + float2x3 row_major0; + float2x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x2 loaded = float3x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy, v_29.col_major0[2].xy); + (device float2&)v_29.col_major1[0] = loaded[0]; + (device float2&)v_29.col_major1[1] = loaded[1]; + (device float2&)v_29.col_major1[2] = loaded[2]; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = 
transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + (device float2&)v_29.col_major0[0] = float3x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy, v_29.col_major1[2].xy)[0]; + (device float2&)v_29.col_major0[1] = float3x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy, v_29.col_major1[2].xy)[1]; + (device float2&)v_29.col_major0[2] = float3x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy, v_29.col_major1[2].xy)[2]; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(float3x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy, v_29.col_major0[2].xy)); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_29.col_major0[0] = float2(v_41.row_major0[0][0], v_41.row_major0[1][0]); + (device float2&)v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + (device float2&)v_29.col_major0[2] = float2(v_41.row_major0[0][2], v_41.row_major0[1][2]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + 
load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x2-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-3x2-std430.comp new file mode 100644 index 00000000000..3266e6c33f0 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x2-std430.comp @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float3x2 col_major0; + float3x2 col_major1; +}; + +struct SSBORow +{ + float2x3 row_major0; + float2x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x2 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void 
copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp new file mode 100644 index 00000000000..593a0133362 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp @@ -0,0 +1,102 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +typedef packed_float3 packed_float3x3[3]; +typedef packed_float3 packed_rm_float3x3[3]; + +struct SSBOCol +{ + packed_float3x3 col_major0; + packed_float3x3 col_major1; +}; + +struct SSBORow +{ + packed_rm_float3x3 row_major0; + packed_rm_float3x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x3 loaded = float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2])); + v_29.col_major1[0] = loaded[0]; + v_29.col_major1[1] = loaded[1]; + v_29.col_major1[2] = loaded[2]; +} + +static inline 
__attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x3 loaded = transpose(float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))); + v_41.row_major0[0] = float3(loaded[0][0], loaded[1][0], loaded[2][0]); + v_41.row_major0[1] = float3(loaded[0][1], loaded[1][1], loaded[2][1]); + v_41.row_major0[2] = float3(loaded[0][2], loaded[1][2], loaded[2][2]); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0[0] = float3x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]))[0]; + v_29.col_major0[1] = float3x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]))[1]; + v_29.col_major0[2] = float3x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]))[2]; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0[0] = float3(float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[0][0], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[1][0], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[2][0]); + v_41.row_major0[1] = float3(float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[0][1], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[1][1], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[2][1]); + v_41.row_major0[2] = float3(float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[0][2], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[1][2], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), 
float3(v_29.col_major0[2]))[2][2]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[0] = float3(float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[0][0], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[1][0], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[2][0]); + v_29.col_major0[1] = float3(float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[0][1], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[1][1], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[2][1]); + v_29.col_major0[2] = float3(float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[0][2], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[1][2], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[2][2]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0[0] = float3x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]))[0]; + v_41.row_major0[1] = float3x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]))[1]; + v_41.row_major0[2] = float3x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]))[2]; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + ((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1][0]; + ((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1][1]; + ((device 
float*)&v_41.row_major0[2])[1] = v_29.col_major0[1][2]; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[0][1u] = v_41.row_major0[1u][0]; + v_41.row_major0[1u][0] = v_29.col_major0[0][1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x3-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-3x3-std140.comp new file mode 100644 index 00000000000..e2d4adb50bd --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x3-std140.comp @@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float3x3 col_major0; + float3x3 col_major1; +}; + +struct SSBORow +{ + float3x3 row_major0; + float3x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x3 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static 
inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x3-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-3x3-std430.comp new file mode 100644 index 00000000000..e2d4adb50bd --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x3-std430.comp @@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float3x3 col_major0; + float3x3 col_major1; +}; + +struct SSBORow +{ + float3x3 row_major0; + float3x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline 
__attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x3 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + 
copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp new file mode 100644 index 00000000000..360ef467cf5 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp @@ -0,0 +1,99 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +typedef packed_float3 packed_rm_float3x4[4]; + +struct SSBOCol +{ + float3x4 col_major0; + float3x4 col_major1; +}; + +struct SSBORow +{ + packed_rm_float3x4 row_major0; + packed_rm_float3x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x4 loaded = transpose(float4x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]), float3(v_41.row_major0[3]))); + v_41.row_major0[0] = float3(loaded[0][0], loaded[1][0], loaded[2][0]); + v_41.row_major0[1] = float3(loaded[0][1], loaded[1][1], loaded[2][1]); + v_41.row_major0[2] = float3(loaded[0][2], loaded[1][2], loaded[2][2]); + v_41.row_major0[3] = float3(loaded[0][3], loaded[1][3], loaded[2][3]); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0[0] = float3(v_29.col_major0[0][0], v_29.col_major0[1][0], v_29.col_major0[2][0]); + v_41.row_major0[1] = float3(v_29.col_major0[0][1], v_29.col_major0[1][1], v_29.col_major0[2][1]); + v_41.row_major0[2] = 
float3(v_29.col_major0[0][2], v_29.col_major0[1][2], v_29.col_major0[2][2]); + v_41.row_major0[3] = float3(v_29.col_major0[0][3], v_29.col_major0[1][3], v_29.col_major0[2][3]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(float4x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]), float3(v_41.row_major0[3]))); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0[0] = float4x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]), float3(v_41.row_major1[3]))[0]; + v_41.row_major0[1] = float4x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]), float3(v_41.row_major1[3]))[1]; + v_41.row_major0[2] = float4x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]), float3(v_41.row_major1[3]))[2]; + v_41.row_major0[3] = float4x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]), float3(v_41.row_major1[3]))[3]; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + ((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1].x; + ((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1].y; + ((device float*)&v_41.row_major0[2])[1] = v_29.col_major0[1].z; + ((device float*)&v_41.row_major0[3])[1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = v_41.row_major0[1u][0]; + v_41.row_major0[1u][0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& 
v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x4-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-3x4-std140.comp new file mode 100644 index 00000000000..f18917dd76c --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x4-std140.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float3x4 col_major0; + float3x4 col_major1; +}; + +struct SSBORow +{ + float4x3 row_major0; + float4x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline 
__attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x4-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-3x4-std430.comp new file mode 100644 index 00000000000..f18917dd76c --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x4-std430.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float3x4 col_major0; + float3x4 col_major1; +}; + +struct SSBORow +{ + float4x3 row_major0; + float4x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + 
float3x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp 
b/reference/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp new file mode 100644 index 00000000000..d98613e43db --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x2 col_major0; + float4x2 col_major1; +}; + +struct SSBORow +{ + float2x4 row_major0; + float2x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x2 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device 
float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x2-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-4x2-std140.comp new file mode 100644 index 00000000000..6c231cd8d34 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x2-std140.comp @@ -0,0 +1,95 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x4 col_major0; + float4x4 col_major1; +}; + +struct SSBORow +{ + float2x4 row_major0; + float2x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x2 loaded = float4x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy, v_29.col_major0[2].xy, v_29.col_major0[3].xy); + (device float2&)v_29.col_major1[0] = loaded[0]; + (device float2&)v_29.col_major1[1] = loaded[1]; + (device float2&)v_29.col_major1[2] = loaded[2]; + (device float2&)v_29.col_major1[3] = loaded[3]; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + (device float2&)v_29.col_major0[0] = float4x2(v_29.col_major1[0].xy, 
v_29.col_major1[1].xy, v_29.col_major1[2].xy, v_29.col_major1[3].xy)[0]; + (device float2&)v_29.col_major0[1] = float4x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy, v_29.col_major1[2].xy, v_29.col_major1[3].xy)[1]; + (device float2&)v_29.col_major0[2] = float4x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy, v_29.col_major1[2].xy, v_29.col_major1[3].xy)[2]; + (device float2&)v_29.col_major0[3] = float4x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy, v_29.col_major1[2].xy, v_29.col_major1[3].xy)[3]; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(float4x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy, v_29.col_major0[2].xy, v_29.col_major0[3].xy)); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_29.col_major0[0] = float2(v_41.row_major0[0][0], v_41.row_major0[1][0]); + (device float2&)v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + (device float2&)v_29.col_major0[2] = float2(v_41.row_major0[0][2], v_41.row_major0[1][2]); + (device float2&)v_29.col_major0[3] = float2(v_41.row_major0[0][3], v_41.row_major0[1][3]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device 
float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x2-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-4x2-std430.comp new file mode 100644 index 00000000000..d98613e43db --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x2-std430.comp @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x2 col_major0; + float4x2 col_major1; +}; + +struct SSBORow +{ + float2x4 row_major0; + float2x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x2 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void 
copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp new file mode 100644 index 00000000000..7a156f85e30 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp @@ -0,0 +1,98 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +typedef packed_float3 packed_float4x3[4]; + +struct SSBOCol +{ + packed_float4x3 col_major0; + packed_float4x3 col_major1; +}; + +struct SSBORow +{ + float3x4 row_major0; + float3x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x3 loaded = float4x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]), float3(v_29.col_major0[3])); + 
v_29.col_major1[0] = loaded[0]; + v_29.col_major1[1] = loaded[1]; + v_29.col_major1[2] = loaded[2]; + v_29.col_major1[3] = loaded[3]; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0[0] = float4x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]), float3(v_29.col_major1[3]))[0]; + v_29.col_major0[1] = float4x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]), float3(v_29.col_major1[3]))[1]; + v_29.col_major0[2] = float4x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]), float3(v_29.col_major1[3]))[2]; + v_29.col_major0[3] = float4x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]), float3(v_29.col_major1[3]))[3]; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(float4x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]), float3(v_29.col_major0[3]))); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[0] = float3(v_41.row_major0[0][0], v_41.row_major0[1][0], v_41.row_major0[2][0]); + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_29.col_major0[2] = float3(v_41.row_major0[0][2], v_41.row_major0[1][2], v_41.row_major0[2][2]); + v_29.col_major0[3] = float3(v_41.row_major0[0][3], v_41.row_major0[1][3], v_41.row_major0[2][3]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = 
v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1][0]; + v_41.row_major0[1][1] = v_29.col_major0[1][1]; + v_41.row_major0[2][1] = v_29.col_major0[1][2]; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[0][1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = v_29.col_major0[0][1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x3-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-4x3-std140.comp new file mode 100644 index 00000000000..0964f849529 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x3-std140.comp @@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x3 col_major0; + float4x3 col_major1; +}; + +struct SSBORow +{ + float3x4 row_major0; + float3x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x3 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = 
transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x3-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-4x3-std430.comp new file mode 100644 index 00000000000..0964f849529 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x3-std430.comp 
@@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x3 col_major0; + float4x3 col_major1; +}; + +struct SSBORow +{ + float3x4 row_major0; + float3x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x3 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device 
float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp new file mode 100644 index 00000000000..865cc198651 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x4 col_major0; + float4x4 col_major1; +}; + +struct SSBORow +{ + float4x4 row_major0; + float4x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void 
copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x4-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-4x4-std140.comp new file mode 100644 index 00000000000..865cc198651 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x4-std140.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x4 col_major0; + float4x4 col_major1; +}; + +struct SSBORow +{ + float4x4 row_major0; + float4x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + 
+static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git 
a/reference/shaders-msl-no-opt/packing/matrix-4x4-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-4x4-std430.comp new file mode 100644 index 00000000000..865cc198651 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x4-std430.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x4 col_major0; + float4x4 col_major1; +}; + +struct SSBORow +{ + float4x4 row_major0; + float4x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = 
v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp new file mode 100644 index 00000000000..2384e3648e9 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp @@ -0,0 +1,21 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float3x3 m0; + float3x3 m1; + float3 v0; + float3 v1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _11 [[buffer(0)]]) +{ + _11.v0 = _11.v1 * (_11.m1 * _11.m0); + _11.v0 = (_11.v1 * _11.m1) * _11.m0; +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp new file mode 100644 index 00000000000..3fb36e0561a --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +typedef packed_float3 packed_float3x3[3]; + +struct SSBO +{ + packed_float3x3 m0; + packed_float3x3 m1; + packed_float3 v0; + packed_float3 v1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = 
uint3(1u); + +kernel void main0(device SSBO& _11 [[buffer(0)]]) +{ + _11.v0 = (float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2])) * float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2]))) * float3(_11.v1); + _11.v0 = float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2])) * (float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2])) * float3(_11.v1)); + _11.v0 = (float3(_11.v1) * float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2]))) * float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2])); + _11.v0 = float3(_11.v1) * (float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2])) * float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2]))); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp new file mode 100644 index 00000000000..40f00886dde --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float2x4 m0; + float2x4 m1; + float2 v0; + float2 v1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _11 [[buffer(0)]]) +{ + _11.v0 = (float2x2(_11.m0[0].xy, _11.m0[1].xy) * float2x2(_11.m1[0].xy, _11.m1[1].xy)) * _11.v1; + _11.v0 = float2x2(_11.m0[0].xy, _11.m0[1].xy) * (float2x2(_11.m1[0].xy, _11.m1[1].xy) * _11.v1); + _11.v0 = (_11.v1 * float2x2(_11.m0[0].xy, _11.m0[1].xy)) * float2x2(_11.m1[0].xy, _11.m1[1].xy); + _11.v0 = _11.v1 * (float2x2(_11.m0[0].xy, _11.m0[1].xy) * float2x2(_11.m1[0].xy, _11.m1[1].xy)); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp new file mode 100644 index 00000000000..7130c9a8e0c --- /dev/null +++ 
b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +typedef packed_float3 packed_rm_float3x3[3]; + +struct SSBO +{ + packed_rm_float3x3 m0; + packed_rm_float3x3 m1; + packed_float3 v0; + packed_float3 v1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _11 [[buffer(0)]]) +{ + _11.v0 = float3(_11.v1) * (float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2])) * float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2]))); + _11.v0 = (float3(_11.v1) * float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2]))) * float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2])); + _11.v0 = float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2])) * (float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2])) * float3(_11.v1)); + _11.v0 = (float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2])) * float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2]))) * float3(_11.v1); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp new file mode 100644 index 00000000000..f061dd66627 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float2x4 m0; + float2x4 m1; + float2 v0; + float2 v1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _11 [[buffer(0)]]) +{ + _11.v0 = _11.v1 * (float2x2(_11.m1[0].xy, _11.m1[1].xy) * float2x2(_11.m0[0].xy, _11.m0[1].xy)); + _11.v0 = (_11.v1 * float2x2(_11.m1[0].xy, _11.m1[1].xy)) * float2x2(_11.m0[0].xy, _11.m0[1].xy); + _11.v0 = float2x2(_11.m1[0].xy, _11.m1[1].xy) * (float2x2(_11.m0[0].xy, _11.m0[1].xy) * _11.v1); + _11.v0 = 
(float2x2(_11.m1[0].xy, _11.m1[1].xy) * float2x2(_11.m0[0].xy, _11.m0[1].xy)) * _11.v1; +} + diff --git a/reference/shaders-msl-no-opt/packing/member-padding.comp b/reference/shaders-msl-no-opt/packing/member-padding.comp new file mode 100644 index 00000000000..4f653ecdd67 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/member-padding.comp @@ -0,0 +1,21 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + char _m0_pad[16]; + float a; + char _m1_pad[20]; + float b; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]]) +{ + _9.a = 10.0; + _9.b = 20.0; +} + diff --git a/reference/shaders-msl-no-opt/packing/std140-array-of-vectors.comp b/reference/shaders-msl-no-opt/packing/std140-array-of-vectors.comp new file mode 100644 index 00000000000..4d5ba324a0d --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/std140-array-of-vectors.comp @@ -0,0 +1,42 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 v1[4]; + float4 v2[4]; + float3 v3[4]; + float4 v4[4]; + float4 v1_array_of_array[4][4]; + float4 v2_array_of_array[4][4]; + float3 v3_array_of_array[4][4]; + float4 v4_array_of_array[4][4]; + float4 v_unsized[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _29 [[buffer(0)]]) +{ + float loaded1 = _29.v1[1].x; + (device float&)_29.v1[2] = loaded1; + float2 loaded2 = _29.v2[1].xy; + (device float2&)_29.v2[2] = loaded2; + float3 loaded3 = _29.v3[1]; + _29.v3[2] = loaded3; + float4 loaded4 = _29.v4[1]; + _29.v4[2] = loaded4; + loaded1 = _29.v1_array_of_array[1][2].x; + (device float&)_29.v1_array_of_array[2][3] = loaded1; + loaded2 = _29.v2_array_of_array[1][2].xy; + (device float2&)_29.v2_array_of_array[2][3] = loaded2; + loaded3 = _29.v3_array_of_array[1][2]; + _29.v3_array_of_array[2][3] = loaded3; + loaded4 = _29.v4_array_of_array[1][2]; + _29.v4_array_of_array[2][3] = loaded4; + loaded1 = 
_29.v_unsized[1].x; + (device float&)_29.v_unsized[2] = loaded1; +} + diff --git a/reference/shaders-msl-no-opt/packing/struct-alignment.comp b/reference/shaders-msl-no-opt/packing/struct-alignment.comp new file mode 100644 index 00000000000..34647b46f45 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/struct-alignment.comp @@ -0,0 +1,27 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + packed_float3 a; + float b; +}; + +struct SSBO +{ + float2 a; + float b; + char _m2_pad[4]; + Foo foo; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _12 [[buffer(0)]]) +{ + ((device float*)&_12.a)[0u] = 10.0; + _12.b = 20.0; +} + diff --git a/reference/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp b/reference/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp new file mode 100644 index 00000000000..587ee4ad158 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + packed_float3 a; +}; + +struct SSBOScalar +{ + Foo v[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBOScalar& buffer_scalar [[buffer(0)]]) +{ + buffer_scalar.v[1].a[1u] = 1.0; +} + diff --git a/reference/shaders-msl-no-opt/packing/struct-packing-recursive.comp b/reference/shaders-msl-no-opt/packing/struct-packing-recursive.comp new file mode 100644 index 00000000000..e0652b933dd --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/struct-packing-recursive.comp @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + packed_float4 a; +}; + +struct Bar +{ + Foo a; +}; + +struct Baz +{ + Bar a; +}; + +struct SSBOScalar +{ + float v; + Baz baz; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBOScalar& buffer_scalar [[buffer(0)]]) +{ + buffer_scalar.baz.a.a.a[3u] = 
10.0; +} + diff --git a/reference/shaders-msl-no-opt/packing/struct-packing.comp b/reference/shaders-msl-no-opt/packing/struct-packing.comp new file mode 100644 index 00000000000..a86809fee97 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/struct-packing.comp @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + packed_float3 a; +}; + +struct Bar +{ + packed_float3 a; +}; + +struct SSBOScalar +{ + Foo foo; + Bar bar; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBOScalar& buffer_scalar [[buffer(0)]]) +{ + buffer_scalar.foo.a[0u] = 10.0; + buffer_scalar.bar.a[0u] = 20.0; +} + diff --git a/reference/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp b/reference/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp new file mode 100644 index 00000000000..c30fd070ec4 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp @@ -0,0 +1,54 @@ +#include +#include + +using namespace metal; + +struct A +{ + float v; + char _m0_final_padding[12]; +}; + +struct B +{ + float2 v; + char _m0_final_padding[8]; +}; + +struct C +{ + float3 v; +}; + +struct D +{ + float4 v; +}; + +struct E +{ + float4 a; + float2 b; + char _m0_final_padding[8]; +}; + +struct SSBO +{ + A a[2][4]; + B b[2][4]; + C c[2][4]; + D d[2][4]; + float2x4 e[2][4]; + E f[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _32 [[buffer(0)]]) +{ + _32.f[0].a = float4(2.0); + float2x2 tmp = float2x2(_32.e[0][1][0].xy, _32.e[0][1][1].xy); + (device float2&)_32.e[1][2][0] = tmp[0]; + (device float2&)_32.e[1][2][1] = tmp[1]; +} + diff --git a/reference/shaders-msl-no-opt/packing/struct-size-padding.comp b/reference/shaders-msl-no-opt/packing/struct-size-padding.comp new file mode 100644 index 00000000000..98f039fc98a --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/struct-size-padding.comp @@ -0,0 
+1,54 @@ +#include +#include + +using namespace metal; + +struct A +{ + float v; + char _m0_final_padding[12]; +}; + +struct B +{ + float2 v; + char _m0_final_padding[8]; +}; + +struct C +{ + float3 v; +}; + +struct D +{ + float4 v; +}; + +struct E +{ + float4 a; + float2 b; + char _m0_final_padding[8]; +}; + +struct SSBO +{ + A a[4]; + B b[4]; + C c[4]; + D d[4]; + float2x4 e[4]; + E f[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _26 [[buffer(0)]]) +{ + _26.f[0].a = float4(2.0); + float2x2 tmp = float2x2(_26.e[1][0].xy, _26.e[1][1].xy); + (device float2&)_26.e[2][0] = tmp[0]; + (device float2&)_26.e[2][1] = tmp[1]; +} + diff --git a/reference/shaders-msl-no-opt/tesc/copy-tess-level.tesc b/reference/shaders-msl-no-opt/tesc/copy-tess-level.tesc new file mode 100644 index 00000000000..3bb5419795b --- /dev/null +++ b/reference/shaders-msl-no-opt/tesc/copy-tess-level.tesc @@ -0,0 +1,70 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _14 = spvUnsafeArray({ 1.0, 2.0 }); +constant spvUnsafeArray _21 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0 }); + +struct main0_out +{ + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1]; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(_14[0]); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(_14[1]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_21[0]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_21[1]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_21[2]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(_21[3]); + spvUnsafeArray inner; + inner = spvUnsafeArray({ float(spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1]) }); + spvUnsafeArray outer; + outer = spvUnsafeArray({ float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 
float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]) }); + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + diff --git a/reference/shaders-msl-no-opt/tesc/passthrough-clip-cull.multi-patch.tesc b/reference/shaders-msl-no-opt/tesc/passthrough-clip-cull.multi-patch.tesc new file mode 100644 index 00000000000..f55e47baf05 --- /dev/null +++ b/reference/shaders-msl-no-opt/tesc/passthrough-clip-cull.multi-patch.tesc @@ -0,0 +1,71 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_in +{ + uint3 m_57; + ushort2 m_61; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* 
gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].gl_ClipDistance[0] = gl_in[gl_InvocationID].gl_ClipDistance[0]; + gl_out[gl_InvocationID].gl_ClipDistance[1] = gl_in[gl_InvocationID].gl_ClipDistance[1]; + gl_out[gl_InvocationID].gl_CullDistance[0] = gl_in[gl_InvocationID].gl_CullDistance[0]; +} + diff --git a/reference/shaders-msl-no-opt/tesc/tess-level-read-write-in-function-quad.tesc b/reference/shaders-msl-no-opt/tesc/tess-level-read-write-in-function-quad.tesc new file mode 100644 index 00000000000..c76da101bc6 --- /dev/null +++ b/reference/shaders-msl-no-opt/tesc/tess-level-read-write-in-function-quad.tesc @@ -0,0 +1,37 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +static inline __attribute__((always_inline)) +void store_tess_level_in_func(device half (&gl_TessLevelInner)[2], device half (&gl_TessLevelOuter)[4]) +{ + gl_TessLevelInner[0] = half(1.0); + gl_TessLevelInner[1] = half(2.0); + gl_TessLevelOuter[0] = half(3.0); + gl_TessLevelOuter[1] = half(4.0); + gl_TessLevelOuter[2] = half(5.0); + gl_TessLevelOuter[3] = half(6.0); +} + +static inline __attribute__((always_inline)) +float load_tess_level_in_func(device half (&gl_TessLevelInner)[2], device half (&gl_TessLevelOuter)[4]) +{ + return float(gl_TessLevelInner[0]) + float(gl_TessLevelOuter[1]); +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1]; + 
store_tess_level_in_func(spvTessLevel[gl_PrimitiveID].insideTessellationFactor, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor); + float v = load_tess_level_in_func(spvTessLevel[gl_PrimitiveID].insideTessellationFactor, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor); + gl_out[gl_InvocationID].gl_Position = float4(v); +} + diff --git a/reference/shaders-msl-no-opt/tese/builtin-input-automatic-attribute-assignment.tese b/reference/shaders-msl-no-opt/tese/builtin-input-automatic-attribute-assignment.tese new file mode 100644 index 00000000000..15f04e7371c --- /dev/null +++ b/reference/shaders-msl-no-opt/tese/builtin-input-automatic-attribute-assignment.tese @@ -0,0 +1,80 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 FragColors [[attribute(2)]]; + float4 gl_Position [[attribute(1)]]; +}; + +struct main0_patchIn +{ + float4 FragColor [[attribute(0)]]; + float4 gl_TessLevelOuter [[attribute(3)]]; + float2 gl_TessLevelInner [[attribute(4)]]; + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn 
[[stage_in]], uint gl_PrimitiveID [[patch_id]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter[3]; + out.gl_Position = (((((float4(1.0) + patchIn.FragColor) + patchIn.gl_in[0].FragColors) + patchIn.gl_in[1].FragColors) + float4(gl_TessLevelInner[0])) + float4(gl_TessLevelOuter[int(gl_PrimitiveID) & 1])) + patchIn.gl_in[0].gl_Position; + return out; +} + diff --git a/reference/shaders-msl-no-opt/tese/load-clip-cull.msl2.tese b/reference/shaders-msl-no-opt/tese/load-clip-cull.msl2.tese new file mode 100644 index 00000000000..09c5cd75026 --- /dev/null +++ b/reference/shaders-msl-no-opt/tese/load-clip-cull.msl2.tese @@ -0,0 +1,37 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 gl_Position [[attribute(0)]]; + float gl_ClipDistance_0 [[attribute(1)]]; + float gl_ClipDistance_1 [[attribute(2)]]; + float gl_CullDistance_0 [[attribute(3)]]; + float gl_CullDistance_1 [[attribute(4)]]; + float gl_CullDistance_2 [[attribute(5)]]; +}; + +struct main0_patchIn +{ + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position.x = patchIn.gl_in[0].gl_ClipDistance_0; + out.gl_Position.y = patchIn.gl_in[1].gl_CullDistance_0; + out.gl_Position.z = patchIn.gl_in[0].gl_ClipDistance_1; + out.gl_Position.w = patchIn.gl_in[1].gl_CullDistance_1; + out.gl_Position += patchIn.gl_in[0].gl_Position; + out.gl_Position += patchIn.gl_in[1].gl_Position; + return out; +} + diff --git 
a/reference/shaders-msl-no-opt/vert/cull-distance.for-tess.vert b/reference/shaders-msl-no-opt/vert/cull-distance.for-tess.vert new file mode 100644 index 00000000000..5c2311d7412 --- /dev/null +++ b/reference/shaders-msl-no-opt/vert/cull-distance.for-tess.vert @@ -0,0 +1,62 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; + spvUnsafeArray gl_CullDistance; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_CullDistance[0] = 1.0; + out.gl_CullDistance[1] = 3.0; + out.gl_Position = float4(1.0); +} + diff --git a/reference/shaders-msl-no-opt/vert/functions_nested.vert b/reference/shaders-msl-no-opt/vert/functions_nested.vert index 5d67f408bef..2d394f67d4d 100644 --- a/reference/shaders-msl-no-opt/vert/functions_nested.vert +++ b/reference/shaders-msl-no-opt/vert/functions_nested.vert @@ -5,6 +5,13 @@ using namespace 
metal; +// Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) +uint2 spvTexelBufferCoord(uint tc) +{ + return uint2(tc % 4096, tc / 4096); +} + struct attr_desc { int type; @@ -27,8 +34,6 @@ struct VertexConstantsBuffer float4 vc[16]; }; -constant float4 _295 = {}; - struct main0_out { float4 tc0 [[user(locn0)]]; @@ -36,12 +41,7 @@ struct main0_out float4 gl_Position [[position]]; }; -// Returns 2D texture coords corresponding to 1D texel buffer coords -uint2 spvTexelBufferCoord(uint tc) -{ - return uint2(tc % 4096, tc / 4096); -} - +static inline __attribute__((always_inline)) attr_desc fetch_desc(thread const int& location, constant VertexBuffer& v_227) { int attribute_flags = v_227.input_attributes[location].w; @@ -55,6 +55,7 @@ attr_desc fetch_desc(thread const int& location, constant VertexBuffer& v_227) return result; } +static inline __attribute__((always_inline)) uint get_bits(thread const uint4& v, thread const int& swap) { if (swap != 0) @@ -64,7 +65,8 @@ uint get_bits(thread const uint4& v, thread const int& swap) return ((v.x | (v.y << uint(8))) | (v.z << uint(16))) | (v.w << uint(24)); } -float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thread const texture2d input_stream) +static inline __attribute__((always_inline)) +float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, texture2d input_stream) { float4 result = float4(0.0, 0.0, 0.0, 1.0); bool reverse_order = false; @@ -132,11 +134,12 @@ float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thr return _210; } -float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d buff_in_2, thread texture2d buff_in_1) +static inline __attribute__((always_inline)) +float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, texture2d buff_in_2, texture2d buff_in_1) { 
int param = location; attr_desc desc = fetch_desc(param, v_227); - int vertex_id = gl_VertexIndex - int(v_227.vertex_base_index); + int vertex_id = int(gl_VertexIndex) - int(v_227.vertex_base_index); if (desc.is_volatile != 0) { attr_desc param_1 = desc; @@ -151,7 +154,8 @@ float4 read_location(thread const int& location, constant VertexBuffer& v_227, t } } -void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d buff_in_2, thread texture2d buff_in_1, constant VertexConstantsBuffer& v_309) +static inline __attribute__((always_inline)) +void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, texture2d buff_in_2, texture2d buff_in_1, constant VertexConstantsBuffer& v_309) { int param = 3; float4 in_diff_color = read_location(param, v_227, gl_VertexIndex, buff_in_2, buff_in_1); @@ -165,7 +169,8 @@ void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& tmp0.y = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[5])).y; tmp0.z = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[6])).z; float4 tmp1; - tmp1 = float4(in_tc0.xy.x, in_tc0.xy.y, tmp1.z, tmp1.w); + tmp1.x = in_tc0.xy.x; + tmp1.y = in_tc0.xy.y; tmp1.z = v_309.vc[15].x; dst_reg7.y = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[8])).y; dst_reg7.x = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[7])).x; diff --git a/reference/opt/shaders-msl/vert/layer.msl11.invalid.vert b/reference/shaders-msl-no-opt/vert/layer.msl11.invalid.vert similarity index 100% rename from reference/opt/shaders-msl/vert/layer.msl11.invalid.vert rename to reference/shaders-msl-no-opt/vert/layer.msl11.invalid.vert diff --git a/reference/shaders-msl-no-opt/vert/modf-storage-class.capture.vert b/reference/shaders-msl-no-opt/vert/modf-storage-class.capture.vert new file mode 100644 index 00000000000..87f4e955172 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/vert/modf-storage-class.capture.vert @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 f [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 f2 [[attribute(0)]]; +}; + +vertex void main0(main0_in in [[stage_in]], uint gl_VertexIndex [[vertex_id]], uint gl_BaseVertex [[base_vertex]], uint gl_InstanceIndex [[instance_id]], uint gl_BaseInstance [[base_instance]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]]) +{ + device main0_out& out = spvOut[(gl_InstanceIndex - gl_BaseInstance) * spvIndirectParams[0] + gl_VertexIndex - gl_BaseVertex]; + float4 _35; + float4 _21 = modf(in.f2, _35); + out.f = _35; + out.gl_Position = _21; +} + diff --git a/reference/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert b/reference/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert new file mode 100644 index 00000000000..9cdf12439ab --- /dev/null +++ b/reference/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert @@ -0,0 +1,157 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], 
threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +constant float4 _68[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) }; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + int Index1 [[attribute(0)]]; + int Index2 [[attribute(1)]]; +}; + +static inline __attribute__((always_inline)) +float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2) +{ + float4 indexable[4]; + spvArrayCopyFromStackToStack1(indexable, positions); + float4 indexable_1[4]; + spvArrayCopyFromStackToStack1(indexable_1, positions2); + return indexable[Index1] + indexable_1[Index2]; +} + +static inline __attribute__((always_inline)) 
+float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2) +{ + return consume_constant_arrays2(positions, positions2, Index1, Index2); +} + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + float4 _68_array_copy[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) }; + main0_out out = {}; + float4 LUT2[4]; + LUT2[0] = float4(10.0); + LUT2[1] = float4(11.0); + LUT2[2] = float4(12.0); + LUT2[3] = float4(13.0); + out.gl_Position = consume_constant_arrays(_68_array_copy, LUT2, in.Index1, in.Index2); + return out; +} + diff --git a/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert b/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert index ab5631366a7..f6e3efbecf7 100644 --- a/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert +++ b/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert @@ -1,11 +1,50 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -constant float4 _68[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _68 = spvUnsafeArray({ float4(0.0), float4(1.0), float4(2.0), float4(3.0) }); struct main0_out { @@ -18,43 +57,31 @@ struct main0_in int Index2 [[attribute(1)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2) +static inline __attribute__((always_inline)) +float4 consume_constant_arrays2(spvUnsafeArray positions, spvUnsafeArray positions2, thread int& Index1, thread int& Index2) { - float4 indexable[4]; - spvArrayCopyFromStack1(indexable, positions); - float4 indexable_1[4]; - spvArrayCopyFromStack1(indexable_1, positions2); + spvUnsafeArray indexable; + indexable = positions; + spvUnsafeArray indexable_1; + indexable_1 = positions2; return indexable[Index1] + indexable_1[Index2]; } -float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, 
thread int& Index2) +static inline __attribute__((always_inline)) +float4 consume_constant_arrays(spvUnsafeArray positions, spvUnsafeArray positions2, thread int& Index1, thread int& Index2) { return consume_constant_arrays2(positions, positions2, Index1, Index2); } vertex main0_out main0(main0_in in [[stage_in]]) { - float4 _68_array_copy[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) }; main0_out out = {}; - float4 LUT2[4]; + spvUnsafeArray LUT2; LUT2[0] = float4(10.0); LUT2[1] = float4(11.0); LUT2[2] = float4(12.0); LUT2[3] = float4(13.0); - out.gl_Position = consume_constant_arrays(_68_array_copy, LUT2, in.Index1, in.Index2); + out.gl_Position = consume_constant_arrays(_68, LUT2, in.Index1, in.Index2); return out; } diff --git a/reference/shaders-msl-no-opt/vert/uninitialized-vertex-output.vert b/reference/shaders-msl-no-opt/vert/uninitialized-vertex-output.vert new file mode 100644 index 00000000000..f8209ae6fcf --- /dev/null +++ b/reference/shaders-msl-no-opt/vert/uninitialized-vertex-output.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 Pos [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.gl_Position = float4(1.0); + return out; +} + diff --git a/reference/shaders-msl-no-opt/vert/unused-subgroup-builtin.msl22.vert b/reference/shaders-msl-no-opt/vert/unused-subgroup-builtin.msl22.vert new file mode 100644 index 00000000000..9e024c2095b --- /dev/null +++ b/reference/shaders-msl-no-opt/vert/unused-subgroup-builtin.msl22.vert @@ -0,0 +1,9 @@ +#include +#include + +using namespace metal; + +vertex void main0() +{ +} + diff --git a/reference/opt/shaders-msl/vert/viewport-index.msl2.invalid.vert b/reference/shaders-msl-no-opt/vert/viewport-index.msl2.invalid.vert similarity index 100% rename from reference/opt/shaders-msl/vert/viewport-index.msl2.invalid.vert rename to reference/shaders-msl-no-opt/vert/viewport-index.msl2.invalid.vert 
diff --git a/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag b/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag index 9c3fff593b1..e402bbb259c 100644 --- a/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag +++ b/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag @@ -5,28 +5,13 @@ using namespace metal; -struct main0_out -{ - float4 fragColor [[color(0)]]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -39,6 +24,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -77,8 +73,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -114,8 +110,8 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } // Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) +template class Tex, typename... Ts> +inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... 
params) { if (sw) { @@ -136,7 +132,13 @@ inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... p return t.gather_compare(s, spvForward(params)...); } -float4 do_samples(thread const texture1d t1, thread const sampler t1Smplr, constant uint& t1Swzl, thread const texture2d t2, constant uint& t2Swzl, thread const texture3d t3, thread const sampler t3Smplr, constant uint& t3Swzl, thread const texturecube tc, constant uint& tcSwzl, thread const texture2d_array t2a, thread const sampler t2aSmplr, constant uint& t2aSwzl, thread const texturecube_array tca, thread const sampler tcaSmplr, constant uint& tcaSwzl, thread const texture2d tb, thread const depth2d d2, thread const sampler d2Smplr, constant uint& d2Swzl, thread const depthcube dc, thread const sampler dcSmplr, constant uint& dcSwzl, thread const depth2d_array d2a, constant uint& d2aSwzl, thread const depthcube_array dca, thread const sampler dcaSmplr, constant uint& dcaSwzl, thread sampler defaultSampler, thread sampler shadowSampler) +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 do_samples(texture1d t1, sampler t1Smplr, constant uint& t1Swzl, texture2d t2, constant uint& t2Swzl, texture3d t3, sampler t3Smplr, constant uint& t3Swzl, texturecube tc, constant uint& tcSwzl, texture2d_array t2a, sampler t2aSmplr, constant uint& t2aSwzl, texturecube_array tca, sampler tcaSmplr, constant uint& tcaSwzl, texture2d tb, depth2d d2, sampler d2Smplr, constant uint& d2Swzl, depthcube dc, sampler dcSmplr, constant uint& dcSwzl, depth2d_array d2a, constant uint& d2aSwzl, depthcube_array dca, sampler dcaSmplr, constant uint& dcaSwzl, sampler defaultSampler, sampler shadowSampler) { float4 c = spvTextureSwizzle(t1.sample(t1Smplr, 0.0), t1Swzl); c = spvTextureSwizzle(t2.sample(defaultSampler, float2(0.0)), t2Swzl); @@ -144,42 +146,42 @@ float4 do_samples(thread const texture1d t1, thread const sampler t1Smplr c = 
spvTextureSwizzle(tc.sample(defaultSampler, float3(0.0)), tcSwzl); c = spvTextureSwizzle(t2a.sample(t2aSmplr, float3(0.0).xy, uint(round(float3(0.0).z))), t2aSwzl); c = spvTextureSwizzle(tca.sample(tcaSmplr, float4(0.0).xyz, uint(round(float4(0.0).w))), tcaSwzl); - c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), d2Swzl); - c.x = spvTextureSwizzle(dc.sample_compare(dcSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), dcSwzl); - c.x = spvTextureSwizzle(d2a.sample_compare(shadowSampler, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), d2aSwzl); + c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, float3(0.0, 0.0, 1.0).xy, 1.0), d2Swzl); + c.x = spvTextureSwizzle(dc.sample_compare(dcSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, 1.0), dcSwzl); + c.x = spvTextureSwizzle(d2a.sample_compare(shadowSampler, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), 1.0), d2aSwzl); c.x = spvTextureSwizzle(dca.sample_compare(dcaSmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), dcaSwzl); c = spvTextureSwizzle(t1.sample(t1Smplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), t1Swzl); c = spvTextureSwizzle(t2.sample(defaultSampler, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), t2Swzl); c = spvTextureSwizzle(t3.sample(t3Smplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), t3Swzl); float4 _119 = float4(0.0, 0.0, 1.0, 1.0); - _119.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, _119.xy / _119.z, float4(0.0, 0.0, 1.0, 1.0).z / _119.z), d2Swzl); + _119.z = 1.0; + c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, _119.xy / _119.z, 1.0 / _119.z), d2Swzl); c = spvTextureSwizzle(t1.sample(t1Smplr, 0.0), t1Swzl); c = spvTextureSwizzle(t2.sample(defaultSampler, float2(0.0), level(0.0)), t2Swzl); c = spvTextureSwizzle(t3.sample(t3Smplr, float3(0.0), level(0.0)), t3Swzl); c = 
spvTextureSwizzle(tc.sample(defaultSampler, float3(0.0), level(0.0)), tcSwzl); c = spvTextureSwizzle(t2a.sample(t2aSmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), t2aSwzl); c = spvTextureSwizzle(tca.sample(tcaSmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), tcaSwzl); - c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), d2Swzl); + c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, float3(0.0, 0.0, 1.0).xy, 1.0, level(0.0)), d2Swzl); c = spvTextureSwizzle(t1.sample(t1Smplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), t1Swzl); c = spvTextureSwizzle(t2.sample(defaultSampler, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), t2Swzl); c = spvTextureSwizzle(t3.sample(t3Smplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), t3Swzl); float4 _153 = float4(0.0, 0.0, 1.0, 1.0); - _153.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, _153.xy / _153.z, float4(0.0, 0.0, 1.0, 1.0).z / _153.z, level(0.0)), d2Swzl); + _153.z = 1.0; + c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, _153.xy / _153.z, 1.0 / _153.z, level(0.0)), d2Swzl); c = spvTextureSwizzle(t1.read(uint(0)), t1Swzl); c = spvTextureSwizzle(t2.read(uint2(int2(0)), 0), t2Swzl); c = spvTextureSwizzle(t3.read(uint3(int3(0)), 0), t3Swzl); c = spvTextureSwizzle(t2a.read(uint2(int3(0).xy), uint(int3(0).z), 0), t2aSwzl); c = tb.read(spvTexelBufferCoord(0)); - c = spvGatherSwizzle, float2, int2>(defaultSampler, t2, float2(0.0), int2(0), component::x, t2Swzl); - c = spvGatherSwizzle, float3>(defaultSampler, tc, float3(0.0), component::y, tcSwzl); - c = spvGatherSwizzle, float2, uint, int2>(t2aSmplr, t2a, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, t2aSwzl); - c = spvGatherSwizzle, float3, uint>(tcaSmplr, tca, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, tcaSwzl); - c = spvGatherCompareSwizzle, float2, float>(d2Smplr, 
d2, float2(0.0), 1.0, d2Swzl); - c = spvGatherCompareSwizzle, float3, float>(dcSmplr, dc, float3(0.0), 1.0, dcSwzl); - c = spvGatherCompareSwizzle, float2, uint, float>(shadowSampler, d2a, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0, d2aSwzl); - c = spvGatherCompareSwizzle, float3, uint, float>(dcaSmplr, dca, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0, dcaSwzl); + c = spvGatherSwizzle(t2, defaultSampler, t2Swzl, component::x, float2(0.0), int2(0)); + c = spvGatherSwizzle(tc, defaultSampler, tcSwzl, component::y, float3(0.0)); + c = spvGatherSwizzle(t2a, t2aSmplr, t2aSwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0)); + c = spvGatherSwizzle(tca, tcaSmplr, tcaSwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w))); + c = spvGatherCompareSwizzle(d2, d2Smplr, d2Swzl, float2(0.0), 1.0); + c = spvGatherCompareSwizzle(dc, dcSmplr, dcSwzl, float3(0.0), 1.0); + c = spvGatherCompareSwizzle(d2a, shadowSampler, d2aSwzl, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = spvGatherCompareSwizzle(dca, dcaSmplr, dcaSwzl, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); return c; } diff --git a/reference/shaders-msl/amd/shader_trinary_minmax.msl21.comp b/reference/shaders-msl/amd/shader_trinary_minmax.msl21.comp new file mode 100644 index 00000000000..14404719640 --- /dev/null +++ b/reference/shaders-msl/amd/shader_trinary_minmax.msl21.comp @@ -0,0 +1,14 @@ +#include +#include + +using namespace metal; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +kernel void main0() +{ + int t11 = min3(0, 3, 2); + int t12 = max3(0, 3, 2); + int t13 = median3(0, 3, 2); +} + diff --git a/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp b/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp index 95841a78a2d..272337fc122 100644 --- a/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp +++ b/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp @@ -7,20 +7,21 @@ using namespace metal; -struct u0_counters 
-{ - uint c; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct u0_counters +{ + uint c; +}; + kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); + uint _29 = atomic_fetch_sub_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); float4 r0; r0.x = as_type(_29); u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type(r0.x)) * 1u) + (uint(0) >> 2u)))); diff --git a/reference/shaders-msl/asm/comp/atomic-increment.asm.comp b/reference/shaders-msl/asm/comp/atomic-increment.asm.comp index cd78fa2c64c..4518248b897 100644 --- a/reference/shaders-msl/asm/comp/atomic-increment.asm.comp +++ b/reference/shaders-msl/asm/comp/atomic-increment.asm.comp @@ -7,20 +7,21 @@ using namespace metal; -struct u0_counters -{ - uint c; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct u0_counters +{ + uint c; +}; + kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); + uint _29 = atomic_fetch_add_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); float4 r0; r0.x = as_type(_29); u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type(r0.x)) * 1u) + (uint(0) >> 2u)))); diff --git a/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp 
b/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp index 47ce85f8fc3..cbbf27d65da 100644 --- a/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp +++ b/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp @@ -15,7 +15,7 @@ struct _4 int4 _m1; }; -kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]]) +kernel void main0(device _3& __restrict _5 [[buffer(0)]], device _4& __restrict _6 [[buffer(1)]]) { _6._m0 = _5._m1 + uint4(_5._m0); _6._m0 = uint4(_5._m0) + _5._m1; diff --git a/reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..a55d8916dfa --- /dev/null +++ b/reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +kernel void main0(device _3& __restrict _5 [[buffer(0)]], device _4& __restrict _6 [[buffer(1)]]) +{ + _6._m0 = uint4(int4(_5._m1) < _5._m0); + _6._m0 = uint4(int4(_5._m1) <= _5._m0); + _6._m0 = uint4(_5._m1 < uint4(_5._m0)); + _6._m0 = uint4(_5._m1 <= uint4(_5._m0)); + _6._m0 = uint4(int4(_5._m1) > _5._m0); + _6._m0 = uint4(int4(_5._m1) >= _5._m0); + _6._m0 = uint4(_5._m1 > uint4(_5._m0)); + _6._m0 = uint4(_5._m1 >= uint4(_5._m0)); +} + diff --git a/reference/shaders-msl/asm/comp/block-name-alias-global.asm.comp b/reference/shaders-msl/asm/comp/block-name-alias-global.asm.comp index 2928efda2c4..6dcc14ea8d5 100644 --- a/reference/shaders-msl/asm/comp/block-name-alias-global.asm.comp +++ b/reference/shaders-msl/asm/comp/block-name-alias-global.asm.comp @@ -18,11 +18,12 @@ struct A_2 { int a; int b; + char _m0_final_padding[8]; }; struct A_3 { - /* FIXME: A padded struct is needed here. If you see this message, file a bug! */ A_2 Data[1024]; + A_2 Data[1024]; }; struct B @@ -32,7 +33,7 @@ struct B struct B_1 { - /* FIXME: A padded struct is needed here. 
If you see this message, file a bug! */ A_2 Data[1024]; + A_2 Data[1024]; }; kernel void main0(device A_1& C1 [[buffer(0)]], constant A_3& C2 [[buffer(1)]], device B& C3 [[buffer(2)]], constant B_1& C4 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) diff --git a/reference/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp b/reference/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp index 217782ce784..ac78a65196b 100644 --- a/reference/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp +++ b/reference/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp @@ -5,17 +5,18 @@ using namespace metal; -struct cb5_struct -{ - float4 _m0[5]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct cb5_struct +{ + float4 _m0[5]; +}; + kernel void main0(constant cb5_struct& cb0_5 [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { float4 r0; diff --git a/reference/shaders-msl/asm/comp/buffer-write.asm.comp b/reference/shaders-msl/asm/comp/buffer-write.asm.comp index 159d09b38c8..89e8d83ea71 100644 --- a/reference/shaders-msl/asm/comp/buffer-write.asm.comp +++ b/reference/shaders-msl/asm/comp/buffer-write.asm.comp @@ -5,17 +5,18 @@ using namespace metal; -struct cb -{ - float value; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct cb +{ + float value; +}; + kernel void main0(constant cb& _6 [[buffer(0)]], texture2d _buffer [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { _buffer.write(float4(_6.value), spvTexelBufferCoord(((32u * gl_WorkGroupID.x) + gl_LocalInvocationIndex))); diff --git 
a/reference/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp b/reference/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp new file mode 100644 index 00000000000..986e9096633 --- /dev/null +++ b/reference/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct _19 +{ +}; +struct _5 +{ + int _m0; + _19 _m1; + char _m2_pad[4]; + _19 _m2; + char _m3_pad[4]; + int _m3; +}; + +kernel void main0(device _5& _3 [[buffer(0)]], device _5& _4 [[buffer(1)]]) +{ + _4 = _3; +} + diff --git a/reference/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp b/reference/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp new file mode 100644 index 00000000000..4bcfeb21ab5 --- /dev/null +++ b/reference/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct _19 +{ +}; +struct _5 +{ + int _m0; + char _m1_pad[12]; + _19 _m1; + char _m2_pad[16]; + _19 _m2; + char _m3_pad[16]; + int _m3; +}; + +kernel void main0(constant _5& _3 [[buffer(0)]], device _5& _4 [[buffer(1)]]) +{ + _4 = _3; +} + diff --git a/reference/shaders-msl/asm/comp/global-parameter-name-alias.asm.comp b/reference/shaders-msl/asm/comp/global-parameter-name-alias.asm.comp index 967d14bfcb7..ec1b2a2da24 100644 --- a/reference/shaders-msl/asm/comp/global-parameter-name-alias.asm.comp +++ b/reference/shaders-msl/asm/comp/global-parameter-name-alias.asm.comp @@ -10,12 +10,14 @@ struct ssbo uint _data[1]; }; +static inline __attribute__((always_inline)) void Load(thread const uint& size, const device ssbo& ssbo_1) { int byteAddrTemp = int(size >> uint(2)); uint4 data = uint4(ssbo_1._data[byteAddrTemp], ssbo_1._data[byteAddrTemp + 1], ssbo_1._data[byteAddrTemp + 2], ssbo_1._data[byteAddrTemp + 3]); } +static inline __attribute__((always_inline)) void _main(thread const uint3& id, const device ssbo& ssbo_1) { uint param = 4u; diff --git 
a/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp b/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp new file mode 100644 index 00000000000..536556391ec --- /dev/null +++ b/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -0,0 +1,22 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +static inline __attribute__((always_inline)) +void _main(thread const uint3& id, texture2d TargetTexture) +{ + float2 loaded = TargetTexture.read(uint2(id.xy)).xy; + float2 storeTemp = loaded + float2(1.0); + TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u)))); +} + +kernel void main0(texture2d TargetTexture [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + uint3 id = gl_WorkGroupID; + uint3 param = id; + _main(param, TargetTexture); +} + diff --git a/reference/shaders-msl/asm/comp/multiple-entry.asm.comp b/reference/shaders-msl/asm/comp/multiple-entry.asm.comp index 7652733268f..35843733790 100644 --- a/reference/shaders-msl/asm/comp/multiple-entry.asm.comp +++ b/reference/shaders-msl/asm/comp/multiple-entry.asm.comp @@ -15,7 +15,7 @@ struct _7 int4 _m1; }; -kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]]) +kernel void main0(device _6& __restrict _8 [[buffer(0)]], device _7& __restrict _9 [[buffer(1)]]) { _9._m0 = _8._m1 + uint4(_8._m0); _9._m0 = uint4(_8._m0) + _8._m1; diff --git a/reference/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp b/reference/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp new file mode 100644 index 00000000000..42c13f7a4d0 --- /dev/null +++ b/reference/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp @@ -0,0 +1,111 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct 
spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _29 +{ + spvUnsafeArray, 3> _m0; +}; + +struct _7 +{ + int _m0[1]; +}; + +constant int3 _32 = {}; +constant int _3_tmp [[function_constant(0)]]; +constant int _3 = is_function_constant_defined(_3_tmp) ? _3_tmp : 0; +constant int _4_tmp [[function_constant(1)]]; +constant int _4 = is_function_constant_defined(_4_tmp) ? _4_tmp : 0; +constant int _5_tmp [[function_constant(2)]]; +constant int _5 = is_function_constant_defined(_5_tmp) ? 
_5_tmp : 0; +constant spvUnsafeArray _36 = spvUnsafeArray({ _3, 0, 0 }); +constant spvUnsafeArray _37 = spvUnsafeArray({ _3, _4, 0 }); +constant spvUnsafeArray _38 = spvUnsafeArray({ _3, _4, _5 }); +constant spvUnsafeArray _39 = spvUnsafeArray({ _4, 0, 0 }); +constant spvUnsafeArray _40 = spvUnsafeArray({ _4, _5, 0 }); +constant spvUnsafeArray _41 = spvUnsafeArray({ _4, _5, _3 }); +constant spvUnsafeArray _42 = spvUnsafeArray({ _5, 0, 0 }); +constant spvUnsafeArray _43 = spvUnsafeArray({ _5, _3, 0 }); +constant spvUnsafeArray _44 = spvUnsafeArray({ _5, _3, _4 }); +constant spvUnsafeArray, 3> _45 = spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ 0, 0, 0 }), spvUnsafeArray({ 0, 0, 0 }) }); +constant spvUnsafeArray, 3> _46 = spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _3 }), spvUnsafeArray({ 0, 0, 0 }) }); +constant spvUnsafeArray, 3> _47 = spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _3 }), spvUnsafeArray({ _5, _3, _4 }) }); +constant _29 _48 = _29{ spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _3 }), spvUnsafeArray({ _5, _3, _4 }) }) }; +constant _29 _49 = _29{ spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _5 }), spvUnsafeArray({ _5, _3, _4 }) }) }; +constant int _50 = _48._m0[0][0]; +constant int _51 = _48._m0[1][0]; +constant int _52 = _48._m0[0][1]; +constant int _53 = _48._m0[2][2]; +constant int _54 = _48._m0[2][0]; +constant int _55 = _48._m0[1][1]; +constant bool _56 = (_50 == _51); +constant bool _57 = (_52 == _53); +constant bool _58 = (_54 == _55); +constant int _59 = int(_56); +constant int _60 = int(_57); +constant int _61 = _58 ? 
2 : 1; +constant int3 _62 = int3(_3, 0, 0); +constant int3 _63 = int3(0, _4, 0); +constant int3 _64 = int3(0, 0, _5); +constant int3 _65 = int3(_62.x, 0, _62.z); +constant int3 _66 = int3(0, _63.y, _63.x); +constant int3 _67 = int3(_64.z, 0, _64.z); +constant int3 _68 = int3(_65.y, _65.x, _66.y); +constant int3 _69 = int3(_67.z, _68.y, _68.z); +constant int _70 = _69.x; +constant int _71 = _69.y; +constant int _72 = _69.z; +constant int _73 = (_70 - _71); +constant int _74 = (_73 * _72); + +constant spvUnsafeArray _33 = spvUnsafeArray({ 0, 0, 0 }); +constant spvUnsafeArray, 3> _34 = spvUnsafeArray, 3>({ spvUnsafeArray({ 0, 0, 0 }), spvUnsafeArray({ 0, 0, 0 }), spvUnsafeArray({ 0, 0, 0 }) }); + +kernel void main0(device _7& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _9._m0[gl_GlobalInvocationID.x] = _8._m0[gl_GlobalInvocationID.x] + ((((1 - _59) * _60) * (_61 - 1)) * _74); +} + diff --git a/reference/shaders-msl/asm/comp/quantize.asm.comp b/reference/shaders-msl/asm/comp/quantize.asm.comp index 1839ec7a3b8..672c2b20883 100644 --- a/reference/shaders-msl/asm/comp/quantize.asm.comp +++ b/reference/shaders-msl/asm/comp/quantize.asm.comp @@ -1,8 +1,21 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + #include #include using namespace metal; +template struct SpvHalfTypeSelector; +template <> struct SpvHalfTypeSelector { public: using H = half; }; +template struct SpvHalfTypeSelector> { using H = vec; }; +template::H> +[[clang::optnone]] F spvQuantizeToF16(F fval) +{ + H hval = H(fval); + hval = select(copysign(H(0), hval), hval, isnormal(hval) || isinf(hval) || isnan(hval)); + return F(hval); +} + struct SSBO0 { float scalar; @@ -13,9 +26,9 @@ struct SSBO0 kernel void main0(device SSBO0& _4 [[buffer(0)]]) { - _4.scalar = float(half(_4.scalar)); - _4.vec2_val = float2(half2(_4.vec2_val)); - _4.vec3_val = float3(half3(_4.vec3_val)); - _4.vec4_val = float4(half4(_4.vec4_val)); + _4.scalar = 
spvQuantizeToF16(_4.scalar); + _4.vec2_val = spvQuantizeToF16(_4.vec2_val); + _4.vec3_val = spvQuantizeToF16(_4.vec3_val); + _4.vec4_val = spvQuantizeToF16(_4.vec4_val); } diff --git a/reference/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp b/reference/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp deleted file mode 100644 index 473298c2741..00000000000 --- a/reference/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -using namespace metal; - -struct _6 -{ - float _m0[1]; -}; - -constant uint _3_tmp [[function_constant(0)]]; -constant uint _3 = is_function_constant_defined(_3_tmp) ? _3_tmp : 1u; -constant uint _4_tmp [[function_constant(2)]]; -constant uint _4 = is_function_constant_defined(_4_tmp) ? _4_tmp : 3u; -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(_3, 2u, _4); - -kernel void main0(device _6& _8 [[buffer(0)]], device _6& _9 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - _8._m0[gl_WorkGroupID.x] = _9._m0[gl_WorkGroupID.x] + _8._m0[gl_WorkGroupID.x]; - uint3 _23 = gl_WorkGroupSize; -} - diff --git a/reference/shaders-msl/asm/comp/struct-resource-name-aliasing.asm.comp b/reference/shaders-msl/asm/comp/struct-resource-name-aliasing.asm.comp index a3323bf2514..fa7d2698210 100644 --- a/reference/shaders-msl/asm/comp/struct-resource-name-aliasing.asm.comp +++ b/reference/shaders-msl/asm/comp/struct-resource-name-aliasing.asm.comp @@ -10,6 +10,7 @@ struct bufA uint _data[1]; }; +static inline __attribute__((always_inline)) void _main(device bufA& bufA_1, device bufA& bufB) { bufA_1._data[0] = 0u; diff --git a/reference/shaders-msl/asm/comp/uint_smulextended.asm.comp b/reference/shaders-msl/asm/comp/uint_smulextended.asm.comp new file mode 100644 index 00000000000..6996f7fd26a --- /dev/null +++ b/reference/shaders-msl/asm/comp/uint_smulextended.asm.comp @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct _4 +{ + uint 
_m0[1]; +}; + +struct _20 +{ + uint _m0; + uint _m1; +}; + +kernel void main0(device _4& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]], device _4& _7 [[buffer(2)]], device _4& _8 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _20 _28; + _28._m0 = uint(int(_5._m0[gl_GlobalInvocationID.x]) * int(_6._m0[gl_GlobalInvocationID.x])); + _28._m1 = uint(mulhi(int(_5._m0[gl_GlobalInvocationID.x]), int(_6._m0[gl_GlobalInvocationID.x]))); + _7._m0[gl_GlobalInvocationID.x] = _28._m0; + _8._m0[gl_GlobalInvocationID.x] = _28._m1; +} + diff --git a/reference/shaders-msl/asm/comp/undefined-constant-composite.asm.comp b/reference/shaders-msl/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 00000000000..e3ded1f2088 --- /dev/null +++ b/reference/shaders-msl/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,38 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct _20 +{ + int _m0; + int _m1; +}; + +struct _5 +{ + int _m0[10]; +}; + +struct _7 +{ + int _m0[10]; +}; + +constant int _28 = {}; + +static inline __attribute__((always_inline)) +int _39(thread const int& _41, thread const _20& _42) +{ + return _41 + _42._m1; +} + +kernel void main0(device _5& _6 [[buffer(0)]], device _7& _8 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int _32 = _8._m0[gl_GlobalInvocationID.x]; + _20 _33 = _20{ _28, 200 }; + _6._m0[gl_GlobalInvocationID.x] = _39(_32, _33); +} + diff --git a/reference/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp b/reference/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp new file mode 100644 index 00000000000..d0f2790d8d0 --- /dev/null +++ b/reference/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp @@ -0,0 +1,42 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct _21 +{ + int _m0; + int _m1; +}; + +struct _5 
+{ + int _m0[10]; +}; + +struct _7 +{ + int _m0[10]; +}; + +constant int _29 = {}; +constant int _9_tmp [[function_constant(0)]]; +constant int _9 = is_function_constant_defined(_9_tmp) ? _9_tmp : 0; +constant _21 _30 = _21{ _9, _29 }; + +static inline __attribute__((always_inline)) +int _42(thread const int& _44, thread const _21& _45, thread const _21& _46) +{ + return (_44 + _45._m0) + _46._m1; +} + +kernel void main0(device _5& _6 [[buffer(0)]], device _7& _8 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int _34 = _8._m0[gl_GlobalInvocationID.x]; + _21 _35 = _30; + _21 _36 = _21{ _29, 200 }; + _6._m0[gl_GlobalInvocationID.x] = _42(_34, _35, _36); +} + diff --git a/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp b/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp index b4e22959520..dafd6d5d4f5 100644 --- a/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp +++ b/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp @@ -17,11 +17,13 @@ struct bar int d; }; +static inline __attribute__((always_inline)) device foo* select_buffer(device foo& a, constant bar& cb) { return (cb.d != 0) ? &a : nullptr; } +static inline __attribute__((always_inline)) thread uint3* select_input(thread uint3& gl_GlobalInvocationID, thread uint3& gl_LocalInvocationID, constant bar& cb) { return (cb.d != 0) ? 
&gl_GlobalInvocationID : &gl_LocalInvocationID; @@ -31,12 +33,31 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]], { device foo* _46 = select_buffer(buf, cb); device foo* _45 = _46; - for (device int* _52 = &_45->a[0u], * _55 = &buf.a[0u]; (*_52) != (*_55); _52 = &_52[1u], _55 = &_55[1u]) + thread uint3* _47 = select_input(gl_GlobalInvocationID, gl_LocalInvocationID, cb); + device foo* _48 = _45; + device int* _52; + device int* _55; + _52 = &_48->a[0u]; + _55 = &buf.a[0u]; + int _57; + int _58; + for (;;) { - int _66 = ((*_52) + (*_55)) + int((*select_input(gl_GlobalInvocationID, gl_LocalInvocationID, cb)).x); - *_52 = _66; - *_55 = _66; - continue; + _57 = *_52; + _58 = *_55; + if (_57 != _58) + { + int _66 = (_57 + _58) + int((*_47).x); + *_52 = _66; + *_55 = _66; + _52 = &_52[1u]; + _55 = &_55[1u]; + continue; + } + else + { + break; + } } } diff --git a/reference/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp b/reference/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp index b2f8fc424ed..00c490c77fc 100644 --- a/reference/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp +++ b/reference/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp @@ -15,6 +15,7 @@ struct bar int b; }; +static inline __attribute__((always_inline)) device int* _24(device foo& a, device bar& b, thread uint3& gl_GlobalInvocationID) { return (gl_GlobalInvocationID.x != 0u) ? 
&a.a : &b.b; diff --git a/reference/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp b/reference/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp index e4092261d6d..9fb68a63192 100644 --- a/reference/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp +++ b/reference/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp @@ -7,11 +7,12 @@ using namespace metal; struct cb1_struct { - float4 _m0[1]; + float4 _RESERVED_IDENTIFIER_FIXUP_m0[1]; }; constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 16u, 1u); +static inline __attribute__((always_inline)) int2 get_texcoord(thread const int2& base, thread const int2& index, thread uint3& gl_LocalInvocationID) { return (base * int3(gl_LocalInvocationID).xy) + index; @@ -26,7 +27,7 @@ kernel void main0(constant cb1_struct& cb0_1 [[buffer(0)]], texture2d +#include + +using namespace metal; + +struct _7 +{ + float4 _m0[64]; +}; + +struct main0_out +{ + float4 m_3 [[color(0)]]; +}; + +struct main0_in +{ + float4 m_2 [[user(locn1)]]; +}; + +static inline __attribute__((always_inline)) +void _108(int _109, texture2d v_8, device _7& v_10) +{ + int2 _113 = int2(_109 - 8 * (_109 / 8), _109 / 8); + v_10._m0[_109] = v_8.read(uint2(_113), 0); +} + +static inline __attribute__((always_inline)) +float4 _98(float4 _119, texture2d v_8, device _7& v_10) +{ + for (int _121 = 0; _121 < 64; _121++) + { + _108(_121, v_8, v_10); + } + return _119; +} + +fragment main0_out main0(main0_in in [[stage_in]], device _7& v_10 [[buffer(0)]], texture2d v_8 [[texture(0)]]) +{ + main0_out out = {}; + float4 _97 = _98(in.m_2, v_8, v_10); + out.m_3 = _97; + return out; +} + diff --git a/reference/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag b/reference/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag new file mode 100644 index 00000000000..df9f8f43cf3 --- /dev/null +++ b/reference/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag @@ -0,0 +1,46 @@ +#pragma clang 
diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct _7 +{ + float4 _m0[64]; +}; + +struct main0_out +{ + float4 m_3 [[color(0)]]; +}; + +struct main0_in +{ + float4 m_2 [[user(locn1)]]; +}; + +static inline __attribute__((always_inline)) +void _108(int _109, texture2d v_8, sampler v_9, device _7& v_10) +{ + v_10._m0[_109] = v_8.sample(v_9, (float2(int2(_109 - 8 * (_109 / 8), _109 / 8)) / float2(8.0)), level(0.0)); +} + +static inline __attribute__((always_inline)) +float4 _98(float4 _121, texture2d v_8, sampler v_9, device _7& v_10) +{ + for (int _123 = 0; _123 < 64; _123++) + { + _108(_123, v_8, v_9, v_10); + } + return _121; +} + +fragment main0_out main0(main0_in in [[stage_in]], device _7& v_10 [[buffer(0)]], texture2d v_8 [[texture(0)]], sampler v_9 [[sampler(0)]]) +{ + main0_out out = {}; + float4 _97 = _98(in.m_2, v_8, v_9, v_10); + out.m_3 = _97; + return out; +} + diff --git a/reference/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag b/reference/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag new file mode 100644 index 00000000000..b64ccabe6bc --- /dev/null +++ b/reference/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 o1 [[color(1)]]; + float4 o3 [[color(3)]]; + float4 o6 [[color(6)]]; + float4 o7 [[color(7)]]; +}; + +fragment main0_out main0() +{ + float4 o0; + float4 o2; + float4 o4; + float4 o5; + float gl_FragDepth; + int gl_FragStencilRefARB; + main0_out out = {}; + o0 = float4(0.0, 0.0, 0.0, 1.0); + out.o1 = float4(1.0, 0.0, 0.0, 1.0); + o2 = float4(0.0, 1.0, 0.0, 1.0); + out.o3 = float4(0.0, 0.0, 1.0, 1.0); + o4 = float4(1.0, 0.0, 1.0, 0.5); + o5 = float4(0.25); + out.o6 = float4(0.75); + out.o7 = float4(1.0); + gl_FragDepth = 0.89999997615814208984375; + gl_FragStencilRefARB = uint(127); + return out; +} + diff --git 
a/reference/shaders-msl/asm/frag/empty-struct.asm.frag b/reference/shaders-msl/asm/frag/empty-struct.asm.frag index 0a56f1f153e..e30c5864b26 100644 --- a/reference/shaders-msl/asm/frag/empty-struct.asm.frag +++ b/reference/shaders-msl/asm/frag/empty-struct.asm.frag @@ -7,14 +7,14 @@ using namespace metal; struct EmptyStructTest { - int empty_struct_member; }; - +static inline __attribute__((always_inline)) float GetValue(thread const EmptyStructTest& self) { return 0.0; } +static inline __attribute__((always_inline)) float GetValue_1(EmptyStructTest self) { return 0.0; @@ -22,9 +22,8 @@ float GetValue_1(EmptyStructTest self) fragment void main0() { - EmptyStructTest _23 = EmptyStructTest{ 0 }; EmptyStructTest emptyStruct; float value = GetValue(emptyStruct); - value = GetValue_1(_23); + value = GetValue_1(EmptyStructTest{ }); } diff --git a/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag b/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag index 1f92b341a80..e926bc5f3e8 100644 --- a/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag +++ b/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag @@ -28,6 +28,7 @@ struct main0_out float4 _entryPointOutput [[color(0)]]; }; +static inline __attribute__((always_inline)) float4 _main(thread const float4& pos, constant buf& v_11) { int _46 = int(pos.x) % 16; diff --git a/reference/shaders-msl/asm/frag/function-overload-alias.asm.frag b/reference/shaders-msl/asm/frag/function-overload-alias.asm.frag index 1a6314c8199..40fe5c5d812 100644 --- a/reference/shaders-msl/asm/frag/function-overload-alias.asm.frag +++ b/reference/shaders-msl/asm/frag/function-overload-alias.asm.frag @@ -10,21 +10,25 @@ struct main0_out float4 FragColor [[color(0)]]; }; +static inline __attribute__((always_inline)) float4 foo(thread const float4& foo_1) { return foo_1 + float4(1.0); } +static inline __attribute__((always_inline)) float4 foo(thread const float3& foo_1) { return 
foo_1.xyzz + float4(1.0); } +static inline __attribute__((always_inline)) float4 foo_1(thread const float4& foo_2) { return foo_2 + float4(2.0); } +static inline __attribute__((always_inline)) float4 foo(thread const float2& foo_2) { return foo_2.xyxy + float4(2.0); diff --git a/reference/shaders-msl/asm/frag/inf-nan-constant.asm.frag b/reference/shaders-msl/asm/frag/inf-nan-constant.asm.frag index 8537dac19a1..067719896b8 100644 --- a/reference/shaders-msl/asm/frag/inf-nan-constant.asm.frag +++ b/reference/shaders-msl/asm/frag/inf-nan-constant.asm.frag @@ -11,7 +11,7 @@ struct main0_out fragment main0_out main0() { main0_out out = {}; - out.FragColor = float3(as_type(0x7f800000u), as_type(0xff800000u), as_type(0x7fc00000u)); + out.FragColor = float3(as_type(0x7f800000u /* inf */), as_type(0xff800000u /* -inf */), as_type(0x7fc00000u /* nan */)); return out; } diff --git a/reference/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag b/reference/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag index 41472adac94..6407b32b2e8 100644 --- a/reference/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag +++ b/reference/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag @@ -21,26 +21,26 @@ struct main0_out struct main0_in { - float2 Input_v0 [[user(locn0)]]; - float2 Input_v1 [[user(locn1), center_no_perspective]]; - float3 Input_v2 [[user(locn2), centroid_perspective]]; - float4 Input_v3 [[user(locn3), centroid_no_perspective]]; - float Input_v4 [[user(locn4), sample_perspective]]; - float Input_v5 [[user(locn5), sample_no_perspective]]; - float Input_v6 [[user(locn6), flat]]; + float2 inp_v0 [[user(locn0)]]; + float2 inp_v1 [[user(locn1), center_no_perspective]]; + float3 inp_v2 [[user(locn2), centroid_perspective]]; + float4 inp_v3 [[user(locn3), centroid_no_perspective]]; + float inp_v4 [[user(locn4), sample_perspective]]; + float inp_v5 [[user(locn5), sample_no_perspective]]; + float inp_v6 [[user(locn6), flat]]; }; 
fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; Input inp = {}; - inp.v0 = in.Input_v0; - inp.v1 = in.Input_v1; - inp.v2 = in.Input_v2; - inp.v3 = in.Input_v3; - inp.v4 = in.Input_v4; - inp.v5 = in.Input_v5; - inp.v6 = in.Input_v6; + inp.v0 = in.inp_v0; + inp.v1 = in.inp_v1; + inp.v2 = in.inp_v2; + inp.v3 = in.inp_v3; + inp.v4 = in.inp_v4; + inp.v5 = in.inp_v5; + inp.v6 = in.inp_v6; out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, ((inp.v3.w * inp.v4) + inp.v5) - inp.v6); return out; } diff --git a/reference/shaders-msl/asm/frag/line-directive.line.asm.frag b/reference/shaders-msl/asm/frag/line-directive.line.asm.frag index e1d2a9d7b9d..38fbd27ba3a 100644 --- a/reference/shaders-msl/asm/frag/line-directive.line.asm.frag +++ b/reference/shaders-msl/asm/frag/line-directive.line.asm.frag @@ -16,6 +16,7 @@ struct main0_in }; #line 6 "test.frag" +static inline __attribute__((always_inline)) void func(thread float& FragColor, thread float& vColor) { #line 8 "test.frag" diff --git a/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag b/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag index 48f3317d2e2..41193536e69 100644 --- a/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag +++ b/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag @@ -1,13 +1,52 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -constant float _46[16] = { 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }; -constant float4 _76[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; -constant float4 _90[4] = { float4(20.0), float4(30.0), float4(50.0), float4(60.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _46 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }); +constant spvUnsafeArray _76 = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); +constant spvUnsafeArray _90 = spvUnsafeArray({ float4(20.0), float4(30.0), float4(50.0), float4(60.0) }); struct main0_out { @@ -19,23 +58,10 @@ struct main0_in int index [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0(main0_in in [[stage_in]]) { - float4 foobar[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; - float4 baz[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; + spvUnsafeArray foobar = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); + spvUnsafeArray baz = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); main0_out out = {}; out.FragColor = _46[in.index]; if (in.index < 10) @@ -59,7 +85,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) foobar[1].z = 20.0; } out.FragColor += foobar[in.index & 3].z; - spvArrayCopyFromConstant1(baz, _90); + baz = _90; out.FragColor += baz[in.index & 3].z; return out; } diff --git a/reference/shaders-msl/asm/frag/op-constant-null.asm.frag b/reference/shaders-msl/asm/frag/op-constant-null.asm.frag index 9d5d7fb1d69..f8104f81657 100644 --- a/reference/shaders-msl/asm/frag/op-constant-null.asm.frag +++ b/reference/shaders-msl/asm/frag/op-constant-null.asm.frag @@ -1,15 +1,56 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct D { float4 a; float b; }; -constant float4 _14[4] = { float4(0.0), float4(0.0), float4(0.0), float4(0.0) }; +constant spvUnsafeArray _14 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); struct main0_out { diff --git a/reference/shaders-msl/asm/frag/pass-by-value.asm.frag b/reference/shaders-msl/asm/frag/pass-by-value.asm.frag index 6ed945ecff0..46648f6d309 100644 --- a/reference/shaders-msl/asm/frag/pass-by-value.asm.frag +++ b/reference/shaders-msl/asm/frag/pass-by-value.asm.frag @@ -15,6 +15,7 @@ struct main0_out float FragColor [[color(0)]]; }; +static inline __attribute__((always_inline)) float add_value(float v, float w) { return v + w; diff --git a/reference/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag b/reference/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag new file mode 100644 index 00000000000..02ec4751bb1 --- /dev/null +++ b/reference/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag @@ -0,0 +1,187 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _13 +{ + float4 x; + float4 y; + float4 z; + spvUnsafeArray u; + spvUnsafeArray v; + spvUnsafeArray w; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + interpolant foo [[user(locn0)]]; + interpolant bar [[user(locn1)]]; + interpolant baz [[user(locn2)]]; + int sid [[user(locn3)]]; + interpolant a_0 [[user(locn4)]]; + interpolant a_1 [[user(locn5)]]; + interpolant b_0 [[user(locn6)]]; + interpolant b_1 [[user(locn7)]]; + interpolant c_0 [[user(locn8)]]; + interpolant c_1 [[user(locn9)]]; + interpolant s_x [[user(locn10)]]; + interpolant s_y [[user(locn11)]]; + interpolant s_z [[user(locn12)]]; + interpolant s_u_0 [[user(locn13)]]; + interpolant s_u_1 [[user(locn14)]]; + interpolant s_v_0 [[user(locn15)]]; + interpolant s_v_1 [[user(locn16)]]; + interpolant s_w_0 [[user(locn17)]]; + interpolant s_w_1 [[user(locn18)]]; + interpolant s_w_2 [[user(locn19)]]; +}; + +static inline __attribute__((always_inline)) +void func(thread float4& FragColor, thread float2 baz, thread spvUnsafeArray& a, thread _13& s, thread main0_in& in) +{ + float2 _237 = FragColor.xy + baz; + FragColor = float4(_237.x, _237.y, FragColor.z, FragColor.w); + FragColor.x += in.baz.interpolate_at_centroid().x; + FragColor.y += in.baz.interpolate_at_sample(3).y; + FragColor.z += 
in.baz.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375).y; + float2 _262 = FragColor.xy + in.a_1.interpolate_at_centroid(); + FragColor = float4(_262.x, _262.y, FragColor.z, FragColor.w); + float2 _269 = FragColor.xy + in.a_0.interpolate_at_sample(2); + FragColor = float4(_269.x, _269.y, FragColor.z, FragColor.w); + float2 _276 = FragColor.xy + in.a_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + FragColor = float4(_276.x, _276.y, FragColor.z, FragColor.w); + FragColor += s.z; + float2 _288 = FragColor.xy + in.s_z.interpolate_at_centroid().yy; + FragColor = float4(_288.x, _288.y, FragColor.z, FragColor.w); + float2 _296 = FragColor.yz + in.s_z.interpolate_at_sample(3).xy; + FragColor = float4(FragColor.x, _296.x, _296.y, FragColor.w); + float2 _304 = FragColor.zw + in.s_z.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375).wx; + FragColor = float4(FragColor.x, FragColor.y, _304.x, _304.y); + FragColor += s.u[0]; + FragColor += in.s_u_1.interpolate_at_centroid(); + FragColor += in.s_u_0.interpolate_at_sample(2); + FragColor += in.s_u_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); +} + +fragment main0_out main0(main0_in in [[stage_in]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + spvUnsafeArray a = {}; + _13 s = {}; + spvUnsafeArray b = {}; + spvUnsafeArray c = {}; + a[0] = in.a_0.interpolate_at_center(); + a[1] = in.a_1.interpolate_at_center(); + s.x = in.s_x.interpolate_at_center(); + s.y = in.s_y.interpolate_at_centroid(); + s.z = in.s_z.interpolate_at_sample(gl_SampleID); + s.u[0] = in.s_u_0.interpolate_at_centroid(); + s.u[1] = in.s_u_1.interpolate_at_centroid(); + s.v[0] = in.s_v_0.interpolate_at_sample(gl_SampleID); + s.v[1] = in.s_v_1.interpolate_at_sample(gl_SampleID); + s.w[0] = in.s_w_0.interpolate_at_center(); + s.w[1] = 
in.s_w_1.interpolate_at_center(); + s.w[2] = in.s_w_2.interpolate_at_center(); + b[0] = in.b_0.interpolate_at_centroid(); + b[1] = in.b_1.interpolate_at_centroid(); + c[0] = in.c_0.interpolate_at_sample(gl_SampleID); + c[1] = in.c_1.interpolate_at_sample(gl_SampleID); + out.FragColor = in.foo.interpolate_at_center(); + out.FragColor += in.foo.interpolate_at_centroid(); + out.FragColor += in.foo.interpolate_at_sample(in.sid); + out.FragColor += in.foo.interpolate_at_offset(float2(0.100000001490116119384765625) + 0.4375); + float3 _65 = out.FragColor.xyz + in.bar.interpolate_at_centroid(); + out.FragColor = float4(_65.x, _65.y, _65.z, out.FragColor.w); + float3 _71 = out.FragColor.xyz + in.bar.interpolate_at_centroid(); + out.FragColor = float4(_71.x, _71.y, _71.z, out.FragColor.w); + float3 _78 = out.FragColor.xyz + in.bar.interpolate_at_sample(in.sid); + out.FragColor = float4(_78.x, _78.y, _78.z, out.FragColor.w); + float3 _84 = out.FragColor.xyz + in.bar.interpolate_at_offset(float2(-0.100000001490116119384765625) + 0.4375); + out.FragColor = float4(_84.x, _84.y, _84.z, out.FragColor.w); + float2 _91 = out.FragColor.xy + b[0]; + out.FragColor = float4(_91.x, _91.y, out.FragColor.z, out.FragColor.w); + float2 _98 = out.FragColor.xy + in.b_1.interpolate_at_centroid(); + out.FragColor = float4(_98.x, _98.y, out.FragColor.z, out.FragColor.w); + float2 _105 = out.FragColor.xy + in.b_0.interpolate_at_sample(2); + out.FragColor = float4(_105.x, _105.y, out.FragColor.z, out.FragColor.w); + float2 _112 = out.FragColor.xy + in.b_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + out.FragColor = float4(_112.x, _112.y, out.FragColor.z, out.FragColor.w); + float2 _119 = out.FragColor.xy + c[0]; + out.FragColor = float4(_119.x, _119.y, out.FragColor.z, out.FragColor.w); + float2 _127 = out.FragColor.xy + in.c_1.interpolate_at_centroid().xy; + out.FragColor = float4(_127.x, _127.y, out.FragColor.z, out.FragColor.w); + 
float2 _135 = out.FragColor.xy + in.c_0.interpolate_at_sample(2).yx; + out.FragColor = float4(_135.x, _135.y, out.FragColor.z, out.FragColor.w); + float2 _143 = out.FragColor.xy + in.c_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375).xx; + out.FragColor = float4(_143.x, _143.y, out.FragColor.z, out.FragColor.w); + out.FragColor += s.x; + out.FragColor += in.s_x.interpolate_at_centroid(); + out.FragColor += in.s_x.interpolate_at_sample(in.sid); + out.FragColor += in.s_x.interpolate_at_offset(float2(0.100000001490116119384765625) + 0.4375); + out.FragColor += s.y; + out.FragColor += in.s_y.interpolate_at_centroid(); + out.FragColor += in.s_y.interpolate_at_sample(in.sid); + out.FragColor += in.s_y.interpolate_at_offset(float2(-0.100000001490116119384765625) + 0.4375); + float2 _184 = out.FragColor.xy + s.v[0]; + out.FragColor = float4(_184.x, _184.y, out.FragColor.z, out.FragColor.w); + float2 _191 = out.FragColor.xy + in.s_v_1.interpolate_at_centroid(); + out.FragColor = float4(_191.x, _191.y, out.FragColor.z, out.FragColor.w); + float2 _198 = out.FragColor.xy + in.s_v_0.interpolate_at_sample(2); + out.FragColor = float4(_198.x, _198.y, out.FragColor.z, out.FragColor.w); + float2 _205 = out.FragColor.xy + in.s_v_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + out.FragColor = float4(_205.x, _205.y, out.FragColor.z, out.FragColor.w); + out.FragColor.x += s.w[0]; + out.FragColor.x += in.s_w_1.interpolate_at_centroid(); + out.FragColor.x += in.s_w_0.interpolate_at_sample(2); + out.FragColor.x += in.s_w_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + func(out.FragColor, in.baz.interpolate_at_sample(gl_SampleID), a, s, in); + return out; +} + diff --git a/reference/shaders-msl/asm/frag/single-function-private-lut.asm.frag b/reference/shaders-msl/asm/frag/single-function-private-lut.asm.frag index 
628d5c7c1b4..2ea037407f2 100644 --- a/reference/shaders-msl/asm/frag/single-function-private-lut.asm.frag +++ b/reference/shaders-msl/asm/frag/single-function-private-lut.asm.frag @@ -1,44 +1,70 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -struct myType -{ - float data; -}; - -constant myType _21[5] = { myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 } }; - -struct main0_out +template +struct spvUnsafeArray { - float4 o_color [[color(0)]]; + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } }; // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +struct myType { - for (uint i = 0; i < N; dst[i] = src[i], i++); -} + float data; +}; -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +struct main0_out { - for (uint i = 0; i < N; dst[i] = src[i], i++); -} + float4 o_color [[color(0)]]; +}; fragment main0_out main0(float4 gl_FragCoord [[position]]) { + spvUnsafeArray _21 = spvUnsafeArray({ myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 } }); + main0_out out = {}; float2 uv = gl_FragCoord.xy; int index = int(mod(uv.x, 4.0)); diff --git a/reference/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag b/reference/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag index 1bafc6953ba..d59013daaf8 100644 --- a/reference/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag +++ b/reference/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag @@ -1,9 +1,50 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; -constant float4 _20[2] = { float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _20 = spvUnsafeArray({ float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }); struct main0_out { @@ -15,7 +56,7 @@ struct main0_out fragment main0_out main0() { main0_out out = {}; - float4 FragColors[2] = { float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }; + spvUnsafeArray FragColors = spvUnsafeArray({ float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }); out.FragColor = float4(5.0); out.FragColors_0 = FragColors[0]; out.FragColors_1 = FragColors[1]; diff --git a/reference/shaders-msl/asm/frag/switch-different-sizes.asm.frag b/reference/shaders-msl/asm/frag/switch-different-sizes.asm.frag new file mode 100644 index 00000000000..1ee9eebf005 --- /dev/null +++ b/reference/shaders-msl/asm/frag/switch-different-sizes.asm.frag @@ -0,0 +1,78 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + int sw0 = 42; + int result = 0; + switch (sw0) + { + case -42: + { + result = 42; + } + case 420: + { + result = 420; + } + case -1234: + { + result = 420; + break; + } + } + char sw1 = char(10); + switch (sw1) + { + case -42: + { + result = 42; + } + case 42: + { + result = 420; + } + case -123: + { + result = 512; + break; + } + } + short sw2 = short(10); + switch (sw2) + { + case -42: + { + result = 42; + } + case 42: + { + result = 420; + } + case -1234: + { + result = 512; + break; + } + } + 
short sw3 = short(10); + switch (sw3) + { + case -42: + { + result = 42; + } + case 42: + { + result = 420; + } + case -1234: + { + result = 512; + break; + } + } +} + diff --git a/reference/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag b/reference/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag new file mode 100644 index 00000000000..5ec002e7806 --- /dev/null +++ b/reference/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag @@ -0,0 +1,27 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + long sw = 42l; + int result = 0; + switch (sw) + { + case -42l: + { + result = 42; + } + case 420l: + { + result = 420; + } + case -34359738368l: + { + result = 420; + break; + } + } +} + diff --git a/reference/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag b/reference/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag new file mode 100644 index 00000000000..2bf44c207b8 --- /dev/null +++ b/reference/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag @@ -0,0 +1,27 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + ulong sw = 42ul; + int result = 0; + switch (sw) + { + case 42ul: + { + result = 42; + } + case 420ul: + { + result = 420; + } + case 343597383680ul: + { + result = 420; + break; + } + } +} + diff --git a/reference/shaders-msl/asm/frag/texture-atomics.asm.frag b/reference/shaders-msl/asm/frag/texture-atomics.asm.frag new file mode 100644 index 00000000000..ab5be649849 --- /dev/null +++ b/reference/shaders-msl/asm/frag/texture-atomics.asm.frag @@ -0,0 +1,121 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + spvUnsafeArray _m0; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _102 = float3(_100.x, _100.y, _70.z); + _102.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102)) + { + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + 
_73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz)); + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/shaders-msl/asm/frag/texture-atomics.asm.graphics-robust-access.frag b/reference/shaders-msl/asm/frag/texture-atomics.asm.graphics-robust-access.frag new file mode 100644 index 00000000000..ab5be649849 --- /dev/null +++ b/reference/shaders-msl/asm/frag/texture-atomics.asm.graphics-robust-access.frag @@ -0,0 +1,121 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + spvUnsafeArray _m0; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _102 = float3(_100.x, _100.y, _70.z); + _102.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102)) + { + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + 
_73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz)); + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/shaders-msl/asm/frag/unknown-depth-state.asm.frag b/reference/shaders-msl/asm/frag/unknown-depth-state.asm.frag index ce2b95196e3..e512bdca497 100644 --- a/reference/shaders-msl/asm/frag/unknown-depth-state.asm.frag +++ b/reference/shaders-msl/asm/frag/unknown-depth-state.asm.frag @@ -15,12 +15,14 @@ struct main0_in float3 vUV [[user(locn0)]]; }; -float sample_combined(thread float3& vUV, thread depth2d uShadow, thread const sampler uShadowSmplr) +static inline __attribute__((always_inline)) +float sample_combined(thread float3& vUV, depth2d uShadow, sampler uShadowSmplr) { return uShadow.sample_compare(uShadowSmplr, vUV.xy, vUV.z); } -float sample_separate(thread float3& vUV, thread depth2d uTexture, thread sampler uSampler) +static inline __attribute__((always_inline)) +float 
sample_separate(thread float3& vUV, depth2d uTexture, sampler uSampler) { return uTexture.sample_compare(uSampler, vUV.xy, vUV.z); } diff --git a/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag b/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag index 8df57c55b0d..624408c4d6f 100644 --- a/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag +++ b/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag @@ -29,25 +29,26 @@ fragment main0_out main0(main0_in in [[stage_in]]) float t0 = a; float t1 = b; bool c1 = (isunordered(a, b) || a == b); - bool c2 = (isunordered(a, b) || a != b); + c1 = a != b; + bool c2 = a != b; bool c3 = (isunordered(a, b) || a < b); bool c4 = (isunordered(a, b) || a > b); bool c5 = (isunordered(a, b) || a <= b); bool c6 = (isunordered(a, b) || a >= b); bool2 c7 = (isunordered(in.c, in.d) || in.c == in.d); - bool2 c8 = (isunordered(in.c, in.d) || in.c != in.d); + bool2 c8 = in.c != in.d; bool2 c9 = (isunordered(in.c, in.d) || in.c < in.d); bool2 c10 = (isunordered(in.c, in.d) || in.c > in.d); bool2 c11 = (isunordered(in.c, in.d) || in.c <= in.d); bool2 c12 = (isunordered(in.c, in.d) || in.c >= in.d); bool3 c13 = (isunordered(in.e, in.f) || in.e == in.f); - bool3 c14 = (isunordered(in.e, in.f) || in.e != in.f); + bool3 c14 = in.e != in.f; bool3 c15 = (isunordered(in.e, in.f) || in.e < in.f); bool3 c16 = (isunordered(in.e, in.f) || in.e > in.f); bool3 c17 = (isunordered(in.e, in.f) || in.e <= in.f); bool3 c18 = (isunordered(in.e, in.f) || in.e >= in.f); bool4 c19 = (isunordered(in.g, in.h) || in.g == in.h); - bool4 c20 = (isunordered(in.g, in.h) || in.g != in.h); + bool4 c20 = in.g != in.h; bool4 c21 = (isunordered(in.g, in.h) || in.g < in.h); bool4 c22 = (isunordered(in.g, in.h) || in.g > in.h); bool4 c23 = (isunordered(in.g, in.h) || in.g <= in.h); diff --git a/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag new file 
mode 100644 index 00000000000..4848280635b --- /dev/null +++ b/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag @@ -0,0 +1,59 @@ +#include +#include + +using namespace metal; + +constant float a_tmp [[function_constant(1)]]; +constant float a = is_function_constant_defined(a_tmp) ? a_tmp : 1.0; +constant float b_tmp [[function_constant(2)]]; +constant float b = is_function_constant_defined(b_tmp) ? b_tmp : 2.0; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 c [[user(locn2)]]; + float2 d [[user(locn3)]]; + float3 e [[user(locn4)]]; + float3 f [[user(locn5)]]; + float4 g [[user(locn6)]]; + float4 h [[user(locn7)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float t0 = a; + float t1 = b; + bool c1 = a == b; + c1 = a != b; + bool c2 = a != b; + bool c3 = a < b; + bool c4 = a > b; + bool c5 = a <= b; + bool c6 = a >= b; + bool2 c7 = in.c == in.d; + bool2 c8 = in.c != in.d; + bool2 c9 = in.c < in.d; + bool2 c10 = in.c > in.d; + bool2 c11 = in.c <= in.d; + bool2 c12 = in.c >= in.d; + bool3 c13 = in.e == in.f; + bool3 c14 = in.e != in.f; + bool3 c15 = in.e < in.f; + bool3 c16 = in.e > in.f; + bool3 c17 = in.e <= in.f; + bool3 c18 = in.e >= in.f; + bool4 c19 = in.g == in.h; + bool4 c20 = in.g != in.h; + bool4 c21 = in.g < in.h; + bool4 c22 = in.g > in.h; + bool4 c23 = in.g <= in.h; + bool4 c24 = in.g >= in.h; + out.FragColor = float4(t0 + t1); + return out; +} + diff --git a/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag b/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag index e55d9aeada7..9e73be477a9 100644 --- a/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag @@ -128,7 +128,7 @@ struct main0_out fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buffer(1)]], constant _18& _19 [[buffer(2)]], texture2d _8 [[texture(0)]], texture2d _12 
[[texture(1)]], texture2d _14 [[texture(2)]], sampler _9 [[sampler(0)]], sampler _13 [[sampler(1)]], sampler _15 [[sampler(2)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; - _28 _77 = _74; + _28 _77; _77._m0 = float4(0.0); float2 _82 = gl_FragCoord.xy * _19._m23.xy; float4 _88 = _7._m2 * _7._m0.xyxy; @@ -144,11 +144,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _129 = _109; } - float3 _130 = _129 * 0.5; - float3 _133 = float4(0.0).xyz + _130; + float3 _133 = float4(0.0).xyz + (_129 * 0.5); float4 _134 = float4(_133.x, _133.y, _133.z, float4(0.0).w); - _28 _135 = _77; - _135._m0 = _134; + _77._m0 = _134; float2 _144 = fast::clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _156 = float3(_11._m5) * fast::clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _160 = _12.sample(_13, _144, level(0.0)); @@ -161,11 +159,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _176 = _156; } - float3 _177 = _176 * 0.5; - float3 _180 = _134.xyz + _177; + float3 _180 = _134.xyz + (_176 * 0.5); float4 _181 = float4(_180.x, _180.y, _180.z, _134.w); - _28 _182 = _135; - _182._m0 = _181; + _77._m0 = _181; float2 _191 = fast::clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); float3 _203 = float3(_11._m5) * fast::clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _207 = _12.sample(_13, _191, level(0.0)); @@ -178,11 +174,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _223 = _203; } - float3 _224 = _223 * 0.75; - float3 _227 = _181.xyz + _224; + float3 _227 = _181.xyz + (_223 * 0.75); float4 _228 = float4(_227.x, _227.y, _227.z, _181.w); - _28 _229 = _182; - _229._m0 = _228; + _77._m0 = _228; float2 _238 = fast::clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _250 = float3(_11._m5) * fast::clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0); 
float4 _254 = _12.sample(_13, _238, level(0.0)); @@ -195,11 +189,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _270 = _250; } - float3 _271 = _270 * 0.5; - float3 _274 = _228.xyz + _271; + float3 _274 = _228.xyz + (_270 * 0.5); float4 _275 = float4(_274.x, _274.y, _274.z, _228.w); - _28 _276 = _229; - _276._m0 = _275; + _77._m0 = _275; float2 _285 = fast::clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _297 = float3(_11._m5) * fast::clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _301 = _12.sample(_13, _285, level(0.0)); @@ -212,11 +204,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _317 = _297; } - float3 _318 = _317 * 0.5; - float3 _321 = _275.xyz + _318; + float3 _321 = _275.xyz + (_317 * 0.5); float4 _322 = float4(_321.x, _321.y, _321.z, _275.w); - _28 _323 = _276; - _323._m0 = _322; + _77._m0 = _322; float2 _332 = fast::clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); float3 _344 = float3(_11._m5) * fast::clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _348 = _12.sample(_13, _332, level(0.0)); @@ -229,11 +219,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _364 = _344; } - float3 _365 = _364 * 0.75; - float3 _368 = _322.xyz + _365; + float3 _368 = _322.xyz + (_364 * 0.75); float4 _369 = float4(_368.x, _368.y, _368.z, _322.w); - _28 _370 = _323; - _370._m0 = _369; + _77._m0 = _369; float2 _379 = fast::clamp(_82 + (float3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw); float3 _391 = float3(_11._m5) * fast::clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _395 = _12.sample(_13, _379, level(0.0)); @@ -246,11 +234,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _411 = _391; } - float3 _412 = _411 * 1.0; - float3 _415 = _369.xyz + _412; + float3 _415 = _369.xyz + (_411 * 1.0); float4 _416 = 
float4(_415.x, _415.y, _415.z, _369.w); - _28 _417 = _370; - _417._m0 = _416; + _77._m0 = _416; float2 _426 = fast::clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); float3 _438 = float3(_11._m5) * fast::clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _442 = _12.sample(_13, _426, level(0.0)); @@ -263,11 +249,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _458 = _438; } - float3 _459 = _458 * 0.75; - float3 _462 = _416.xyz + _459; + float3 _462 = _416.xyz + (_458 * 0.75); float4 _463 = float4(_462.x, _462.y, _462.z, _416.w); - _28 _464 = _417; - _464._m0 = _463; + _77._m0 = _463; float2 _473 = fast::clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _485 = float3(_11._m5) * fast::clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _489 = _12.sample(_13, _473, level(0.0)); @@ -280,11 +264,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _505 = _485; } - float3 _506 = _505 * 0.5; - float3 _509 = _463.xyz + _506; + float3 _509 = _463.xyz + (_505 * 0.5); float4 _510 = float4(_509.x, _509.y, _509.z, _463.w); - _28 _511 = _464; - _511._m0 = _510; + _77._m0 = _510; float2 _520 = fast::clamp(_82 + (float3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _532 = float3(_11._m5) * fast::clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _536 = _12.sample(_13, _520, level(0.0)); @@ -297,11 +279,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _552 = _532; } - float3 _553 = _552 * 0.5; - float3 _556 = _510.xyz + _553; + float3 _556 = _510.xyz + (_552 * 0.5); float4 _557 = float4(_556.x, _556.y, _556.z, _510.w); - _28 _558 = _511; - _558._m0 = _557; + _77._m0 = _557; float2 _567 = fast::clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); float3 _579 = float3(_11._m5) * fast::clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0); float4 
_583 = _12.sample(_13, _567, level(0.0)); @@ -314,11 +294,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _599 = _579; } - float3 _600 = _599 * 0.75; - float3 _603 = _557.xyz + _600; + float3 _603 = _557.xyz + (_599 * 0.75); float4 _604 = float4(_603.x, _603.y, _603.z, _557.w); - _28 _605 = _558; - _605._m0 = _604; + _77._m0 = _604; float2 _614 = fast::clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _626 = float3(_11._m5) * fast::clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _630 = _12.sample(_13, _614, level(0.0)); @@ -331,11 +309,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _646 = _626; } - float3 _647 = _646 * 0.5; - float3 _650 = _604.xyz + _647; + float3 _650 = _604.xyz + (_646 * 0.5); float4 _651 = float4(_650.x, _650.y, _650.z, _604.w); - _28 _652 = _605; - _652._m0 = _651; + _77._m0 = _651; float2 _661 = fast::clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _673 = float3(_11._m5) * fast::clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _677 = _12.sample(_13, _661, level(0.0)); @@ -350,14 +326,11 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff } float3 _697 = _651.xyz + (_693 * 0.5); float4 _698 = float4(_697.x, _697.y, _697.z, _651.w); - _28 _699 = _652; - _699._m0 = _698; + _77._m0 = _698; float3 _702 = _698.xyz / float3(((((((((((((0.0 + 0.5) + 0.5) + 0.75) + 0.5) + 0.5) + 0.75) + 1.0) + 0.75) + 0.5) + 0.5) + 0.75) + 0.5) + 0.5); - _28 _704 = _699; - _704._m0 = float4(_702.x, _702.y, _702.z, _698.w); - _28 _705 = _704; - _705._m0.w = 1.0; - out.m_5 = _705._m0; + _77._m0 = float4(_702.x, _702.y, _702.z, _698.w); + _77._m0.w = 1.0; + out.m_5 = _77._m0; return out; } diff --git a/reference/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc b/reference/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc new file mode 100644 
index 00000000000..79395a4bbb2 --- /dev/null +++ b/reference/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct TessLevels +{ + float inner0; + float inner1; + float outer0; + float outer1; + float outer2; + float outer3; +}; + +kernel void main0(const device TessLevels& sb_levels [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(sb_levels.inner0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(sb_levels.outer0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(sb_levels.outer1); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(sb_levels.outer2); +} + diff --git a/reference/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese b/reference/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese index 83ef729321e..7fd48b4f7d0 100644 --- a/reference/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese +++ b/reference/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -10,14 +51,23 @@ struct main0_out struct main0_patchIn { - float2 gl_TessLevelInner [[attribute(0)]]; - float4 gl_TessLevelOuter [[attribute(1)]]; + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; - out.gl_Position = float4(((gl_TessCoord.x * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.x) + (((1.0 - gl_TessCoord.x) * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.z), ((gl_TessCoord.y * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.y) + (((1.0 - gl_TessCoord.y) * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.w), 0.0, 1.0); + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = 
patchIn.gl_TessLevelOuter[3]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + out.gl_Position = float4(((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0]) + (((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), ((gl_TessCoord.y * gl_TessLevelInner[1]) * gl_TessLevelOuter[1]) + (((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]), 0.0, 1.0); return out; } diff --git a/reference/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert b/reference/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert new file mode 100644 index 00000000000..ae42798e313 --- /dev/null +++ b/reference/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert @@ -0,0 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct VSOut +{ + float4 pos; + float2 clip; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +struct main0_in +{ + float4 pos [[attribute(0)]]; +}; + +static inline __attribute__((always_inline)) +VSOut _main(thread const float4& pos) +{ + VSOut vout; + vout.pos = pos; + vout.clip = pos.xy; + return vout; +} + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 pos = in.pos; + float4 param = pos; + VSOut flattenTemp = _main(param); + out.gl_Position = flattenTemp.pos; + out.gl_ClipDistance[0] = flattenTemp.clip.x; + out.gl_ClipDistance[1] = flattenTemp.clip.y; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert b/reference/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert new file mode 100644 index 00000000000..c1e59b9c146 --- /dev/null +++ 
b/reference/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert @@ -0,0 +1,45 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct VSOut +{ + float4 pos; + float2 clip; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; +}; + +struct main0_in +{ + float4 pos [[attribute(0)]]; +}; + +static inline __attribute__((always_inline)) +VSOut _main(thread const float4& pos) +{ + VSOut vout; + vout.pos = pos; + vout.clip = pos.xy; + return vout; +} + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 pos = in.pos; + float4 param = pos; + VSOut flattenTemp = _main(param); + out.gl_Position = flattenTemp.pos; + out.gl_ClipDistance[0] = flattenTemp.clip.x; + out.gl_ClipDistance[1] = flattenTemp.clip.y; + return out; +} + diff --git a/reference/shaders-msl/asm/vert/extract-transposed-matrix-from-struct.asm.vert b/reference/shaders-msl/asm/vert/extract-transposed-matrix-from-struct.asm.vert index 667d363a389..b94687a52bc 100644 --- a/reference/shaders-msl/asm/vert/extract-transposed-matrix-from-struct.asm.vert +++ b/reference/shaders-msl/asm/vert/extract-transposed-matrix-from-struct.asm.vert @@ -39,6 +39,7 @@ struct main0_in float3 PosL [[attribute(0)]]; }; +static inline __attribute__((always_inline)) V2F _VS(thread const float3& PosL, thread const uint& instanceID, const device gInstanceData& gInstanceData_1) { InstanceData instData; diff --git a/reference/shaders-msl/asm/vert/fake-builtin-input.asm.vert b/reference/shaders-msl/asm/vert/fake-builtin-input.asm.vert index f9fcbc85c30..3079ae9bcbb 100644 --- a/reference/shaders-msl/asm/vert/fake-builtin-input.asm.vert +++ b/reference/shaders-msl/asm/vert/fake-builtin-input.asm.vert @@ -5,6 +5,7 @@ using namespace metal; struct main0_out { + half4 out_var_SV_Target [[user(locn0)]]; float4 gl_Position [[position]]; }; diff --git 
a/reference/shaders-msl/asm/vert/invariant.msl21.asm.vert b/reference/shaders-msl/asm/vert/invariant.msl21.asm.vert index 1e3a86531c8..d74c43b3f05 100644 --- a/reference/shaders-msl/asm/vert/invariant.msl21.asm.vert +++ b/reference/shaders-msl/asm/vert/invariant.msl21.asm.vert @@ -10,6 +10,7 @@ struct main0_out float4 gl_Position [[position, invariant]]; }; +static inline __attribute__((always_inline)) float4 _main() { return float4(1.0); diff --git a/reference/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert b/reference/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert new file mode 100644 index 00000000000..1926ff9e14e --- /dev/null +++ b/reference/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _9 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _9.umatrix * float4(_9.uquad[int(gl_VertexIndex)].x, _9.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_9.flags.flags[0] != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert b/reference/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert new file mode 100644 index 00000000000..ee206385746 --- /dev/null +++ b/reference/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint2 flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 
a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _9 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _9.umatrix * float4(_9.uquad[int(gl_VertexIndex)].x, _9.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_9.flags.flags[0].x != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert b/reference/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert index 05a69e48a03..074a47a652c 100644 --- a/reference/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert +++ b/reference/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert @@ -1,19 +1,34 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + #include #include using namespace metal; +template struct SpvHalfTypeSelector; +template <> struct SpvHalfTypeSelector { public: using H = half; }; +template struct SpvHalfTypeSelector> { using H = vec; }; +template::H> +[[clang::optnone]] F spvQuantizeToF16(F fval) +{ + H hval = H(fval); + hval = select(copysign(H(0), hval), hval, isnormal(hval) || isinf(hval) || isnan(hval)); + return F(hval); +} + constant int _7_tmp [[function_constant(201)]]; constant int _7 = is_function_constant_defined(_7_tmp) ? _7_tmp : -10; constant int _20 = (_7 + 2); constant uint _8_tmp [[function_constant(202)]]; constant uint _8 = is_function_constant_defined(_8_tmp) ? _8_tmp : 100u; constant uint _25 = (_8 % 5u); -constant int4 _30 = int4(20, 30, _20, _20); -constant int2 _32 = int2(_30.y, _30.x); -constant int _33 = _30.y; +constant int _30 = _7 - (-3) * (_7 / (-3)); +constant int4 _32 = int4(20, 30, _20, _30); +constant int2 _34 = int2(_32.y, _32.x); +constant int _35 = _32.y; constant float _9_tmp [[function_constant(200)]]; constant float _9 = is_function_constant_defined(_9_tmp) ? 
_9_tmp : 3.141590118408203125; +constant float _41 = spvQuantizeToF16(_9); struct main0_out { @@ -27,11 +42,11 @@ vertex main0_out main0() float4 pos = float4(0.0); pos.y += float(_20); pos.z += float(_25); - pos += float4(_30); - float2 _56 = pos.xy + float2(_32); - pos = float4(_56.x, _56.y, pos.z, pos.w); + pos += float4(_32); + float2 _59 = pos.xy + float2(_34); + pos = float4(_59.x, _59.y, pos.z, pos.w); out.gl_Position = pos; - out.m_4 = _33; + out.m_4 = _35; return out; } diff --git a/reference/shaders-msl/asm/vert/uint-vertex-id-instance-id.asm.vert b/reference/shaders-msl/asm/vert/uint-vertex-id-instance-id.asm.vert index 89ca17f98b3..30df905e6ab 100644 --- a/reference/shaders-msl/asm/vert/uint-vertex-id-instance-id.asm.vert +++ b/reference/shaders-msl/asm/vert/uint-vertex-id-instance-id.asm.vert @@ -10,6 +10,7 @@ struct main0_out float4 gl_Position [[position]]; }; +static inline __attribute__((always_inline)) float4 _main(thread const uint& vid, thread const uint& iid) { return float4(float(vid + iid)); diff --git a/reference/shaders-msl/comp/access-private-workgroup-in-function.comp b/reference/shaders-msl/comp/access-private-workgroup-in-function.comp index 17acda96780..85185e6df6a 100644 --- a/reference/shaders-msl/comp/access-private-workgroup-in-function.comp +++ b/reference/shaders-msl/comp/access-private-workgroup-in-function.comp @@ -5,11 +5,15 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) void set_f(thread int& f) { f = 40; } +static inline __attribute__((always_inline)) void set_shared_u(threadgroup int& u) { u = 50; diff --git a/reference/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp b/reference/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp index f7757cd19f8..18cfd68c199 100644 --- a/reference/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp +++ 
b/reference/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp @@ -23,6 +23,8 @@ struct SSBO2 float4 v; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + struct spvDescriptorSetBuffer0 { const device SSBO0* ssbo0 [[id(0)]]; diff --git a/reference/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp b/reference/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp new file mode 100644 index 00000000000..c23a9d1d006 --- /dev/null +++ b/reference/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp @@ -0,0 +1,11 @@ +#include +#include + +using namespace metal; + +kernel void main0(texture2d uImage [[texture(0)]], texture2d uImageRead [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int2 coord = int2(gl_GlobalInvocationID.xy); + uImage.write(uImageRead.read(uint2(coord)), uint2(coord)); +} + diff --git a/reference/shaders-msl/comp/array-length.comp b/reference/shaders-msl/comp/array-length.comp index 8406d1e50b0..9ac8c827e62 100644 --- a/reference/shaders-msl/comp/array-length.comp +++ b/reference/shaders-msl/comp/array-length.comp @@ -16,6 +16,9 @@ struct SSBO1 float bz[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) uint get_size(device SSBO& v_14, constant uint& v_14BufferSize, device SSBO1* (&ssbos)[2], constant uint* ssbosBufferSize) { return uint(int((v_14BufferSize - 16) / 16) + int((ssbosBufferSize[1] - 0) / 4)); diff --git a/reference/shaders-msl/comp/array-length.msl2.argument.discrete.comp b/reference/shaders-msl/comp/array-length.msl2.argument.discrete.comp index 73bd3f6d7b3..aa1f4424d12 100644 --- a/reference/shaders-msl/comp/array-length.msl2.argument.discrete.comp +++ b/reference/shaders-msl/comp/array-length.msl2.argument.discrete.comp @@ -27,6 +27,8 @@ struct SSBO3 float bz[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + struct 
spvDescriptorSetBuffer0 { device SSBO* v_16 [[id(0)]]; @@ -39,6 +41,7 @@ struct spvDescriptorSetBuffer1 constant uint* spvBufferSizeConstants [[id(2)]]; }; +static inline __attribute__((always_inline)) uint get_size(device SSBO& v_16, constant uint& v_16BufferSize, device SSBO1* constant (&ssbos)[2], constant uint* ssbosBufferSize, device SSBO2& v_38, constant uint& v_38BufferSize, device SSBO3* (&ssbos2)[2], constant uint* ssbos2BufferSize) { uint len = uint(int((v_16BufferSize - 16) / 16)); diff --git a/reference/shaders-msl/comp/atomic.comp b/reference/shaders-msl/comp/atomic.comp index 43e6a8f0380..fca72bfcfe9 100644 --- a/reference/shaders-msl/comp/atomic.comp +++ b/reference/shaders-msl/comp/atomic.comp @@ -12,59 +12,61 @@ struct SSBO int i32; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& ssbo [[buffer(0)]]) { threadgroup uint shared_u32; threadgroup int shared_i32; - uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _16 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _18 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _20 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _22 = 
atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _24 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _28 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); uint _32; do { _32 = 10u; - } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u); - int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u); + int _36 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _38 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _40 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _42 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _44 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _46 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, 
memory_order_relaxed); + int _48 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); int _52; do { _52 = 10; - } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10); + } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10); shared_u32 = 10u; shared_i32 = 10; - uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _57 = atomic_fetch_add_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _58 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _59 = atomic_fetch_xor_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _60 = atomic_fetch_and_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _61 = atomic_fetch_min_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _62 = atomic_fetch_max_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _63 = atomic_exchange_explicit((threadgroup atomic_uint*)&shared_u32, 1u, 
memory_order_relaxed); uint _64; do { _64 = 10u; - } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u); - int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u); + int _65 = atomic_fetch_add_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _66 = atomic_fetch_or_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _67 = atomic_fetch_xor_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _68 = atomic_fetch_and_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _69 = atomic_fetch_min_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _70 = atomic_fetch_max_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _71 = atomic_exchange_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); int _72; do { _72 = 10; - } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, 
memory_order_relaxed, memory_order_relaxed) && _72 == 10); + } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10); } diff --git a/reference/shaders-msl/comp/barriers.comp b/reference/shaders-msl/comp/barriers.comp index 560fd8b53f6..82813906f68 100644 --- a/reference/shaders-msl/comp/barriers.comp +++ b/reference/shaders-msl/comp/barriers.comp @@ -7,56 +7,67 @@ using namespace metal; constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u); +static inline __attribute__((always_inline)) void barrier_shared() { threadgroup_barrier(mem_flags::mem_threadgroup); } +static inline __attribute__((always_inline)) void full_barrier() { threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); } +static inline __attribute__((always_inline)) void image_barrier() { threadgroup_barrier(mem_flags::mem_texture); } +static inline __attribute__((always_inline)) void buffer_barrier() { threadgroup_barrier(mem_flags::mem_device); } +static inline __attribute__((always_inline)) void group_barrier() { threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); } +static inline __attribute__((always_inline)) void barrier_shared_exec() { threadgroup_barrier(mem_flags::mem_threadgroup); } +static inline __attribute__((always_inline)) void full_barrier_exec() { threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); } +static inline __attribute__((always_inline)) void image_barrier_exec() { threadgroup_barrier(mem_flags::mem_texture); } +static inline __attribute__((always_inline)) void buffer_barrier_exec() { threadgroup_barrier(mem_flags::mem_device); } +static inline __attribute__((always_inline)) void group_barrier_exec() { threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); } +static inline 
__attribute__((always_inline)) void exec_barrier() { threadgroup_barrier(mem_flags::mem_threadgroup); diff --git a/reference/shaders-msl/comp/basic.comp b/reference/shaders-msl/comp/basic.comp index 6410894ba08..42518f0d1dd 100644 --- a/reference/shaders-msl/comp/basic.comp +++ b/reference/shaders-msl/comp/basic.comp @@ -21,13 +21,15 @@ struct SSBO3 uint counter; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]], device SSBO3& _48 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; float4 idata = _23.in_data[ident]; if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) { - uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed); + uint _52 = atomic_fetch_add_explicit((device atomic_uint*)&_48.counter, 1u, memory_order_relaxed); _45.out_data[_52] = idata; } } diff --git a/reference/shaders-msl/comp/basic.dispatchbase.comp b/reference/shaders-msl/comp/basic.dispatchbase.comp new file mode 100644 index 00000000000..92d517cffb0 --- /dev/null +++ b/reference/shaders-msl/comp/basic.dispatchbase.comp @@ -0,0 +1,41 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 in_data[1]; +}; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +struct SSBO3 +{ + uint counter; +}; + +constant uint _59_tmp [[function_constant(10)]]; +constant uint _59 = is_function_constant_defined(_59_tmp) ? 
_59_tmp : 1u; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(_59, 1u, 1u); + +kernel void main0(const device SSBO& _27 [[buffer(0)]], device SSBO2& _49 [[buffer(1)]], device SSBO3& _52 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 spvDispatchBase [[grid_origin]]) +{ + gl_GlobalInvocationID += spvDispatchBase * gl_WorkGroupSize; + gl_WorkGroupID += spvDispatchBase; + uint ident = gl_GlobalInvocationID.x; + uint workgroup = gl_WorkGroupID.x; + float4 idata = _27.in_data[ident]; + if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) + { + uint _56 = atomic_fetch_add_explicit((device atomic_uint*)&_52.counter, 1u, memory_order_relaxed); + _49.out_data[_56] = idata; + } +} + diff --git a/reference/shaders-msl/comp/basic.dispatchbase.msl11.comp b/reference/shaders-msl/comp/basic.dispatchbase.msl11.comp new file mode 100644 index 00000000000..87b0b442911 --- /dev/null +++ b/reference/shaders-msl/comp/basic.dispatchbase.msl11.comp @@ -0,0 +1,39 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 in_data[1]; +}; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +struct SSBO3 +{ + uint counter; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant uint3& spvDispatchBase [[buffer(29)]], const device SSBO& _27 [[buffer(0)]], device SSBO2& _49 [[buffer(1)]], device SSBO3& _52 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + gl_GlobalInvocationID += spvDispatchBase * gl_WorkGroupSize; + gl_WorkGroupID += spvDispatchBase; + uint ident = gl_GlobalInvocationID.x; + uint workgroup = gl_WorkGroupID.x; + float4 idata = _27.in_data[ident]; + if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) + { + uint _56 = atomic_fetch_add_explicit((device 
atomic_uint*)&_52.counter, 1u, memory_order_relaxed); + _49.out_data[_56] = idata; + } +} + diff --git a/reference/shaders-msl/comp/basic.inline-block.msl2.comp b/reference/shaders-msl/comp/basic.inline-block.msl2.comp new file mode 100644 index 00000000000..337b1b73362 --- /dev/null +++ b/reference/shaders-msl/comp/basic.inline-block.msl2.comp @@ -0,0 +1,54 @@ +#include +#include + +using namespace metal; + +typedef packed_float4 packed_float4x4[4]; + +struct Baz +{ + int f; + int g; +}; + +struct X +{ + int x; + int y; + float z; +}; + +struct Foo +{ + int a; + int b; + packed_float4x4 c; + X x[2]; +}; + +struct Bar +{ + int d; + int e; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u); + +struct spvDescriptorSetBuffer0 +{ + constant Bar* m_38 [[id(0)]]; + Foo m_32 [[id(1)]]; +}; + +struct spvDescriptorSetBuffer1 +{ + device Baz* baz [[id(0)]][3]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint3 coords = gl_GlobalInvocationID; + spvDescriptorSet1.baz[coords.x]->f = spvDescriptorSet0.m_32.a + (*spvDescriptorSet0.m_38).d; + spvDescriptorSet1.baz[coords.x]->g = spvDescriptorSet0.m_32.b * (*spvDescriptorSet0.m_38).e; +} + diff --git a/reference/shaders-msl/comp/bitcast-16bit-2.invalid.comp b/reference/shaders-msl/comp/bitcast-16bit-2.invalid.comp deleted file mode 100644 index 59eb961f123..00000000000 --- a/reference/shaders-msl/comp/bitcast-16bit-2.invalid.comp +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include - -using namespace metal; - -struct SSBO1 -{ - short4 outputs[1]; -}; - -struct SSBO0 -{ - int4 inputs[1]; -}; - -struct UBO -{ - half4 const0; -}; - -kernel void main0(device SSBO1& _21 [[buffer(0)]], device SSBO0& _29 [[buffer(1)]], constant UBO& _40 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - uint ident = 
gl_GlobalInvocationID.x; - short2 _47 = as_type(_29.inputs[ident].x) + as_type(_40.const0.xy); - _21.outputs[ident] = short4(_47.x, _47.y, _21.outputs[ident].z, _21.outputs[ident].w); - short2 _66 = short2(as_type(uint(_29.inputs[ident].y)) - as_type(_40.const0.zw)); - _21.outputs[ident] = short4(_21.outputs[ident].x, _21.outputs[ident].y, _66.x, _66.y); -} - diff --git a/reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp b/reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp new file mode 100644 index 00000000000..f79a8b52068 --- /dev/null +++ b/reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct SSBO; + +struct UBO +{ + uint2 b; +}; + +struct SSBO +{ + packed_float3 a1; + float a2; +}; + +kernel void main0(constant UBO& _10 [[buffer(0)]]) +{ + (reinterpret_cast(as_type(_10.b)))->a1 = float3(1.0, 2.0, 3.0); + uint2 v2 = as_type(reinterpret_cast(reinterpret_cast(as_type(_10.b + uint2(32u))))); + float3 v3 = float3((reinterpret_cast(as_type(v2)))->a1); + (reinterpret_cast(as_type(v2)))->a1 = v3 + float3(1.0); +} + diff --git a/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp new file mode 100644 index 00000000000..35b7af540ca --- /dev/null +++ b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp @@ -0,0 +1,67 @@ +#include +#include + +using namespace metal; + +struct t21; + +struct t24 +{ + int4 m0[2]; + int m1; + ulong2 m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t21 +{ + int4 m0[2]; + int m1; + ulong2 m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t35 +{ + int m0[32]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant t24& u24 
[[buffer(0)]], constant t35& u35 [[buffer(1)]], texture2d v295 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int v8 = 0; + v8 |= (u24.m0[0].x - 0); + v8 |= (u24.m0[u35.m0[1]].x - 1); + v8 |= (u24.m1 - 2); + v8 |= int(u24.m4[0u][0] - 3.0); + v8 |= int(u24.m4[1u][0] - 5.0); + v8 |= int(u24.m4[0u][1] - 4.0); + v8 |= int(u24.m4[1u][1] - 6.0); + v8 |= (((device t21*)u24.m2[0].x)->m0[0].x - 3); + v8 |= (((device t21*)u24.m2[0].x)->m0[u35.m0[1]].x - 4); + v8 |= (((device t21*)u24.m2[0].x)->m1 - 5); + v8 |= int(((device t21*)u24.m2[0].x)->m4[0u][0] - 6.0); + v8 |= int(((device t21*)u24.m2[0].x)->m4[1u][0] - 8.0); + v8 |= int(((device t21*)u24.m2[0].x)->m4[0u][1] - 7.0); + v8 |= int(((device t21*)u24.m2[0].x)->m4[1u][1] - 9.0); + v8 |= (((device t21*)u24.m2[u35.m0[1]].x)->m0[0].x - 6); + v8 |= (((device t21*)u24.m2[u35.m0[1]].x)->m0[u35.m0[1]].x - 7); + v8 |= (((device t21*)u24.m2[u35.m0[1]].x)->m1 - 8); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][0] - 9.0); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][0] - 11.0); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][1] - 10.0); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][1] - 12.0); + v8 |= (u24.m3->m0[0].x - 9); + v8 |= (u24.m3->m0[u35.m0[1]].x - 10); + v8 |= (u24.m3->m1 - 11); + v8 |= int(u24.m3->m4[0u][0] - 12.0); + v8 |= int(u24.m3->m4[1u][0] - 14.0); + v8 |= int(u24.m3->m4[0u][1] - 13.0); + v8 |= int(u24.m3->m4[1u][1] - 15.0); + uint4 v284 = select(uint4(1u, 0u, 0u, 1u), uint4(0u), bool4(v8 != 0)); + v295.write(v284, uint2(int2(gl_GlobalInvocationID.xy))); +} + diff --git a/reference/shaders-msl/comp/buffer_device_address.msl2.comp b/reference/shaders-msl/comp/buffer_device_address.msl2.comp new file mode 100644 index 00000000000..f0f5ea9a650 --- /dev/null +++ b/reference/shaders-msl/comp/buffer_device_address.msl2.comp @@ -0,0 +1,43 @@ +#include +#include + +using namespace metal; + +struct Position; +struct PositionReferences; + +struct Position +{ + 
float2 positions[1]; +}; + +struct Registers +{ + device PositionReferences* references; + float fract_time; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); + +kernel void main0(constant Registers& registers [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + uint2 local_offset = gl_GlobalInvocationID.xy; + uint local_index = ((local_offset.y * 8u) * gl_NumWorkGroups.x) + local_offset.x; + uint slice = gl_WorkGroupID.z; + device Position* __restrict positions = registers.references->buffers[slice]; + float offset = 6.283125400543212890625 * fract(registers.fract_time + (float(slice) * 0.100000001490116119384765625)); + float2 pos = float2(local_offset); + pos.x += (0.20000000298023223876953125 * sin((2.2000000476837158203125 * pos.x) + offset)); + pos.y += (0.20000000298023223876953125 * sin((2.25 * pos.y) + (2.0 * offset))); + pos.x += (0.20000000298023223876953125 * cos((1.7999999523162841796875 * pos.y) + (3.0 * offset))); + pos.y += (0.20000000298023223876953125 * cos((2.849999904632568359375 * pos.x) + (4.0 * offset))); + pos.x += (0.5 * sin(offset)); + pos.y += (0.5 * sin(offset + 0.300000011920928955078125)); + positions->positions[local_index] = (pos / ((float2(8.0) * float2(gl_NumWorkGroups.xy)) - float2(1.0))) - float2(0.5); +} + diff --git a/reference/shaders-msl/comp/cfg-preserve-parameter.comp b/reference/shaders-msl/comp/cfg-preserve-parameter.comp index d65beee5d2d..ce1bef3f8e6 100644 --- a/reference/shaders-msl/comp/cfg-preserve-parameter.comp +++ b/reference/shaders-msl/comp/cfg-preserve-parameter.comp @@ -5,6 +5,7 @@ using namespace metal; +static inline __attribute__((always_inline)) void out_test_0(thread const int& cond, thread int& i) { if (cond == 0) @@ -17,6 +18,7 @@ void out_test_0(thread const int& cond, thread int& 
i) } } +static inline __attribute__((always_inline)) void out_test_1(thread const int& cond, thread int& i) { switch (cond) @@ -34,6 +36,7 @@ void out_test_1(thread const int& cond, thread int& i) } } +static inline __attribute__((always_inline)) void inout_test_0(thread const int& cond, thread int& i) { if (cond == 0) @@ -42,6 +45,7 @@ void inout_test_0(thread const int& cond, thread int& i) } } +static inline __attribute__((always_inline)) void inout_test_1(thread const int& cond, thread int& i) { switch (cond) diff --git a/reference/shaders-msl/comp/coherent-block.comp b/reference/shaders-msl/comp/coherent-block.comp index bec9b218c7b..58bbacb7f0c 100644 --- a/reference/shaders-msl/comp/coherent-block.comp +++ b/reference/shaders-msl/comp/coherent-block.comp @@ -8,7 +8,9 @@ struct SSBO float4 value; }; -kernel void main0(device SSBO& _10 [[buffer(0)]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(volatile device SSBO& _10 [[buffer(0)]]) { _10.value = float4(20.0); } diff --git a/reference/shaders-msl/comp/coherent-image.comp b/reference/shaders-msl/comp/coherent-image.comp index 0fe044fb9ae..5090484464d 100644 --- a/reference/shaders-msl/comp/coherent-image.comp +++ b/reference/shaders-msl/comp/coherent-image.comp @@ -8,7 +8,9 @@ struct SSBO int4 value; }; -kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d uImage [[texture(0)]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d uImage [[texture(0)]]) { _10.value = uImage.read(uint2(int2(10))); } diff --git a/reference/shaders-msl/comp/complex-composite-constant-array.comp b/reference/shaders-msl/comp/complex-composite-constant-array.comp new file mode 100644 index 00000000000..2f5549f5fb2 --- /dev/null +++ b/reference/shaders-msl/comp/complex-composite-constant-array.comp @@ -0,0 +1,65 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored 
"-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct SSBO +{ + float4x4 a; + uint index; +}; + +constant spvUnsafeArray _32 = spvUnsafeArray({ float4x4(float4(1.0, 0.0, 0.0, 0.0), float4(0.0, 1.0, 0.0, 0.0), float4(0.0, 0.0, 1.0, 0.0), float4(0.0, 0.0, 0.0, 1.0)), float4x4(float4(2.0, 0.0, 0.0, 0.0), float4(0.0, 2.0, 0.0, 0.0), float4(0.0, 0.0, 2.0, 0.0), float4(0.0, 0.0, 0.0, 2.0)) }); + +static inline __attribute__((always_inline)) +void write_global(device SSBO& v_14) +{ + v_14.a = _32[v_14.index]; +} + +kernel void main0(device SSBO& v_14 [[buffer(0)]]) +{ + write_global(v_14); +} + diff --git a/reference/shaders-msl/comp/complex-type-alias.comp b/reference/shaders-msl/comp/complex-type-alias.comp index 9101f89b743..fc0d57500b7 100644 --- a/reference/shaders-msl/comp/complex-type-alias.comp +++ b/reference/shaders-msl/comp/complex-type-alias.comp @@ -44,6 +44,7 @@ struct SSBO constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); +static inline __attribute__((always_inline)) void Zero(thread Foo0& v) { v.a = 0.0; diff --git a/reference/shaders-msl/comp/composite-array-initialization.comp b/reference/shaders-msl/comp/composite-array-initialization.comp index ac10e750120..c6c17b1f392 100644 --- 
a/reference/shaders-msl/comp/composite-array-initialization.comp +++ b/reference/shaders-msl/comp/composite-array-initialization.comp @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Data { float a; @@ -27,21 +66,9 @@ struct SSBO constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u); -constant Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; - -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} +constant spvUnsafeArray _25 = spvUnsafeArray({ Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }); +static inline __attribute__((always_inline)) Data combine(thread const Data& a, thread const Data& b) { return Data{ a.a + b.a, a.b + b.b }; @@ -49,10 +76,10 @@ Data combine(thread const Data& a, thread const Data& b) kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - Data data[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; - Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } }; - Data data2[2]; - spvArrayCopyFromStack1(data2, _31); + spvUnsafeArray data = spvUnsafeArray({ Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }); + spvUnsafeArray _31 = spvUnsafeArray({ Data{ X, 2.0 }, Data{ 3.0, 5.0 } }); + spvUnsafeArray data2; + data2 = _31; Data param = data[gl_LocalInvocationID.x]; Data param_1 = data2[gl_LocalInvocationID.x]; Data _73 = combine(param, param_1); diff --git a/reference/shaders-msl/comp/composite-array-initialization.force-native-array.comp b/reference/shaders-msl/comp/composite-array-initialization.force-native-array.comp new file mode 100644 index 00000000000..1d451a95928 --- /dev/null +++ b/reference/shaders-msl/comp/composite-array-initialization.force-native-array.comp @@ -0,0 +1,158 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i 
< A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct Data +{ + float a; + float b; +}; + +constant float X_tmp [[function_constant(0)]]; +constant float X = is_function_constant_defined(X_tmp) ? 
X_tmp : 4.0; + +struct Data_1 +{ + float a; + float b; +}; + +struct SSBO +{ + Data_1 outdata[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u); + +constant Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; + +static inline __attribute__((always_inline)) +Data combine(thread const Data& a, thread const Data& b) +{ + return Data{ a.a + b.a, a.b + b.b }; +} + +kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +{ + Data data[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; + Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } }; + Data data2[2]; + spvArrayCopyFromStackToStack1(data2, _31); + Data param = data[gl_LocalInvocationID.x]; + Data param_1 = data2[gl_LocalInvocationID.x]; + Data _73 = combine(param, param_1); + _53.outdata[gl_WorkGroupID.x].a = _73.a; + _53.outdata[gl_WorkGroupID.x].b = _73.b; +} + diff --git a/reference/shaders-msl/comp/composite-construct.comp b/reference/shaders-msl/comp/composite-construct.comp index 4b5ea37e98f..aada82fc92c 100644 --- a/reference/shaders-msl/comp/composite-construct.comp +++ b/reference/shaders-msl/comp/composite-construct.comp @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct SSBO0 { float4 as[1]; @@ -21,26 +60,15 @@ struct Composite float4 b; }; -constant float4 _43[2] = { float4(20.0), float4(40.0) }; - -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} +constant spvUnsafeArray _43 = spvUnsafeArray({ float4(20.0), float4(40.0) }); kernel void main0(device SSBO0& _16 [[buffer(0)]], device SSBO1& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { - float4 _37[2] = { _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }; - float4 values[2]; - spvArrayCopyFromStack1(values, _37); + spvUnsafeArray _37 = spvUnsafeArray({ _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }); + spvUnsafeArray values; + values = _37; Composite c = Composite{ values[0], _43[1] }; _16.as[0] = values[gl_LocalInvocationIndex]; _32.bs[1] = c.b; diff --git 
a/reference/shaders-msl/comp/copy-array-of-arrays.comp b/reference/shaders-msl/comp/copy-array-of-arrays.comp index 0fa6c6bfd31..21fb9b367c5 100644 --- a/reference/shaders-msl/comp/copy-array-of-arrays.comp +++ b/reference/shaders-msl/comp/copy-array-of-arrays.comp @@ -1,87 +1,79 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -struct BUF -{ - int a; - float b; - float c; -}; - -constant float _16[2] = { 1.0, 2.0 }; -constant float _19[2] = { 3.0, 4.0 }; -constant float _20[2][2] = { { 1.0, 2.0 }, { 3.0, 4.0 } }; -constant float _21[2][2][2] = { { { 1.0, 2.0 }, { 3.0, 4.0 } }, { { 1.0, 2.0 }, { 3.0, 4.0 } } }; - -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +struct spvUnsafeArray { - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromStack2(thread T (&dst)[A][B], thread const T (&src)[A][B]) -{ - for (uint i = 0; i < A; i++) + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread { - spvArrayCopyFromStack1(dst[i], src[i]); + return elements[pos]; } -} - -template -void spvArrayCopyFromConstant2(thread T (&dst)[A][B], constant T (&src)[A][B]) -{ - for (uint i = 0; i < A; i++) + constexpr const thread T& operator [] (size_t pos) const thread { - spvArrayCopyFromConstant1(dst[i], src[i]); + return elements[pos]; } -} - -template -void spvArrayCopyFromStack3(thread T (&dst)[A][B][C], thread const T (&src)[A][B][C]) -{ - for (uint i = 0; i < A; i++) + + device T& operator [] (size_t pos) device { - spvArrayCopyFromStack2(dst[i], src[i]); + return elements[pos]; } -} - -template -void spvArrayCopyFromConstant3(thread T (&dst)[A][B][C], constant T (&src)[A][B][C]) -{ - for (uint i = 0; i < A; i++) + constexpr const device T& operator [] (size_t pos) const device { - spvArrayCopyFromConstant2(dst[i], src[i]); + return elements[pos]; } -} + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct BUF +{ + int a; + float b; + float c; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +constant spvUnsafeArray _16 = spvUnsafeArray({ 1.0, 2.0 }); +constant spvUnsafeArray _19 = spvUnsafeArray({ 3.0, 4.0 }); +constant spvUnsafeArray, 2> _20 = spvUnsafeArray, 2>({ spvUnsafeArray({ 1.0, 2.0 }), spvUnsafeArray({ 3.0, 4.0 }) }); +constant spvUnsafeArray, 2>, 2> _21 = spvUnsafeArray, 2>, 2>({ spvUnsafeArray, 2>({ spvUnsafeArray({ 1.0, 2.0 }), spvUnsafeArray({ 3.0, 4.0 }) }), spvUnsafeArray, 2>({ spvUnsafeArray({ 1.0, 2.0 }), spvUnsafeArray({ 3.0, 4.0 }) }) }); kernel void main0(device BUF& o [[buffer(0)]]) { - float c[2][2][2]; - spvArrayCopyFromConstant3(c, _21); + spvUnsafeArray, 2>, 2> c; + c = _21; o.a = int(c[1][1][1]); - float 
_43[2] = { o.b, o.c }; - float _48[2] = { o.b, o.b }; - float _49[2][2] = { { _43[0], _43[1] }, { _48[0], _48[1] } }; - float _54[2] = { o.c, o.c }; - float _59[2] = { o.c, o.b }; - float _60[2][2] = { { _54[0], _54[1] }, { _59[0], _59[1] } }; - float _61[2][2][2] = { { { _49[0][0], _49[0][1] }, { _49[1][0], _49[1][1] } }, { { _60[0][0], _60[0][1] }, { _60[1][0], _60[1][1] } } }; - float d[2][2][2]; - spvArrayCopyFromStack3(d, _61); - float e[2][2][2]; - spvArrayCopyFromStack3(e, d); + spvUnsafeArray _43 = spvUnsafeArray({ o.b, o.c }); + spvUnsafeArray _48 = spvUnsafeArray({ o.b, o.b }); + spvUnsafeArray, 2> _49 = spvUnsafeArray, 2>({ _43, _48 }); + spvUnsafeArray _54 = spvUnsafeArray({ o.c, o.c }); + spvUnsafeArray _59 = spvUnsafeArray({ o.c, o.b }); + spvUnsafeArray, 2> _60 = spvUnsafeArray, 2>({ _54, _59 }); + spvUnsafeArray, 2>, 2> _61 = spvUnsafeArray, 2>, 2>({ _49, _60 }); + spvUnsafeArray, 2>, 2> d; + d = _61; + spvUnsafeArray, 2>, 2> e; + e = d; o.b = e[1][0][1]; } diff --git a/reference/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp b/reference/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp new file mode 100644 index 00000000000..0eacb269b2a --- /dev/null +++ b/reference/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp @@ -0,0 +1,364 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T 
(&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToStack2(thread T (&dst)[A][B], constant T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToStack1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup2(threadgroup T (&dst)[A][B], constant T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToThreadGroup1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToStack2(thread T (&dst)[A][B], thread const T 
(&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToStack1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup2(threadgroup T (&dst)[A][B], thread const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToThreadGroup1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack2(thread T (&dst)[A][B], threadgroup const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToStack1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup2(threadgroup T (&dst)[A][B], threadgroup const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToThreadGroup1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromDeviceToDevice2(device T (&dst)[A][B], device const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromDeviceToDevice1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToDevice2(device T (&dst)[A][B], constant T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToDevice1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToDevice2(device T (&dst)[A][B], thread const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToDevice1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice2(device T (&dst)[A][B], threadgroup const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToDevice1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromDeviceToStack2(thread T (&dst)[A][B], device const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromDeviceToStack1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup2(threadgroup T (&dst)[A][B], device const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + 
spvArrayCopyFromDeviceToThreadGroup1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToStack3(thread T (&dst)[A][B][C], constant T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToStack2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup3(threadgroup T (&dst)[A][B][C], constant T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToThreadGroup2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToStack3(thread T (&dst)[A][B][C], thread const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToStack2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup3(threadgroup T (&dst)[A][B][C], thread const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToThreadGroup2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack3(thread T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToStack2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup3(threadgroup T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToThreadGroup2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromDeviceToDevice3(device T (&dst)[A][B][C], device const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromDeviceToDevice2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToDevice3(device T (&dst)[A][B][C], constant T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToDevice2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToDevice3(device T (&dst)[A][B][C], thread const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + 
spvArrayCopyFromStackToDevice2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice3(device T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToDevice2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromDeviceToStack3(thread T (&dst)[A][B][C], device const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromDeviceToStack2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup3(threadgroup T (&dst)[A][B][C], device const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromDeviceToThreadGroup2(dst[i], src[i]); + } +} + +struct BUF +{ + int a; + float b; + float c; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +constant float _16[2] = { 1.0, 2.0 }; +constant float _19[2] = { 3.0, 4.0 }; +constant float _20[2][2] = { { 1.0, 2.0 }, { 3.0, 4.0 } }; +constant float _21[2][2][2] = { { { 1.0, 2.0 }, { 3.0, 4.0 } }, { { 1.0, 2.0 }, { 3.0, 4.0 } } }; + +kernel void main0(device BUF& o [[buffer(0)]]) +{ + float c[2][2][2]; + spvArrayCopyFromConstantToStack3(c, _21); + o.a = int(c[1][1][1]); + float _43[2] = { o.b, o.c }; + float _48[2] = { o.b, o.b }; + float _49[2][2] = { { _43[0], _43[1] }, { _48[0], _48[1] } }; + float _54[2] = { o.c, o.c }; + float _59[2] = { o.c, o.b }; + float _60[2][2] = { { _54[0], _54[1] }, { _59[0], _59[1] } }; + float _61[2][2][2] = { { { _49[0][0], _49[0][1] }, { _49[1][0], _49[1][1] } }, { { _60[0][0], _60[0][1] }, { _60[1][0], _60[1][1] } } }; + float d[2][2][2]; + spvArrayCopyFromStackToStack3(d, _61); + float e[2][2][2]; + spvArrayCopyFromStackToStack3(e, d); + o.b = e[1][0][1]; +} + diff --git a/reference/shaders-msl/comp/culling.comp b/reference/shaders-msl/comp/culling.comp index 32acf599e77..13578363b7a 100644 --- a/reference/shaders-msl/comp/culling.comp +++ b/reference/shaders-msl/comp/culling.comp @@ -29,7 +29,7 @@ kernel 
void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buff float idata = _22.in_data[ident]; if (idata > 12.0) { - uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed); + uint _45 = atomic_fetch_add_explicit((device atomic_uint*)&_41.count, 1u, memory_order_relaxed); _38.out_data[_45] = idata; } } diff --git a/reference/shaders-msl/comp/defer-parens.comp b/reference/shaders-msl/comp/defer-parens.comp index 76dce777340..9a567fa6ebe 100644 --- a/reference/shaders-msl/comp/defer-parens.comp +++ b/reference/shaders-msl/comp/defer-parens.comp @@ -9,6 +9,8 @@ struct SSBO int index; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _13 [[buffer(0)]]) { float4 d = _13.data; diff --git a/reference/shaders-msl/comp/dowhile.comp b/reference/shaders-msl/comp/dowhile.comp index 3482fb355b4..2b4de9ebb73 100644 --- a/reference/shaders-msl/comp/dowhile.comp +++ b/reference/shaders-msl/comp/dowhile.comp @@ -14,6 +14,8 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp b/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp index f9608de34d7..856efeabaf0 100644 --- a/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp +++ b/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp @@ -5,17 +5,6 @@ using namespace metal; -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -28,6 +17,17 @@ template inline constexpr thread T&& 
spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -65,66 +65,6 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) -{ - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} - -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - kernel void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture2d foo [[texture(0)]], texture2d bar [[texture(1)]], sampler fooSmplr [[sampler(0)]]) { constant uint& fooSwzl = spvSwizzleConstants[0]; diff --git a/reference/shaders-msl/comp/functions.comp b/reference/shaders-msl/comp/functions.comp index d8f6e55a467..b1072608787 100644 --- a/reference/shaders-msl/comp/functions.comp +++ b/reference/shaders-msl/comp/functions.comp @@ -5,6 +5,7 @@ using namespace metal; +static inline __attribute__((always_inline)) void myfunc(threadgroup int (&foo)[1337]) { foo[0] = 13; diff --git a/reference/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp b/reference/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp index 1b525c1f90e..f6b8845343a 100644 --- a/reference/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp +++ b/reference/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp @@ -5,19 +5,20 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + +static inline __attribute__((always_inline)) float getB(device myBlock& myStorage, thread uint3& gl_GlobalInvocationID) { return myStorage.b[gl_GlobalInvocationID.x]; diff --git a/reference/shaders-msl/comp/global-invocation-id.comp b/reference/shaders-msl/comp/global-invocation-id.comp index fe0212ec3ff..333485a256a 100644 --- 
a/reference/shaders-msl/comp/global-invocation-id.comp +++ b/reference/shaders-msl/comp/global-invocation-id.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp b/reference/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp new file mode 100644 index 00000000000..24a6c04841b --- /dev/null +++ b/reference/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp @@ -0,0 +1,38 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct SSBO +{ + float4 outdata; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +struct spvDescriptorSetBuffer0 +{ + texture2d uImage [[id(0)]]; + device atomic_uint* uImage_atomic [[id(1)]]; + device SSBO* m_31 [[id(2)]]; + texture2d uTexture [[id(3)]]; + sampler uTextureSmplr [[id(4)]]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint _26 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.uImage_atomic[spvImage2DAtomicCoord(int2(gl_GlobalInvocationID.xy), spvDescriptorSet0.uImage)], 10u, memory_order_relaxed); + uint ret = _26; + (*spvDescriptorSet0.m_31).outdata = spvDescriptorSet0.uTexture.sample(spvDescriptorSet0.uTextureSmplr, float2(gl_GlobalInvocationID.xy), level(0.0)) + float4(float(ret)); +} + diff --git a/reference/shaders-msl/comp/image-atomic-automatic-bindings.comp b/reference/shaders-msl/comp/image-atomic-automatic-bindings.comp new file mode 100644 index 00000000000..60d5421e56f --- /dev/null +++ b/reference/shaders-msl/comp/image-atomic-automatic-bindings.comp @@ -0,0 +1,29 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct SSBO +{ + float4 outdata; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _31 [[buffer(1)]], texture2d uImage [[texture(0)]], device atomic_uint* uImage_atomic [[buffer(0)]], texture2d uTexture [[texture(1)]], sampler uTextureSmplr [[sampler(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint _26 = atomic_fetch_add_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(gl_GlobalInvocationID.xy), uImage)], 10u, memory_order_relaxed); + uint ret = _26; + _31.outdata = uTexture.sample(uTextureSmplr, float2(gl_GlobalInvocationID.xy), level(0.0)) + float4(float(ret)); +} + diff --git a/reference/shaders-msl/comp/image-cube-array-load-store.comp b/reference/shaders-msl/comp/image-cube-array-load-store.comp index ef67a326f59..c0b83c46c8c 100644 --- a/reference/shaders-msl/comp/image-cube-array-load-store.comp +++ b/reference/shaders-msl/comp/image-cube-array-load-store.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(texturecube_array uImageIn [[texture(0)]], texturecube_array uImageOut [[texture(1)]]) { int3 coord = int3(9, 7, 11); diff --git a/reference/shaders-msl/comp/image.comp b/reference/shaders-msl/comp/image.comp index f3bc1455db5..e7c9c763a34 100644 --- a/reference/shaders-msl/comp/image.comp +++ b/reference/shaders-msl/comp/image.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(texture2d uImageIn [[texture(0)]], texture2d uImageOut [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { float4 v = 
uImageIn.read(uint2((int2(gl_GlobalInvocationID.xy) + int2(uImageIn.get_width(), uImageIn.get_height())))); diff --git a/reference/shaders-msl/comp/insert.comp b/reference/shaders-msl/comp/insert.comp index 0f56a651534..c4611ba2225 100644 --- a/reference/shaders-msl/comp/insert.comp +++ b/reference/shaders-msl/comp/insert.comp @@ -8,6 +8,8 @@ struct SSBO float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { float4 v; @@ -16,6 +18,6 @@ kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[ v.z = 70.0; v.w = 90.0; _27.out_data[gl_GlobalInvocationID.x] = v; - _27.out_data[gl_GlobalInvocationID.x].y = 20.0; + ((device float*)&_27.out_data[gl_GlobalInvocationID.x])[1u] = 20.0; } diff --git a/reference/shaders-msl/comp/int64.invalid.msl22.comp b/reference/shaders-msl/comp/int64.invalid.msl22.comp deleted file mode 100644 index 6eb4a8a8d98..00000000000 --- a/reference/shaders-msl/comp/int64.invalid.msl22.comp +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include - -using namespace metal; - -struct M0 -{ - long v; - long2 b[2]; - ulong c; - ulong d[5]; -}; - -struct SSBO0_Type -{ - long4 a; - M0 m0; -}; - -struct SSBO1_Type -{ - ulong4 b; - M0 m0; -}; - -struct SSBO2_Type -{ - long a[4]; - long2 b[4]; -}; - -struct SSBO3_Type -{ - long a[4]; - long2 b[4]; -}; - -struct SSBO -{ - int s32; - uint u32; -}; - -kernel void main0(device SSBO& _96 [[buffer(0)]]) -{ - SSBO0_Type ssbo_0; - ssbo_0.a += long4(10l, 20l, 30l, 40l); - SSBO1_Type ssbo_1; - ssbo_1.b += ulong4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul); - ssbo_0.a += long4(20l); - ssbo_0.a = abs(ssbo_0.a + long4(ssbo_1.b)); - ssbo_0.a += long4(1l); - ssbo_1.b += ulong4(long4(1l)); - ssbo_0.a -= long4(1l); - ssbo_1.b -= ulong4(long4(1l)); - SSBO2_Type ssbo_2; - ssbo_2.a[0] += 1l; - SSBO3_Type ssbo_3; - ssbo_3.a[0] += 2l; 
- _96.s32 = int(uint(((ulong(ssbo_0.a.x) + ssbo_1.b.y) + ulong(ssbo_2.a[1])) + ulong(ssbo_3.a[2]))); - _96.u32 = uint(((ulong(ssbo_0.a.y) + ssbo_1.b.z) + ulong(ssbo_2.a[0])) + ulong(ssbo_3.a[1])); -} - diff --git a/reference/shaders-msl/comp/inverse.comp b/reference/shaders-msl/comp/inverse.comp index f2f499b91eb..0a1d298b0da 100644 --- a/reference/shaders-msl/comp/inverse.comp +++ b/reference/shaders-msl/comp/inverse.comp @@ -5,34 +5,23 @@ using namespace metal; -struct MatrixOut -{ - float2x2 m2out; - float3x3 m3out; - float4x4 m4out; -}; - -struct MatrixIn -{ - float2x2 m2in; - float3x3 m3in; - float4x4 m4in; -}; - // Returns the determinant of a 2x2 matrix. -inline float spvDet2x2(float a1, float a2, float b1, float b2) +static inline __attribute__((always_inline)) +float spvDet2x2(float a1, float a2, float b1, float b2) { return a1 * b2 - b1 * a2; } // Returns the determinant of a 3x3 matrix. -inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +static inline __attribute__((always_inline)) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) { return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) float4x4 spvInverse4x4(float4x4 m) { float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -68,6 +57,7 @@ float4x4 spvInverse4x4(float4x4 m) // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. 
+static inline __attribute__((always_inline)) float3x3 spvInverse3x3(float3x3 m) { float3x3 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -95,6 +85,7 @@ float3x3 spvInverse3x3(float3x3 m) // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) float2x2 spvInverse2x2(float2x2 m) { float2x2 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -114,6 +105,22 @@ float2x2 spvInverse2x2(float2x2 m) return (det != 0.0f) ? (adj * (1.0f / det)) : m; } +struct MatrixOut +{ + float2x2 m2out; + float3x3 m3out; + float4x4 m4out; +}; + +struct MatrixIn +{ + float2x2 m2in; + float3x3 m3in; + float4x4 m4in; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device MatrixOut& _15 [[buffer(0)]], const device MatrixIn& _20 [[buffer(1)]]) { _15.m2out = spvInverse2x2(_20.m2in); diff --git a/reference/shaders-msl/comp/local-invocation-id.comp b/reference/shaders-msl/comp/local-invocation-id.comp index 772e5e0d867..45059905881 100644 --- a/reference/shaders-msl/comp/local-invocation-id.comp +++ b/reference/shaders-msl/comp/local-invocation-id.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/shaders-msl/comp/local-invocation-index.comp b/reference/shaders-msl/comp/local-invocation-index.comp index 41adbdca5cf..67426dd3f6b 100644 --- a/reference/shaders-msl/comp/local-invocation-index.comp +++ 
b/reference/shaders-msl/comp/local-invocation-index.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp b/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp new file mode 100644 index 00000000000..3de0ef44f0f --- /dev/null +++ b/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct model_t +{ + float3x3 mtx_rm; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device model_t& model [[buffer(0)]]) +{ + float3x3 mtx_cm = transpose(model.mtx_rm); + float3x3 mtx1 = mtx_cm * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0)); + if (mtx1[0].x != 0.0) + { + model.mtx_rm = transpose(float3x3(float3(-5.0, -3.0, -5.0), float3(-2.0, 2.0, -5.0), float3(6.0, 3.0, -8.0))); + } +} + diff --git a/reference/shaders-msl/comp/mat3.comp b/reference/shaders-msl/comp/mat3.comp index c2d9a7c8382..fcb8f7a60fc 100644 --- a/reference/shaders-msl/comp/mat3.comp +++ b/reference/shaders-msl/comp/mat3.comp @@ -8,6 +8,8 @@ struct SSBO2 float3x3 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/mod.comp b/reference/shaders-msl/comp/mod.comp index e0d290259c6..9c43d7321bd 100644 --- a/reference/shaders-msl/comp/mod.comp +++ 
b/reference/shaders-msl/comp/mod.comp @@ -5,6 +5,13 @@ using namespace metal; +// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() +template +inline Tx mod(Tx x, Ty y) +{ + return x - y * floor(x / y); +} + struct SSBO { float4 in_data[1]; @@ -15,12 +22,7 @@ struct SSBO2 float4 out_data[1]; }; -// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() -template -Tx mod(Tx x, Ty y) -{ - return x - y * floor(x / y); -} +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _33 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { diff --git a/reference/shaders-msl/comp/modf.comp b/reference/shaders-msl/comp/modf.comp index ef50a021354..5a5ac3dbd75 100644 --- a/reference/shaders-msl/comp/modf.comp +++ b/reference/shaders-msl/comp/modf.comp @@ -13,6 +13,8 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _35 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/outer-product.comp b/reference/shaders-msl/comp/outer-product.comp index 8e32db392ea..e589642dbda 100644 --- a/reference/shaders-msl/comp/outer-product.comp +++ b/reference/shaders-msl/comp/outer-product.comp @@ -23,6 +23,8 @@ struct ReadSSBO float4 v4; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]]) { _21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y); diff --git a/reference/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp b/reference/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp new file mode 100644 index 
00000000000..14723cbe80f --- /dev/null +++ b/reference/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp @@ -0,0 +1,142 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO_A +{ + float data[1]; +}; + +struct UBO_C +{ + float4 data[1024]; +}; + +struct Registers +{ + float reg; +}; + +struct SSBO_B +{ + uint2 data[1]; +}; + +struct UBO_D +{ + uint4 data[1024]; +}; + +struct SSBO_BRO +{ + uint2 data[1]; +}; + +struct SSBO_As +{ + float data[1]; +}; + +struct UBO_Cs +{ + float4 data[1024]; +}; + +struct SSBO_Bs +{ + uint2 data[1024]; +}; + +struct UBO_Ds +{ + uint4 data[1024]; +}; + +struct SSBO_BsRO +{ + uint2 data[1024]; +}; + +struct SSBO_E +{ + float data[1]; +}; + +struct UBO_G +{ + float4 data[1024]; +}; + +struct SSBO_F +{ + uint2 data[1]; +}; + +struct UBO_H +{ + uint4 data[1024]; +}; + +struct SSBO_I +{ + uint2 data[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +struct spvDescriptorSetBuffer0 +{ + device SSBO_A* ssbo_a [[id(0)]]; + constant UBO_C* ubo_c [[id(1)]]; + device SSBO_As* ssbo_as [[id(2)]][4]; + constant UBO_Cs* ubo_cs [[id(6)]][4]; +}; + +static inline __attribute__((always_inline)) +void func0(device SSBO_A& ssbo_a, thread uint3& gl_GlobalInvocationID, constant UBO_C& ubo_c, thread uint3& gl_WorkGroupID, constant Registers& v_42, device SSBO_B& ssbo_b, constant UBO_D& ubo_d, const device SSBO_BRO& ssbo_b_readonly) +{ + ssbo_a.data[gl_GlobalInvocationID.x] = ubo_c.data[gl_WorkGroupID.x].x + v_42.reg; + ssbo_b.data[gl_GlobalInvocationID.x] = ubo_d.data[gl_WorkGroupID.y].xy + ssbo_b_readonly.data[gl_GlobalInvocationID.x]; +} + +static inline __attribute__((always_inline)) +void func1(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, device SSBO_As* const device (&ssbo_as)[4], constant UBO_Cs* const device (&ubo_cs)[4]) +{ + 
ssbo_as[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_cs[gl_WorkGroupID.x]->data[0].x; +} + +static inline __attribute__((always_inline)) +void func2(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, device SSBO_Bs* const device (&ssbo_bs)[4], constant UBO_Ds* const device (&ubo_ds)[4], const device SSBO_BsRO* const device (&ssbo_bs_readonly)[4]) +{ + ssbo_bs[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_ds[gl_WorkGroupID.x]->data[0].xy + ssbo_bs_readonly[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x]; +} + +static inline __attribute__((always_inline)) +void func3(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, device SSBO_E& ssbo_e, constant UBO_G& ubo_g, device SSBO_F& ssbo_f, constant UBO_H& ubo_h, const device SSBO_I& ssbo_i) +{ + ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x].x; + ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y].xy + ssbo_i.data[gl_GlobalInvocationID.x]; +} + +kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& v_42 [[buffer(1)]], device void* spvBufferAliasSet2Binding0 [[buffer(2)]], constant void* spvBufferAliasSet2Binding1 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + device auto& ssbo_e = *(device SSBO_E*)spvBufferAliasSet2Binding0; + constant auto& ubo_g = *(constant UBO_G*)spvBufferAliasSet2Binding1; + device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding0; + constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding1; + const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding0; + device auto& ssbo_b = (device SSBO_B&)(*spvDescriptorSet0.ssbo_a); + constant auto& ubo_d = (constant UBO_D&)(*spvDescriptorSet0.ubo_c); + const device auto& ssbo_b_readonly = (const device SSBO_BRO&)(*spvDescriptorSet0.ssbo_a); + const device auto& ssbo_bs = (device SSBO_Bs* const device 
(&)[4])spvDescriptorSet0.ssbo_as; + const device auto& ubo_ds = (constant UBO_Ds* const device (&)[4])spvDescriptorSet0.ubo_cs; + const device auto& ssbo_bs_readonly = (const device SSBO_BsRO* const device (&)[4])spvDescriptorSet0.ssbo_as; + func0((*spvDescriptorSet0.ssbo_a), gl_GlobalInvocationID, (*spvDescriptorSet0.ubo_c), gl_WorkGroupID, v_42, ssbo_b, ubo_d, ssbo_b_readonly); + func1(gl_GlobalInvocationID, gl_WorkGroupID, spvDescriptorSet0.ssbo_as, spvDescriptorSet0.ubo_cs); + func2(gl_GlobalInvocationID, gl_WorkGroupID, ssbo_bs, ubo_ds, ssbo_bs_readonly); + func3(gl_GlobalInvocationID, gl_WorkGroupID, ssbo_e, ubo_g, ssbo_f, ubo_h, ssbo_i); +} + diff --git a/reference/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp b/reference/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp new file mode 100644 index 00000000000..587f1ee8e0d --- /dev/null +++ b/reference/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp @@ -0,0 +1,142 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO_A +{ + float data[1]; +}; + +struct UBO_C +{ + float4 data[1024]; +}; + +struct Registers +{ + float reg; +}; + +struct SSBO_B +{ + uint2 data[1]; +}; + +struct UBO_D +{ + uint4 data[1024]; +}; + +struct SSBO_BRO +{ + uint2 data[1]; +}; + +struct SSBO_As +{ + float data[1]; +}; + +struct UBO_Cs +{ + float4 data[1024]; +}; + +struct SSBO_Bs +{ + uint2 data[1024]; +}; + +struct UBO_Ds +{ + uint4 data[1024]; +}; + +struct SSBO_BsRO +{ + uint2 data[1024]; +}; + +struct SSBO_E +{ + float data[1]; +}; + +struct UBO_G +{ + float4 data[1024]; +}; + +struct SSBO_F +{ + uint2 data[1]; +}; + +struct UBO_H +{ + uint4 data[1024]; +}; + +struct SSBO_I +{ + uint2 data[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +struct spvDescriptorSetBuffer0 +{ + device SSBO_A* ssbo_a [[id(0)]]; + constant UBO_C* ubo_c [[id(1)]]; + 
device SSBO_As* ssbo_as [[id(2)]][4]; + constant UBO_Cs* ubo_cs [[id(6)]][4]; +}; + +static inline __attribute__((always_inline)) +void func0(device SSBO_A& ssbo_a, thread uint3& gl_GlobalInvocationID, constant UBO_C& ubo_c, thread uint3& gl_WorkGroupID, constant Registers& v_42, device SSBO_B& ssbo_b, constant UBO_D& ubo_d, const device SSBO_BRO& ssbo_b_readonly) +{ + ssbo_a.data[gl_GlobalInvocationID.x] = ubo_c.data[gl_WorkGroupID.x].x + v_42.reg; + ssbo_b.data[gl_GlobalInvocationID.x] = ubo_d.data[gl_WorkGroupID.y].xy + ssbo_b_readonly.data[gl_GlobalInvocationID.x]; +} + +static inline __attribute__((always_inline)) +void func1(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, device SSBO_As* constant (&ssbo_as)[4], constant UBO_Cs* constant (&ubo_cs)[4]) +{ + ssbo_as[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_cs[gl_WorkGroupID.x]->data[0].x; +} + +static inline __attribute__((always_inline)) +void func2(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, device SSBO_Bs* constant (&ssbo_bs)[4], constant UBO_Ds* constant (&ubo_ds)[4], const device SSBO_BsRO* constant (&ssbo_bs_readonly)[4]) +{ + ssbo_bs[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_ds[gl_WorkGroupID.x]->data[0].xy + ssbo_bs_readonly[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x]; +} + +static inline __attribute__((always_inline)) +void func3(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, device SSBO_E& ssbo_e, constant UBO_G& ubo_g, device SSBO_F& ssbo_f, constant UBO_H& ubo_h, const device SSBO_I& ssbo_i) +{ + ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x].x; + ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y].xy + ssbo_i.data[gl_GlobalInvocationID.x]; +} + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& v_42 [[buffer(1)]], device void* spvBufferAliasSet2Binding0 [[buffer(2)]], constant void* spvBufferAliasSet2Binding1 
[[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + device auto& ssbo_e = *(device SSBO_E*)spvBufferAliasSet2Binding0; + constant auto& ubo_g = *(constant UBO_G*)spvBufferAliasSet2Binding1; + device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding0; + constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding1; + const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding0; + device auto& ssbo_b = (device SSBO_B&)(*spvDescriptorSet0.ssbo_a); + constant auto& ubo_d = (constant UBO_D&)(*spvDescriptorSet0.ubo_c); + const device auto& ssbo_b_readonly = (const device SSBO_BRO&)(*spvDescriptorSet0.ssbo_a); + constant auto& ssbo_bs = (device SSBO_Bs* constant (&)[4])spvDescriptorSet0.ssbo_as; + constant auto& ubo_ds = (constant UBO_Ds* constant (&)[4])spvDescriptorSet0.ubo_cs; + constant auto& ssbo_bs_readonly = (const device SSBO_BsRO* constant (&)[4])spvDescriptorSet0.ssbo_as; + func0((*spvDescriptorSet0.ssbo_a), gl_GlobalInvocationID, (*spvDescriptorSet0.ubo_c), gl_WorkGroupID, v_42, ssbo_b, ubo_d, ssbo_b_readonly); + func1(gl_GlobalInvocationID, gl_WorkGroupID, spvDescriptorSet0.ssbo_as, spvDescriptorSet0.ubo_cs); + func2(gl_GlobalInvocationID, gl_WorkGroupID, ssbo_bs, ubo_ds, ssbo_bs_readonly); + func3(gl_GlobalInvocationID, gl_WorkGroupID, ssbo_e, ubo_g, ssbo_f, ubo_h, ssbo_i); +} + diff --git a/reference/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp b/reference/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp new file mode 100644 index 00000000000..dde7f47b085 --- /dev/null +++ b/reference/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp @@ -0,0 +1,71 @@ +#include +#include +#if __METAL_VERSION__ >= 230 +#include +using namespace metal::raytracing; +#endif + +using namespace metal; + +struct Params +{ + uint ray_flags; + uint cull_mask; + char _m2_pad[8]; + packed_float3 origin; + float tmin; + packed_float3 dir; + 
float tmax; + float thit; +}; + +kernel void main0(constant Params& _18 [[buffer(1)]], raytracing::acceleration_structure AS0 [[buffer(0)]], raytracing::acceleration_structure AS1 [[buffer(2)]]) +{ + raytracing::intersection_query q; + q.reset(ray(_18.origin, _18.dir, _18.tmin, _18.tmax), AS0, intersection_params()); + raytracing::intersection_query q2[2]; + q2[1].reset(ray(_18.origin, _18.dir, _18.tmin, _18.tmax), AS1, intersection_params()); + bool _63 = q.next(); + bool res = _63; + q2[0].abort(); + q.commit_bounding_box_intersection(_18.thit); + q2[1].commit_triangle_intersection(); + float _71 = q.get_ray_min_distance(); + float fval = _71; + float3 _74 = q.get_world_space_ray_direction(); + float3 fvals = _74; + float3 _75 = q.get_world_space_ray_origin(); + fvals = _75; + uint _80 = uint(q2[1].get_committed_intersection_type()); + uint type = _80; + uint _83 = uint(q2[0].get_candidate_intersection_type()) - 1; + type = _83; + bool _85 = q2[1].is_candidate_non_opaque_bounding_box(); + res = _85; + float _87 = q2[1].get_committed_distance(); + fval = _87; + float _89 = q2[1].get_candidate_triangle_distance(); + fval = _89; + int _92 = q.get_committed_user_instance_id(); + int ival = _92; + int _94 = q2[0].get_candidate_instance_id(); + ival = _94; + int _96 = q2[1].get_candidate_geometry_id(); + ival = _96; + int _97 = q.get_committed_primitive_id(); + ival = _97; + float2 _100 = q2[0].get_candidate_triangle_barycentric_coord(); + fvals.x = _100.x; + fvals.y = _100.y; + bool _107 = q.is_committed_triangle_front_facing(); + res = _107; + float3 _108 = q.get_candidate_ray_direction(); + fvals = _108; + float3 _110 = q2[0].get_committed_ray_origin(); + fvals = _110; + float4x3 _114 = q.get_candidate_object_to_world_transform(); + float4x3 matrices = _114; + float4x3 _116 = q2[1].get_committed_world_to_object_transform(); + matrices = _116; +} + diff --git a/reference/shaders-msl/comp/read-write-only.comp b/reference/shaders-msl/comp/read-write-only.comp index 
7547b417d8f..0cf8d8e3215 100644 --- a/reference/shaders-msl/comp/read-write-only.comp +++ b/reference/shaders-msl/comp/read-write-only.comp @@ -21,6 +21,8 @@ struct SSBO1 float4 data3; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _10 [[buffer(0)]], const device SSBO0& _15 [[buffer(1)]], device SSBO1& _21 [[buffer(2)]]) { _10.data4 = _15.data0 + _21.data2; diff --git a/reference/shaders-msl/comp/rmw-matrix.comp b/reference/shaders-msl/comp/rmw-matrix.comp index 150db7ede98..b53a3a75c27 100644 --- a/reference/shaders-msl/comp/rmw-matrix.comp +++ b/reference/shaders-msl/comp/rmw-matrix.comp @@ -13,6 +13,8 @@ struct SSBO float4x4 c1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _11 [[buffer(0)]]) { _11.a *= _11.a1; diff --git a/reference/shaders-msl/comp/rmw-opt.comp b/reference/shaders-msl/comp/rmw-opt.comp index 060f9f9c717..229154fc219 100644 --- a/reference/shaders-msl/comp/rmw-opt.comp +++ b/reference/shaders-msl/comp/rmw-opt.comp @@ -8,6 +8,8 @@ struct SSBO int a; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.a += 10; diff --git a/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp index 312a6f9453a..9bf87817747 100644 --- a/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp @@ -10,12 +10,16 @@ struct SSBO float c; float d; float e; + float f; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.c = abs(_9.a - _9.b); _9.d = abs(_9.a); _9.e = sign(_9.a); + _9.f = abs((_9.a - 1.0) - (_9.b - 2.0)); } diff --git a/reference/shaders-msl/comp/shared-array-of-arrays.comp b/reference/shaders-msl/comp/shared-array-of-arrays.comp 
index 7acb0ab8573..8b532368959 100644 --- a/reference/shaders-msl/comp/shared-array-of-arrays.comp +++ b/reference/shaders-msl/comp/shared-array-of-arrays.comp @@ -12,6 +12,7 @@ struct SSBO constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 4u, 1u); +static inline __attribute__((always_inline)) void work(threadgroup float (&foo)[4][4], thread uint3& gl_LocalInvocationID, thread uint& gl_LocalInvocationIndex, device SSBO& v_67, thread uint3& gl_GlobalInvocationID) { foo[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = float(gl_LocalInvocationIndex); diff --git a/reference/shaders-msl/comp/shared-matrix-array-of-array.comp b/reference/shaders-msl/comp/shared-matrix-array-of-array.comp new file mode 100644 index 00000000000..173b31cde62 --- /dev/null +++ b/reference/shaders-msl/comp/shared-matrix-array-of-array.comp @@ -0,0 +1,1286 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data 
spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + 
spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + 
+ spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator 
matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) 
threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const 
object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + 
threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data 
spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant 
matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < 
Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + 
{ + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& 
operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct S1 +{ + 
spvStorage_float4x3 a[2]; + float b; + spvUnsafeArray c; +}; + +struct S2 +{ + int4 a; + spvUnsafeArray, 1>, 3> b; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +bool compare_float(thread const float& a, thread const float& b) +{ + return abs(a - b) < 0.0500000007450580596923828125; +} + +static inline __attribute__((always_inline)) +bool compare_vec3(thread const float3& a, thread const float3& b) +{ + float param = a.x; + float param_1 = b.x; + bool _85 = compare_float(param, param_1); + bool _95; + if (_85) + { + float param_2 = a.y; + float param_3 = b.y; + _95 = compare_float(param_2, param_3); + } + else + { + _95 = _85; + } + bool _106; + if (_95) + { + float param_4 = a.z; + float param_5 = b.z; + _106 = compare_float(param_4, param_5); + } + else + { + _106 = _95; + } + return _106; +} + +static inline __attribute__((always_inline)) +bool compare_mat4x3(thread const float4x3& a, thread const float4x3& b) +{ + float3 param = a[0]; + float3 param_1 = b[0]; + bool _116 = compare_vec3(param, param_1); + bool _127; + if (_116) + { + float3 param_2 = a[1]; + float3 param_3 = b[1]; + _127 = compare_vec3(param_2, param_3); + } + else + { + _127 = _116; + } + bool _138; + if (_127) + { + float3 param_4 = a[2]; + float3 param_5 = b[2]; + _138 = compare_vec3(param_4, param_5); + } + else + { + _138 = _127; + } + bool _149; + if (_138) + { + float3 param_6 = a[3]; + float3 param_7 = b[3]; + _149 = compare_vec3(param_6, param_7); + } + else + { + _149 = _138; + } + return _149; +} + +static inline __attribute__((always_inline)) +bool compare_vec2(thread const float2& a, thread const float2& b) +{ + float param = a.x; + float param_1 = b.x; + bool _65 = compare_float(param, param_1); + bool _76; + if (_65) + { + float param_2 = a.y; + float param_3 = b.y; + _76 = compare_float(param_2, param_3); + } + else + { + _76 = _65; + } + return _76; +} + +static inline 
__attribute__((always_inline)) +bool compare_ivec4(thread const int4& a, thread const int4& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_bool(thread const bool& a, thread const bool& b) +{ + return a == b; +} + +kernel void main0(device block& _383 [[buffer(0)]]) +{ + threadgroup S1 s1; + threadgroup S2 s2; + s1.a[0] = spvStorage_float4x3(float4x3(float3(0.0, 2.0, -8.0), float3(6.0, 7.0, 5.0), float3(-6.0, 1.0, 9.0), float3(-4.0, -3.0, 4.0))); + s1.a[1] = spvStorage_float4x3(float4x3(float3(4.0, 9.0, -9.0), float3(-8.0, -9.0, 8.0), float3(0.0, 4.0, -4.0), float3(7.0, 2.0, -1.0))); + s1.b = 7.0; + s1.c[0] = float2(-5.0, -4.0); + s1.c[1] = float2(3.0, -5.0); + s1.c[2] = float2(-3.0, -1.0); + s2.a = int4(1, 0, -3, 1); + s2.b[0][0][0] = short(true); + s2.b[0][0][1] = short(false); + s2.b[0][0][2] = short(false); + s2.b[1][0][0] = short(true); + s2.b[1][0][1] = short(false); + s2.b[1][0][2] = short(true); + s2.b[2][0][0] = short(false); + s2.b[2][0][1] = short(true); + s2.b[2][0][2] = short(true); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool allOk = true; + bool _242; + if (allOk) + { + float4x3 param = float4x3(float3(0.0, 2.0, -8.0), float3(6.0, 7.0, 5.0), float3(-6.0, 1.0, 9.0), float3(-4.0, -3.0, 4.0)); + float4x3 param_1 = float4x3(s1.a[0]); + _242 = compare_mat4x3(param, param_1); + } + else + { + _242 = allOk; + } + allOk = _242; + bool _251; + if (allOk) + { + float4x3 param_2 = float4x3(float3(4.0, 9.0, -9.0), float3(-8.0, -9.0, 8.0), float3(0.0, 4.0, -4.0), float3(7.0, 2.0, -1.0)); + float4x3 param_3 = float4x3(s1.a[1]); + _251 = compare_mat4x3(param_2, param_3); + } + else + { + _251 = allOk; + } + allOk = _251; + bool _260; + if (allOk) + { + float param_4 = 7.0; + float param_5 = s1.b; + _260 = compare_float(param_4, param_5); + } + else + { + _260 = allOk; + } + allOk = _260; + bool _269; + if 
(allOk) + { + float2 param_6 = float2(-5.0, -4.0); + float2 param_7 = s1.c[0]; + _269 = compare_vec2(param_6, param_7); + } + else + { + _269 = allOk; + } + allOk = _269; + bool _278; + if (allOk) + { + float2 param_8 = float2(3.0, -5.0); + float2 param_9 = s1.c[1]; + _278 = compare_vec2(param_8, param_9); + } + else + { + _278 = allOk; + } + allOk = _278; + bool _287; + if (allOk) + { + float2 param_10 = float2(-3.0, -1.0); + float2 param_11 = s1.c[2]; + _287 = compare_vec2(param_10, param_11); + } + else + { + _287 = allOk; + } + allOk = _287; + bool _296; + if (allOk) + { + int4 param_12 = int4(1, 0, -3, 1); + int4 param_13 = s2.a; + _296 = compare_ivec4(param_12, param_13); + } + else + { + _296 = allOk; + } + allOk = _296; + bool _305; + if (allOk) + { + bool param_14 = true; + bool param_15 = bool(s2.b[0][0][0]); + _305 = compare_bool(param_14, param_15); + } + else + { + _305 = allOk; + } + allOk = _305; + bool _314; + if (allOk) + { + bool param_16 = false; + bool param_17 = bool(s2.b[0][0][1]); + _314 = compare_bool(param_16, param_17); + } + else + { + _314 = allOk; + } + allOk = _314; + bool _323; + if (allOk) + { + bool param_18 = false; + bool param_19 = bool(s2.b[0][0][2]); + _323 = compare_bool(param_18, param_19); + } + else + { + _323 = allOk; + } + allOk = _323; + bool _332; + if (allOk) + { + bool param_20 = true; + bool param_21 = bool(s2.b[1][0][0]); + _332 = compare_bool(param_20, param_21); + } + else + { + _332 = allOk; + } + allOk = _332; + bool _341; + if (allOk) + { + bool param_22 = false; + bool param_23 = bool(s2.b[1][0][1]); + _341 = compare_bool(param_22, param_23); + } + else + { + _341 = allOk; + } + allOk = _341; + bool _350; + if (allOk) + { + bool param_24 = true; + bool param_25 = bool(s2.b[1][0][2]); + _350 = compare_bool(param_24, param_25); + } + else + { + _350 = allOk; + } + allOk = _350; + bool _359; + if (allOk) + { + bool param_26 = false; + bool param_27 = bool(s2.b[2][0][0]); + _359 = compare_bool(param_26, param_27); 
+ } + else + { + _359 = allOk; + } + allOk = _359; + bool _368; + if (allOk) + { + bool param_28 = true; + bool param_29 = bool(s2.b[2][0][1]); + _368 = compare_bool(param_28, param_29); + } + else + { + _368 = allOk; + } + allOk = _368; + bool _377; + if (allOk) + { + bool param_30 = true; + bool param_31 = bool(s2.b[2][0][2]); + _377 = compare_bool(param_30, param_31); + } + else + { + _377 = allOk; + } + allOk = _377; + if (allOk) + { + _383.passed++; + } +} + diff --git a/reference/shaders-msl/comp/shared-matrix-cast.comp b/reference/shaders-msl/comp/shared-matrix-cast.comp new file mode 100644 index 00000000000..c764c1fdbd5 --- /dev/null +++ b/reference/shaders-msl/comp/shared-matrix-cast.comp @@ -0,0 +1,1065 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant 
spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef 
__HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; 
i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i 
= 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + + spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + 
columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return 
*this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data 
matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { 
+ for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for 
(size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = 
m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data 
spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i 
< Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; 
+typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct S1 +{ + float4 a; + spvStorage_float3x2 b; + short4 c; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +bool compare_float(thread const float& a, thread const float& b) +{ + return abs(a - b) < 0.0500000007450580596923828125; +} + +static inline __attribute__((always_inline)) +bool compare_vec4(thread const float4& a, thread const float4& b) +{ + float param = a.x; + float param_1 = b.x; + bool _78 = compare_float(param, param_1); + bool _88; + if (_78) + { + float param_2 = a.y; + float param_3 = b.y; + _88 = compare_float(param_2, param_3); + } + else + { + _88 = _78; + } + bool _99; + if (_88) + { + float param_4 = a.z; + float param_5 = b.z; + _99 = compare_float(param_4, param_5); + } + else + { + _99 = _88; + } + bool _110; + if (_99) + { + float param_6 = a.w; + float param_7 = b.w; + _110 = compare_float(param_6, param_7); + } + else + { + _110 = _99; + } + return _110; +} + +static inline __attribute__((always_inline)) +bool compare_vec2(thread const float2& a, thread const float2& b) +{ + float param = a.x; + float param_1 = b.x; + bool _58 = compare_float(param, param_1); + bool _69; + if (_58) + { + float param_2 = a.y; + float param_3 = b.y; + _69 = compare_float(param_2, param_3); + } + else + { + _69 = _58; + } + return _69; +} + +static inline __attribute__((always_inline)) +bool compare_mat3x2(thread const float3x2& a, thread const float3x2& b) +{ + float2 param = a[0]; + float2 param_1 = b[0]; + bool _121 = compare_vec2(param, param_1); + bool _132; + if (_121) + { + float2 param_2 = a[1]; + float2 param_3 = b[1]; + _132 = compare_vec2(param_2, param_3); + } + else + { + _132 = _121; + } + bool _143; + if (_132) + { + float2 param_4 = a[2]; + float2 param_5 = b[2]; + _143 = 
compare_vec2(param_4, param_5); + } + else + { + _143 = _132; + } + return _143; +} + +static inline __attribute__((always_inline)) +bool compare_bvec4(thread const bool4& a, thread const bool4& b) +{ + return all(a == b); +} + +kernel void main0(device block& _212 [[buffer(0)]]) +{ + threadgroup S1 s1; + s1.a = float4(1.0, -5.0, -9.0, -5.0); + s1.b = spvStorage_float3x2(float3x2(float2(1.0, -7.0), float2(1.0, 2.0), float2(8.0, 7.0))); + s1.c = short4(bool4(false, true, false, false)); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool allOk = true; + bool _188; + if (allOk) + { + float4 param = float4(1.0, -5.0, -9.0, -5.0); + float4 param_1 = s1.a; + _188 = compare_vec4(param, param_1); + } + else + { + _188 = allOk; + } + allOk = _188; + bool _197; + if (allOk) + { + float3x2 param_2 = float3x2(float2(1.0, -7.0), float2(1.0, 2.0), float2(8.0, 7.0)); + float3x2 param_3 = float3x2(s1.b); + _197 = compare_mat3x2(param_2, param_3); + } + else + { + _197 = allOk; + } + allOk = _197; + bool _206; + if (allOk) + { + bool4 param_4 = bool4(false, true, false, false); + bool4 param_5 = bool4(s1.c); + _206 = compare_bvec4(param_4, param_5); + } + else + { + _206 = allOk; + } + allOk = _206; + if (allOk) + { + _212.passed++; + } +} + diff --git a/reference/shaders-msl/comp/shared-matrix-nested-struct-array.comp b/reference/shaders-msl/comp/shared-matrix-nested-struct-array.comp new file mode 100644 index 00000000000..db5ed440f88 --- /dev/null +++ b/reference/shaders-msl/comp/shared-matrix-nested-struct-array.comp @@ -0,0 +1,1316 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data 
spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + 
spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + 
+ spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator 
matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) 
threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const 
object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + 
threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data 
spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant 
matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < 
Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + 
{ + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& 
operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct sA +{ + 
spvStorage_float2x3 mA; +}; + +struct sB +{ + spvStorage_float2x2 mA; + spvStorage_float3x2 mB; + uint3 mC; +}; + +struct sC +{ + sA mA; + sB mB; +}; + +struct sD +{ + sC mA; +}; + +struct sE +{ + spvStorage_float3x2 mA; + spvStorage_float4x3 mB; +}; + +struct sF +{ + sE mA; +}; + +struct sG +{ + sF mA; +}; + +struct sH +{ + spvUnsafeArray mA; +}; + +struct S1 +{ + sD a; + sG b; + spvUnsafeArray c; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +bool compare_float(thread const float& a, thread const float& b) +{ + return abs(a - b) < 0.0500000007450580596923828125; +} + +static inline __attribute__((always_inline)) +bool compare_vec3(thread const float3& a, thread const float3& b) +{ + float param = a.x; + float param_1 = b.x; + bool _106 = compare_float(param, param_1); + bool _116; + if (_106) + { + float param_2 = a.y; + float param_3 = b.y; + _116 = compare_float(param_2, param_3); + } + else + { + _116 = _106; + } + bool _127; + if (_116) + { + float param_4 = a.z; + float param_5 = b.z; + _127 = compare_float(param_4, param_5); + } + else + { + _127 = _116; + } + return _127; +} + +static inline __attribute__((always_inline)) +bool compare_mat2x3(thread const float2x3& a, thread const float2x3& b) +{ + float3 param = a[0]; + float3 param_1 = b[0]; + bool _158 = compare_vec3(param, param_1); + bool _168; + if (_158) + { + float3 param_2 = a[1]; + float3 param_3 = b[1]; + _168 = compare_vec3(param_2, param_3); + } + else + { + _168 = _158; + } + return _168; +} + +static inline __attribute__((always_inline)) +bool compare_vec2(thread const float2& a, thread const float2& b) +{ + float param = a.x; + float param_1 = b.x; + bool _86 = compare_float(param, param_1); + bool _97; + if (_86) + { + float param_2 = a.y; + float param_3 = b.y; + _97 = compare_float(param_2, param_3); + } + else + { + _97 = _86; + } + return _97; +} + +static inline 
__attribute__((always_inline)) +bool compare_mat2(thread const float2x2& a, thread const float2x2& b) +{ + float2 param = a[0]; + float2 param_1 = b[0]; + bool _138 = compare_vec2(param, param_1); + bool _149; + if (_138) + { + float2 param_2 = a[1]; + float2 param_3 = b[1]; + _149 = compare_vec2(param_2, param_3); + } + else + { + _149 = _138; + } + return _149; +} + +static inline __attribute__((always_inline)) +bool compare_mat3x2(thread const float3x2& a, thread const float3x2& b) +{ + float2 param = a[0]; + float2 param_1 = b[0]; + bool _177 = compare_vec2(param, param_1); + bool _187; + if (_177) + { + float2 param_2 = a[1]; + float2 param_3 = b[1]; + _187 = compare_vec2(param_2, param_3); + } + else + { + _187 = _177; + } + bool _198; + if (_187) + { + float2 param_4 = a[2]; + float2 param_5 = b[2]; + _198 = compare_vec2(param_4, param_5); + } + else + { + _198 = _187; + } + return _198; +} + +static inline __attribute__((always_inline)) +bool compare_uvec3(thread const uint3& a, thread const uint3& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_mat4x3(thread const float4x3& a, thread const float4x3& b) +{ + float3 param = a[0]; + float3 param_1 = b[0]; + bool _207 = compare_vec3(param, param_1); + bool _217; + if (_207) + { + float3 param_2 = a[1]; + float3 param_3 = b[1]; + _217 = compare_vec3(param_2, param_3); + } + else + { + _217 = _207; + } + bool _227; + if (_217) + { + float3 param_4 = a[2]; + float3 param_5 = b[2]; + _227 = compare_vec3(param_4, param_5); + } + else + { + _227 = _217; + } + bool _238; + if (_227) + { + float3 param_6 = a[3]; + float3 param_7 = b[3]; + _238 = compare_vec3(param_6, param_7); + } + else + { + _238 = _227; + } + return _238; +} + +static inline __attribute__((always_inline)) +bool compare_bvec3(thread const bool3& a, thread const bool3& b) +{ + return all(a == b); +} + +kernel void main0(device block& _424 [[buffer(0)]]) +{ + threadgroup S1 s1; + s1.a.mA.mA.mA = 
spvStorage_float2x3(float2x3(float3(6.0, 8.0, 8.0), float3(0.0, -4.0, -5.0))); + s1.a.mA.mB.mA = spvStorage_float2x2(float2x2(float2(9.0, -4.0), float2(-6.0, -1.0))); + s1.a.mA.mB.mB = spvStorage_float3x2(float3x2(float2(-1.0, -2.0), float2(1.0, 6.0), float2(5.0, 7.0))); + s1.a.mA.mB.mC = uint3(3u, 1u, 5u); + s1.b.mA.mA.mA = spvStorage_float3x2(float3x2(float2(8.0, 3.0), float2(0.0, 2.0), float2(1.0, 8.0))); + s1.b.mA.mA.mB = spvStorage_float4x3(float4x3(float3(0.0, 9.0, -1.0), float3(-1.0, -7.0, 7.0), float3(-4.0, -3.0, 1.0), float3(-4.0, -9.0, 1.0))); + s1.c[0].mA[0] = short3(bool3(true, false, false)); + s1.c[0].mA[1] = short3(bool3(true, false, false)); + s1.c[1].mA[0] = short3(bool3(false)); + s1.c[1].mA[1] = short3(bool3(false)); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool allOk = true; + bool _337; + if (allOk) + { + float2x3 param = float2x3(float3(6.0, 8.0, 8.0), float3(0.0, -4.0, -5.0)); + float2x3 param_1 = float2x3(s1.a.mA.mA.mA); + _337 = compare_mat2x3(param, param_1); + } + else + { + _337 = allOk; + } + allOk = _337; + bool _346; + if (allOk) + { + float2x2 param_2 = float2x2(float2(9.0, -4.0), float2(-6.0, -1.0)); + float2x2 param_3 = float2x2(s1.a.mA.mB.mA); + _346 = compare_mat2(param_2, param_3); + } + else + { + _346 = allOk; + } + allOk = _346; + bool _355; + if (allOk) + { + float3x2 param_4 = float3x2(float2(-1.0, -2.0), float2(1.0, 6.0), float2(5.0, 7.0)); + float3x2 param_5 = float3x2(s1.a.mA.mB.mB); + _355 = compare_mat3x2(param_4, param_5); + } + else + { + _355 = allOk; + } + allOk = _355; + bool _364; + if (allOk) + { + uint3 param_6 = uint3(3u, 1u, 5u); + uint3 param_7 = s1.a.mA.mB.mC; + _364 = compare_uvec3(param_6, param_7); + } + else + { + _364 = allOk; + } + allOk = _364; + bool _373; + if (allOk) + { + float3x2 param_8 = float3x2(float2(8.0, 3.0), float2(0.0, 2.0), float2(1.0, 8.0)); + float3x2 param_9 = 
float3x2(s1.b.mA.mA.mA); + _373 = compare_mat3x2(param_8, param_9); + } + else + { + _373 = allOk; + } + allOk = _373; + bool _382; + if (allOk) + { + float4x3 param_10 = float4x3(float3(0.0, 9.0, -1.0), float3(-1.0, -7.0, 7.0), float3(-4.0, -3.0, 1.0), float3(-4.0, -9.0, 1.0)); + float4x3 param_11 = float4x3(s1.b.mA.mA.mB); + _382 = compare_mat4x3(param_10, param_11); + } + else + { + _382 = allOk; + } + allOk = _382; + bool _391; + if (allOk) + { + bool3 param_12 = bool3(true, false, false); + bool3 param_13 = bool3(s1.c[0].mA[0]); + _391 = compare_bvec3(param_12, param_13); + } + else + { + _391 = allOk; + } + allOk = _391; + bool _400; + if (allOk) + { + bool3 param_14 = bool3(true, false, false); + bool3 param_15 = bool3(s1.c[0].mA[1]); + _400 = compare_bvec3(param_14, param_15); + } + else + { + _400 = allOk; + } + allOk = _400; + bool _409; + if (allOk) + { + bool3 param_16 = bool3(false); + bool3 param_17 = bool3(s1.c[1].mA[0]); + _409 = compare_bvec3(param_16, param_17); + } + else + { + _409 = allOk; + } + allOk = _409; + bool _418; + if (allOk) + { + bool3 param_18 = bool3(false); + bool3 param_19 = bool3(s1.c[1].mA[1]); + _418 = compare_bvec3(param_18, param_19); + } + else + { + _418 = allOk; + } + allOk = _418; + if (allOk) + { + _424.passed++; + } +} + diff --git a/reference/shaders-msl/comp/shared-matrix-nested-struct.comp b/reference/shaders-msl/comp/shared-matrix-nested-struct.comp new file mode 100644 index 00000000000..2526c6c93b7 --- /dev/null +++ b/reference/shaders-msl/comp/shared-matrix-nested-struct.comp @@ -0,0 +1,1473 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + 
spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const 
threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + 
columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + + spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock 
spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread 
spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const 
threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock 
spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) 
threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t 
idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) 
ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return 
columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const 
threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { 
+ return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct S1 +{ + uint a; + float4 b; +}; + +struct sA +{ + spvStorage_float4x4 mA; + short3 mB; + short4 mC; +}; + +struct sB +{ + short2 mA; +}; + +struct sC +{ + float mA; + uint4 mB; + float mC; +}; + +struct sD +{ + sA mA; + sB mB; + sC mC; +}; + +struct sE +{ + sD mA; +}; + +struct sF +{ + uint3 mA; + short mB; +}; + +struct sG +{ + sF mA; + spvStorage_float3x2 mB; +}; + +struct sH +{ + sG mA; + float2 mB; +}; + +struct sI +{ + spvStorage_float2x2 mA; + short3 mB; + short4 mC; +}; + +struct sJ +{ + sI mA; + short3 mB; +}; + +struct sK +{ + short2 mA; + sJ mB; + int2 mC; +}; + +struct S2 +{ + sE a; + int3 b; + sH c; + sK d; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +bool compare_uint(thread const uint& a, thread const uint& b) +{ + return a == b; +} + +static inline __attribute__((always_inline)) +bool 
compare_float(thread const float& a, thread const float& b) +{ + return abs(a - b) < 0.0500000007450580596923828125; +} + +static inline __attribute__((always_inline)) +bool compare_vec4(thread const float4& a, thread const float4& b) +{ + float param = a.x; + float param_1 = b.x; + bool _147 = compare_float(param, param_1); + bool _157; + if (_147) + { + float param_2 = a.y; + float param_3 = b.y; + _157 = compare_float(param_2, param_3); + } + else + { + _157 = _147; + } + bool _168; + if (_157) + { + float param_4 = a.z; + float param_5 = b.z; + _168 = compare_float(param_4, param_5); + } + else + { + _168 = _157; + } + bool _179; + if (_168) + { + float param_6 = a.w; + float param_7 = b.w; + _179 = compare_float(param_6, param_7); + } + else + { + _179 = _168; + } + return _179; +} + +static inline __attribute__((always_inline)) +bool compare_mat4(thread const float4x4& a, thread const float4x4& b) +{ + float4 param = a[0]; + float4 param_1 = b[0]; + bool _239 = compare_vec4(param, param_1); + bool _249; + if (_239) + { + float4 param_2 = a[1]; + float4 param_3 = b[1]; + _249 = compare_vec4(param_2, param_3); + } + else + { + _249 = _239; + } + bool _259; + if (_249) + { + float4 param_4 = a[2]; + float4 param_5 = b[2]; + _259 = compare_vec4(param_4, param_5); + } + else + { + _259 = _249; + } + bool _270; + if (_259) + { + float4 param_6 = a[3]; + float4 param_7 = b[3]; + _270 = compare_vec4(param_6, param_7); + } + else + { + _270 = _259; + } + return _270; +} + +static inline __attribute__((always_inline)) +bool compare_bvec3(thread const bool3& a, thread const bool3& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_bvec4(thread const bool4& a, thread const bool4& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_bvec2(thread const bool2& a, thread const bool2& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_uvec4(thread const 
uint4& a, thread const uint4& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_ivec3(thread const int3& a, thread const int3& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_uvec3(thread const uint3& a, thread const uint3& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_bool(thread const bool& a, thread const bool& b) +{ + return a == b; +} + +static inline __attribute__((always_inline)) +bool compare_vec2(thread const float2& a, thread const float2& b) +{ + float param = a.x; + float param_1 = b.x; + bool _127 = compare_float(param, param_1); + bool _138; + if (_127) + { + float param_2 = a.y; + float param_3 = b.y; + _138 = compare_float(param_2, param_3); + } + else + { + _138 = _127; + } + return _138; +} + +static inline __attribute__((always_inline)) +bool compare_mat3x2(thread const float3x2& a, thread const float3x2& b) +{ + float2 param = a[0]; + float2 param_1 = b[0]; + bool _209 = compare_vec2(param, param_1); + bool _219; + if (_209) + { + float2 param_2 = a[1]; + float2 param_3 = b[1]; + _219 = compare_vec2(param_2, param_3); + } + else + { + _219 = _209; + } + bool _230; + if (_219) + { + float2 param_4 = a[2]; + float2 param_5 = b[2]; + _230 = compare_vec2(param_4, param_5); + } + else + { + _230 = _219; + } + return _230; +} + +static inline __attribute__((always_inline)) +bool compare_mat2(thread const float2x2& a, thread const float2x2& b) +{ + float2 param = a[0]; + float2 param_1 = b[0]; + bool _189 = compare_vec2(param, param_1); + bool _200; + if (_189) + { + float2 param_2 = a[1]; + float2 param_3 = b[1]; + _200 = compare_vec2(param_2, param_3); + } + else + { + _200 = _189; + } + return _200; +} + +static inline __attribute__((always_inline)) +bool compare_ivec2(thread const int2& a, thread const int2& b) +{ + return all(a == b); +} + +kernel void main0(device block& _612 [[buffer(0)]]) +{ + threadgroup S1 s1; 
+ threadgroup S2 s2; + s1.a = 0u; + s1.b = float4(8.0, 8.0, 0.0, -4.0); + s2.a.mA.mA.mA = spvStorage_float4x4(float4x4(float4(-5.0, 9.0, -4.0, -6.0), float4(-1.0, -1.0, -2.0, 1.0), float4(6.0, 5.0, 7.0, -2.0), float4(-4.0, -9.0, 8.0, 3.0))); + s2.a.mA.mA.mB = short3(bool3(true, false, false)); + s2.a.mA.mA.mC = short4(bool4(true, true, true, false)); + s2.a.mA.mB.mA = short2(bool2(true)); + s2.a.mA.mC.mA = 7.0; + s2.a.mA.mC.mB = uint4(8u, 6u, 2u, 0u); + s2.a.mA.mC.mC = -9.0; + s2.b = int3(1, -4, 0); + s2.c.mA.mA.mA = uint3(4u, 9u, 1u); + s2.c.mA.mA.mB = short(false); + s2.c.mA.mB = spvStorage_float3x2(float3x2(float2(3.0, -5.0), float2(-1.0, -5.0), float2(-1.0, -9.0))); + s2.c.mB = float2(-6.0, -9.0); + s2.d.mA = short2(bool2(true, false)); + s2.d.mB.mA.mA = spvStorage_float2x2(float2x2(float2(-2.0, 3.0), float2(7.0, 2.0))); + s2.d.mB.mA.mB = short3(bool3(false)); + s2.d.mB.mA.mC = short4(bool4(false, false, false, true)); + s2.d.mB.mB = short3(bool3(true, false, false)); + s2.d.mC = int2(-9, 0); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool allOk = true; + bool _435; + if (allOk) + { + uint param = 0u; + uint param_1 = s1.a; + _435 = compare_uint(param, param_1); + } + else + { + _435 = allOk; + } + allOk = _435; + bool _444; + if (allOk) + { + float4 param_2 = float4(8.0, 8.0, 0.0, -4.0); + float4 param_3 = s1.b; + _444 = compare_vec4(param_2, param_3); + } + else + { + _444 = allOk; + } + allOk = _444; + bool _453; + if (allOk) + { + float4x4 param_4 = float4x4(float4(-5.0, 9.0, -4.0, -6.0), float4(-1.0, -1.0, -2.0, 1.0), float4(6.0, 5.0, 7.0, -2.0), float4(-4.0, -9.0, 8.0, 3.0)); + float4x4 param_5 = float4x4(s2.a.mA.mA.mA); + _453 = compare_mat4(param_4, param_5); + } + else + { + _453 = allOk; + } + allOk = _453; + bool _462; + if (allOk) + { + bool3 param_6 = bool3(true, false, false); + bool3 param_7 = bool3(s2.a.mA.mA.mB); + _462 = 
compare_bvec3(param_6, param_7); + } + else + { + _462 = allOk; + } + allOk = _462; + bool _471; + if (allOk) + { + bool4 param_8 = bool4(true, true, true, false); + bool4 param_9 = bool4(s2.a.mA.mA.mC); + _471 = compare_bvec4(param_8, param_9); + } + else + { + _471 = allOk; + } + allOk = _471; + bool _480; + if (allOk) + { + bool2 param_10 = bool2(true); + bool2 param_11 = bool2(s2.a.mA.mB.mA); + _480 = compare_bvec2(param_10, param_11); + } + else + { + _480 = allOk; + } + allOk = _480; + bool _489; + if (allOk) + { + float param_12 = 7.0; + float param_13 = s2.a.mA.mC.mA; + _489 = compare_float(param_12, param_13); + } + else + { + _489 = allOk; + } + allOk = _489; + bool _498; + if (allOk) + { + uint4 param_14 = uint4(8u, 6u, 2u, 0u); + uint4 param_15 = s2.a.mA.mC.mB; + _498 = compare_uvec4(param_14, param_15); + } + else + { + _498 = allOk; + } + allOk = _498; + bool _507; + if (allOk) + { + float param_16 = -9.0; + float param_17 = s2.a.mA.mC.mC; + _507 = compare_float(param_16, param_17); + } + else + { + _507 = allOk; + } + allOk = _507; + bool _516; + if (allOk) + { + int3 param_18 = int3(1, -4, 0); + int3 param_19 = s2.b; + _516 = compare_ivec3(param_18, param_19); + } + else + { + _516 = allOk; + } + allOk = _516; + bool _525; + if (allOk) + { + uint3 param_20 = uint3(4u, 9u, 1u); + uint3 param_21 = s2.c.mA.mA.mA; + _525 = compare_uvec3(param_20, param_21); + } + else + { + _525 = allOk; + } + allOk = _525; + bool _534; + if (allOk) + { + bool param_22 = false; + bool param_23 = bool(s2.c.mA.mA.mB); + _534 = compare_bool(param_22, param_23); + } + else + { + _534 = allOk; + } + allOk = _534; + bool _543; + if (allOk) + { + float3x2 param_24 = float3x2(float2(3.0, -5.0), float2(-1.0, -5.0), float2(-1.0, -9.0)); + float3x2 param_25 = float3x2(s2.c.mA.mB); + _543 = compare_mat3x2(param_24, param_25); + } + else + { + _543 = allOk; + } + allOk = _543; + bool _552; + if (allOk) + { + float2 param_26 = float2(-6.0, -9.0); + float2 param_27 = s2.c.mB; + _552 = 
compare_vec2(param_26, param_27); + } + else + { + _552 = allOk; + } + allOk = _552; + bool _561; + if (allOk) + { + bool2 param_28 = bool2(true, false); + bool2 param_29 = bool2(s2.d.mA); + _561 = compare_bvec2(param_28, param_29); + } + else + { + _561 = allOk; + } + allOk = _561; + bool _570; + if (allOk) + { + float2x2 param_30 = float2x2(float2(-2.0, 3.0), float2(7.0, 2.0)); + float2x2 param_31 = float2x2(s2.d.mB.mA.mA); + _570 = compare_mat2(param_30, param_31); + } + else + { + _570 = allOk; + } + allOk = _570; + bool _579; + if (allOk) + { + bool3 param_32 = bool3(false); + bool3 param_33 = bool3(s2.d.mB.mA.mB); + _579 = compare_bvec3(param_32, param_33); + } + else + { + _579 = allOk; + } + allOk = _579; + bool _588; + if (allOk) + { + bool4 param_34 = bool4(false, false, false, true); + bool4 param_35 = bool4(s2.d.mB.mA.mC); + _588 = compare_bvec4(param_34, param_35); + } + else + { + _588 = allOk; + } + allOk = _588; + bool _597; + if (allOk) + { + bool3 param_36 = bool3(true, false, false); + bool3 param_37 = bool3(s2.d.mB.mB); + _597 = compare_bvec3(param_36, param_37); + } + else + { + _597 = allOk; + } + allOk = _597; + bool _606; + if (allOk) + { + int2 param_38 = int2(-9, 0); + int2 param_39 = s2.d.mC; + _606 = compare_ivec2(param_38, param_39); + } + else + { + _606 = allOk; + } + allOk = _606; + if (allOk) + { + _612.passed++; + } +} + diff --git a/reference/shaders-msl/comp/shared-struct-bool-cast.comp b/reference/shaders-msl/comp/shared-struct-bool-cast.comp new file mode 100644 index 00000000000..806cb0a86cb --- /dev/null +++ b/reference/shaders-msl/comp/shared-struct-bool-cast.comp @@ -0,0 +1,110 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct S1 +{ + int3 a; + uint2 b; + short4 c; + uint d; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +bool compare_ivec3(thread 
const int3& a, thread const int3& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_uvec2(thread const uint2& a, thread const uint2& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_bvec4(thread const bool4& a, thread const bool4& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_uint(thread const uint& a, thread const uint& b) +{ + return a == b; +} + +kernel void main0(device block& _132 [[buffer(0)]]) +{ + threadgroup S1 s1; + s1.a = int3(6, 8, 8); + s1.b = uint2(4u); + s1.c = short4(bool4(false, false, false, true)); + s1.d = 6u; + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool allOk = true; + bool _99; + if (allOk) + { + int3 param = int3(6, 8, 8); + int3 param_1 = s1.a; + _99 = compare_ivec3(param, param_1); + } + else + { + _99 = allOk; + } + allOk = _99; + bool _108; + if (allOk) + { + uint2 param_2 = uint2(4u); + uint2 param_3 = s1.b; + _108 = compare_uvec2(param_2, param_3); + } + else + { + _108 = allOk; + } + allOk = _108; + bool _117; + if (allOk) + { + bool4 param_4 = bool4(false, false, false, true); + bool4 param_5 = bool4(s1.c); + _117 = compare_bvec4(param_4, param_5); + } + else + { + _117 = allOk; + } + allOk = _117; + bool _126; + if (allOk) + { + uint param_6 = 6u; + uint param_7 = s1.d; + _126 = compare_uint(param_6, param_7); + } + else + { + _126 = allOk; + } + allOk = _126; + if (allOk) + { + _132.passed++; + } +} + diff --git a/reference/shaders-msl/comp/spec-constant-op-member-array.comp b/reference/shaders-msl/comp/spec-constant-op-member-array.comp index d3c8b7dc4a3..8f54f0528dc 100644 --- a/reference/shaders-msl/comp/spec-constant-op-member-array.comp +++ b/reference/shaders-msl/comp/spec-constant-op-member-array.comp @@ -40,6 +40,7 @@ struct SSBO constant int e_tmp [[function_constant(3)]]; constant 
int e = is_function_constant_defined(e_tmp) ? e_tmp : 400; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(device SSBO& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { diff --git a/reference/shaders-msl/comp/spec-constant-work-group-size.comp b/reference/shaders-msl/comp/spec-constant-work-group-size.comp index bb796ab95d7..de30edec155 100644 --- a/reference/shaders-msl/comp/spec-constant-work-group-size.comp +++ b/reference/shaders-msl/comp/spec-constant-work-group-size.comp @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + #ifndef SPIRV_CROSS_CONSTANT_ID_1 #define SPIRV_CROSS_CONSTANT_ID_1 2 #endif @@ -27,7 +68,7 @@ constant int _32 = (1 - a); kernel void main0(device SSBO& _17 [[buffer(0)]]) { - int spec_const_array_size[b]; + spvUnsafeArray spec_const_array_size; spec_const_array_size[a] = a; _17.v[_30] = b + spec_const_array_size[_32]; } diff --git a/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp b/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp index 6988febfacc..5593629c064 100644 --- 
a/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp +++ b/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp @@ -1,14 +1,55 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Sub { - float f[2]; - float2 f2[2]; - float3 f3[2]; - float4 f4[2]; + spvUnsafeArray f; + spvUnsafeArray f2; + spvUnsafeArray f3; + spvUnsafeArray f4; }; struct Sub_1 @@ -24,6 +65,8 @@ struct SSBO Sub_1 sub[2]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { Sub foo; @@ -39,15 +82,15 @@ kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_WorkGroupID [[threadg foo.f2[gl_GlobalInvocationID.x] += float2(2.0); foo.f3[gl_GlobalInvocationID.x] += float3(3.0); foo.f4[gl_GlobalInvocationID.x] += float4(4.0); - _27.sub[gl_WorkGroupID.x].f[0].x = foo.f[0]; - _27.sub[gl_WorkGroupID.x].f[1].x = foo.f[1]; - _27.sub[gl_WorkGroupID.x].f2[0].xy = foo.f2[0]; - _27.sub[gl_WorkGroupID.x].f2[1].xy = foo.f2[1]; + (device float&)_27.sub[gl_WorkGroupID.x].f[0] = foo.f[0]; + 
(device float&)_27.sub[gl_WorkGroupID.x].f[1] = foo.f[1]; + (device float2&)_27.sub[gl_WorkGroupID.x].f2[0] = foo.f2[0]; + (device float2&)_27.sub[gl_WorkGroupID.x].f2[1] = foo.f2[1]; _27.sub[gl_WorkGroupID.x].f3[0] = foo.f3[0]; _27.sub[gl_WorkGroupID.x].f3[1] = foo.f3[1]; _27.sub[gl_WorkGroupID.x].f4[0] = foo.f4[0]; _27.sub[gl_WorkGroupID.x].f4[1] = foo.f4[1]; - _27.sub[0].f[0].x += 5.0; - _27.sub[0].f2[1].xy += float2(5.0); + (device float&)_27.sub[0].f[0] = _27.sub[0].f[0].x + 5.0; + (device float2&)_27.sub[0].f2[1] = _27.sub[0].f2[1].xy + float2(5.0); } diff --git a/reference/shaders-msl/comp/struct-layout.comp b/reference/shaders-msl/comp/struct-layout.comp index b6ee59f1693..8f2ab2d60ca 100644 --- a/reference/shaders-msl/comp/struct-layout.comp +++ b/reference/shaders-msl/comp/struct-layout.comp @@ -18,6 +18,8 @@ struct SSBO Foo in_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _23 [[buffer(0)]], const device SSBO& _30 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/struct-nested.comp b/reference/shaders-msl/comp/struct-nested.comp index 86229e3417f..e3d04bedaa6 100644 --- a/reference/shaders-msl/comp/struct-nested.comp +++ b/reference/shaders-msl/comp/struct-nested.comp @@ -28,6 +28,8 @@ struct dstbuffer s2_1 test[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device dstbuffer& _19 [[buffer(0)]]) { s2 testVal; diff --git a/reference/shaders-msl/comp/struct-packing.comp b/reference/shaders-msl/comp/struct-packing.comp index 35cf1b22cb4..dc1654399d3 100644 --- a/reference/shaders-msl/comp/struct-packing.comp +++ b/reference/shaders-msl/comp/struct-packing.comp @@ -3,12 +3,11 @@ using namespace metal; -typedef packed_float2 packed_rm_float2x3[3]; - struct S0 { float2 a[1]; float b; + char _m0_final_padding[4]; }; struct S1 @@ -21,6 +20,7 @@ struct S2 { 
float3 a[1]; float b; + char _m0_final_padding[12]; }; struct S3 @@ -45,6 +45,7 @@ struct Content S3 m3; float m4; S4 m3s[8]; + char _m0_final_padding[8]; }; struct SSBO1 @@ -58,17 +59,17 @@ struct SSBO1 float3x2 m3; float2x2 m4; float2x2 m5[9]; - packed_rm_float2x3 m6[4][2]; - char _m10_pad[8]; - float3x2 m7; - char _m11_pad[8]; + float3x2 m6[4][2]; + float2x3 m7; float array[1]; }; struct S0_1 { - float4 a[1]; + float2 a[1]; + char _m1_pad[8]; float b; + char _m0_final_padding[12]; }; struct S1_1 @@ -81,6 +82,7 @@ struct S2_1 { float3 a[1]; float b; + char _m0_final_padding[12]; }; struct S3_1 @@ -92,6 +94,7 @@ struct S3_1 struct S4_1 { float2 c; + char _m0_final_padding[8]; }; struct Content_1 @@ -104,8 +107,8 @@ struct Content_1 S2_1 m2; S3_1 m3; float m4; - char _m8_pad[12]; - /* FIXME: A padded struct is needed here. If you see this message, file a bug! */ S4_1 m3s[8]; + char _m8_pad[8]; + S4_1 m3s[8]; }; struct SSBO0 @@ -116,16 +119,18 @@ struct SSBO0 float4 array[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]]) { Content_1 _60 = ssbo_140.content; - ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0].xy; + ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0]; ssbo_430.content.m0s[0].b = _60.m0s[0].b; ssbo_430.content.m1s[0].a = float3(_60.m1s[0].a); ssbo_430.content.m1s[0].b = _60.m1s[0].b; ssbo_430.content.m2s[0].a[0] = _60.m2s[0].a[0]; ssbo_430.content.m2s[0].b = _60.m2s[0].b; - ssbo_430.content.m0.a[0] = _60.m0.a[0].xy; + ssbo_430.content.m0.a[0] = _60.m0.a[0]; ssbo_430.content.m0.b = _60.m0.b; ssbo_430.content.m1.a = float3(_60.m1.a); ssbo_430.content.m1.b = _60.m1.b; @@ -142,6 +147,6 @@ kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [ ssbo_430.content.m3s[5].c = _60.m3s[5].c; ssbo_430.content.m3s[6].c = _60.m3s[6].c; ssbo_430.content.m3s[7].c = _60.m3s[7].c; - ssbo_430.content.m1.a = ssbo_430.content.m3.a * 
float3x2(float2(ssbo_430.m6[1][1][0]), float2(ssbo_430.m6[1][1][1]), float2(ssbo_430.m6[1][1][2])); + ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1]; } diff --git a/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp b/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp new file mode 100644 index 00000000000..754f7357d4f --- /dev/null +++ b/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp @@ -0,0 +1,28 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 values[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u); + +static inline __attribute__((always_inline)) +void in_function(threadgroup short4 (&foo)[4], thread uint& gl_LocalInvocationIndex, device SSBO& v_23, thread uint3& gl_GlobalInvocationID) +{ + foo[gl_LocalInvocationIndex] = short4(v_23.values[gl_GlobalInvocationID.x] != float4(10.0)); + threadgroup_barrier(mem_flags::mem_threadgroup); + v_23.values[gl_GlobalInvocationID.x] = select(float4(40.0), float4(30.0), bool4(foo[gl_LocalInvocationIndex ^ 3u])); +} + +kernel void main0(device SSBO& v_23 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + threadgroup short4 foo[4]; + in_function(foo, gl_LocalInvocationIndex, v_23, gl_GlobalInvocationID); +} + diff --git a/reference/shaders-msl/comp/torture-loop.comp b/reference/shaders-msl/comp/torture-loop.comp index 1b65a3afaba..e92e71d61dd 100644 --- a/reference/shaders-msl/comp/torture-loop.comp +++ b/reference/shaders-msl/comp/torture-loop.comp @@ -14,6 +14,8 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _89 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git 
a/reference/shaders-msl/comp/type-alias.comp b/reference/shaders-msl/comp/type-alias.comp index 25a49f59f0f..e3ac031668f 100644 --- a/reference/shaders-msl/comp/type-alias.comp +++ b/reference/shaders-msl/comp/type-alias.comp @@ -40,11 +40,15 @@ struct SSBO2 float4 outputs[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) float4 overload(thread const S0& s0) { return s0.a; } +static inline __attribute__((always_inline)) float4 overload(thread const S1& s1) { return s1.a; diff --git a/reference/shaders-msl/comp/type_casting_i64.msl22.comp b/reference/shaders-msl/comp/type_casting_i64.msl22.comp new file mode 100644 index 00000000000..6820b077a1a --- /dev/null +++ b/reference/shaders-msl/comp/type_casting_i64.msl22.comp @@ -0,0 +1,27 @@ +#include +#include + +using namespace metal; + +struct dst_buff_t +{ + int m0[1]; +}; + +struct src_buff_t +{ + int m0[1]; +}; + +constant int base_val_tmp [[function_constant(0)]]; +constant int base_val = is_function_constant_defined(base_val_tmp) ? base_val_tmp : 0; +constant long shift_val_tmp [[function_constant(1)]]; +constant long shift_val = is_function_constant_defined(shift_val_tmp) ? 
shift_val_tmp : 0l; +constant int offset = (base_val >> int(shift_val)); +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device dst_buff_t& dst_buff [[buffer(0)]], device src_buff_t& src_buff [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + dst_buff.m0[gl_GlobalInvocationID.x] = src_buff.m0[gl_GlobalInvocationID.x] + offset; +} + diff --git a/reference/shaders-msl/comp/udiv.comp b/reference/shaders-msl/comp/udiv.comp index 32874ad7879..7f7315b882a 100644 --- a/reference/shaders-msl/comp/udiv.comp +++ b/reference/shaders-msl/comp/udiv.comp @@ -13,6 +13,8 @@ struct SSBO uint inputs[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _10 [[buffer(0)]], device SSBO& _23 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _10.outputs[gl_GlobalInvocationID.x] = _23.inputs[gl_GlobalInvocationID.x] / 29u; diff --git a/reference/shaders-msl/comp/writable-ssbo.comp b/reference/shaders-msl/comp/writable-ssbo.comp index 9dc53b6dd5d..310cda7fef9 100644 --- a/reference/shaders-msl/comp/writable-ssbo.comp +++ b/reference/shaders-msl/comp/writable-ssbo.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp b/reference/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp index a37fe519a55..cea12980c67 100644 --- a/reference/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp +++ b/reference/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp @@ -91,6 +91,8 @@ struct ResType_7 
int4 _m1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBOUint& u [[buffer(0)]], device SSBOInt& i [[buffer(1)]]) { ResType _25; diff --git a/reference/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc b/reference/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc index a5e30b6de1a..01fceeb6c7b 100644 --- a/reference/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc +++ b/reference/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float3 vVertex; @@ -10,7 +51,7 @@ struct main0_out struct main0_patchOut { - float3 vPatch[2]; + spvUnsafeArray vPatch; }; struct main0_in @@ -28,7 +69,7 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ if (gl_InvocationID >= 4) return; gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput + gl_in[gl_InvocationID ^ 1].vInput; - threadgroup_barrier(mem_flags::mem_device); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); if (gl_InvocationID == 0) { patchOut.vPatch[0] = float3(10.0); 
diff --git a/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc b/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc new file mode 100644 index 00000000000..6aca0157833 --- /dev/null +++ b/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc @@ -0,0 +1,47 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + float3 vFoo; +}; + +struct main0_in +{ + uint3 m_78; + ushort2 m_82; + float4 gl_Position; +}; + +static inline __attribute__((always_inline)) +void set_position(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device main0_in* thread & gl_in) +{ + gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 1]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 1]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 1; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625); + 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625); + patchOut.vFoo = float3(1.0); + set_position(gl_out, gl_InvocationID, gl_in); +} + diff --git a/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc b/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc index 9acece62c61..054b4e74188 100644 --- a/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc +++ b/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc @@ -20,6 +20,7 @@ struct main0_in float4 gl_Position [[attribute(0)]]; }; +static inline __attribute__((always_inline)) void set_position(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup main0_in* thread & gl_in) { gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position; diff --git a/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc b/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc new file mode 100644 index 00000000000..184a4a6f9b3 --- /dev/null +++ b/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc @@ -0,0 +1,36 @@ +#include +#include + +using namespace metal; + +struct Boo +{ + float3 a; + uint3 b; +}; + +struct main0_out +{ + Boo vVertex; +}; + +struct main0_in +{ + Boo vInput; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = 
min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput; + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(2.0); +} + diff --git a/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc b/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc index cd4d8d80e52..f5fd60a9f71 100644 --- a/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc +++ b/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc @@ -16,8 +16,8 @@ struct main0_out struct main0_in { - float3 Boo_a [[attribute(0)]]; - float3 Boo_b [[attribute(1)]]; + float3 vInput_a [[attribute(0)]]; + float3 vInput_b [[attribute(1)]]; }; kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) @@ -28,10 +28,8 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ threadgroup_barrier(mem_flags::mem_threadgroup); if (gl_InvocationID >= 4) return; - Boo vInput_24; - vInput_24.a = gl_in[gl_InvocationID].Boo_a; - vInput_24.b = gl_in[gl_InvocationID].Boo_b; - gl_out[gl_InvocationID].vVertex = vInput_24; + Boo _25 = Boo{ gl_in[gl_InvocationID].vInput_a, gl_in[gl_InvocationID].vInput_b }; + gl_out[gl_InvocationID].vVertex = _25; spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0); diff --git a/reference/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert b/reference/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert new file mode 100644 index 00000000000..a414c98542c --- /dev/null +++ b/reference/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.gl_Position = float4(10.0); + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 4.0; + return out; +} + diff --git a/reference/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert b/reference/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert index a414c98542c..2d98929051b 100644 --- a/reference/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert +++ b/reference/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert @@ -7,6 +7,8 @@ struct main0_out { float4 gl_Position [[position]]; float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; }; vertex main0_out main0() @@ -15,6 +17,8 @@ vertex main0_out main0() out.gl_Position = float4(10.0); out.gl_ClipDistance[0] = 1.0; out.gl_ClipDistance[1] = 4.0; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; return out; } diff --git a/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert b/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert new file mode 100644 index 00000000000..b3c8b6bb278 --- /dev/null +++ 
b/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], uint3 spvDispatchBase [[grid_origin]], device main0_out* spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + uint gl_BaseVertex = spvDispatchBase.x; + uint gl_BaseInstance = spvDispatchBase.y; + out.gl_Position = float4(float(int(gl_BaseVertex)), float(int(gl_BaseInstance)), 0.0, 1.0); +} + diff --git a/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert b/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert index 1d203ba98bc..a32c1948f88 100644 --- a/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert +++ b/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert @@ -11,7 +11,7 @@ struct main0_out vertex main0_out main0(uint gl_BaseVertex [[base_vertex]], uint gl_BaseInstance [[base_instance]]) { main0_out out = {}; - out.gl_Position = float4(float(gl_BaseVertex), float(gl_BaseInstance), 0.0, 1.0); + out.gl_Position = float4(float(int(gl_BaseVertex)), float(int(gl_BaseInstance)), 0.0, 1.0); return out; } diff --git a/reference/shaders-msl/flatten/rowmajor.flatten.vert b/reference/shaders-msl/flatten/rowmajor.flatten.vert index b5df8b064f5..3ea30e65c92 100644 --- a/reference/shaders-msl/flatten/rowmajor.flatten.vert +++ b/reference/shaders-msl/flatten/rowmajor.flatten.vert @@ -1,5 +1,3 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - #include #include @@ -9,7 +7,7 @@ struct UBO { float4x4 uMVPR; float4x4 uMVPC; - float2x4 uMVP; + float4x4 uMVP; }; struct main0_out @@ -22,16 +20,10 @@ struct main0_in float4 
aVertex [[attribute(0)]]; }; -// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization. -float2x4 spvConvertFromRowMajor2x4(float2x4 m) -{ - return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], m[1][3])); -} - vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]]) { main0_out out = {}; - float2 v = in.aVertex * spvConvertFromRowMajor2x4(_18.uMVP); + float2 v = float4x2(_18.uMVP[0].xy, _18.uMVP[1].xy, _18.uMVP[2].xy, _18.uMVP[3].xy) * in.aVertex; out.gl_Position = (_18.uMVPR * in.aVertex) + (in.aVertex * _18.uMVPC); return out; } diff --git a/reference/shaders-msl/flatten/struct.flatten.vert b/reference/shaders-msl/flatten/struct.flatten.vert index 954f9255c49..f79a794cde3 100644 --- a/reference/shaders-msl/flatten/struct.flatten.vert +++ b/reference/shaders-msl/flatten/struct.flatten.vert @@ -34,7 +34,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] out.gl_Position = _18.uMVP * in.aVertex; out.vColor = float4(0.0); float3 L = in.aVertex.xyz - float3(_18.light.Position); - out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); + out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(L))); return out; } diff --git a/reference/shaders-msl/frag/argument-buffers.msl2.argument.frag b/reference/shaders-msl/frag/argument-buffers.msl2.argument.frag index 1cf97277105..fd0a3def314 100644 --- a/reference/shaders-msl/frag/argument-buffers.msl2.argument.frag +++ b/reference/shaders-msl/frag/argument-buffers.msl2.argument.frag @@ -62,7 +62,8 @@ struct main0_in float2 vUV [[user(locn0)]]; }; -float4 sample_in_function2(thread texture2d uTexture, thread const sampler uTextureSmplr, thread float2& vUV, thread const array, 4> uTexture2, thread const array uSampler, thread const array, 2> uTextures, 
thread const array uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers) +static inline __attribute__((always_inline)) +float4 sample_in_function2(texture2d uTexture, sampler uTextureSmplr, thread float2& vUV, constant array, 4>& uTexture2, constant array& uSampler, constant array, 2>& uTextures, constant array& uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers) { float4 ret = uTexture.sample(uTextureSmplr, vUV); ret += uTexture2[2].sample(uSampler[1], vUV); @@ -73,7 +74,8 @@ float4 sample_in_function2(thread texture2d uTexture, thread const sample return ret; } -float4 sample_in_function(thread texture2d uTexture, thread const sampler uTextureSmplr, thread float2& vUV, thread const array, 4> uTexture2, thread const array uSampler, thread const array, 2> uTextures, thread const array uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers, constant UBO& v_90, constant UBOs* constant (&ubos)[4]) +static inline __attribute__((always_inline)) +float4 sample_in_function(texture2d uTexture, sampler uTextureSmplr, thread float2& vUV, constant array, 4>& uTexture2, constant array& uSampler, constant array, 2>& uTextures, constant array& uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers, constant UBO& v_90, constant UBOs* constant (&ubos)[4]) { float4 ret = sample_in_function2(uTexture, uTextureSmplr, vUV, uTexture2, uSampler, uTextures, uTexturesSmplr, v_60, ssbos, registers); ret += v_90.ubo; diff --git a/reference/shaders-msl/frag/array-component-io.frag b/reference/shaders-msl/frag/array-component-io.frag new file mode 100644 index 00000000000..9b4c5b5204f --- /dev/null +++ b/reference/shaders-msl/frag/array-component-io.frag @@ -0,0 +1,99 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + 
+using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 m_location_0 [[color(0)]]; + float4 m_location_1 [[color(1)]]; + float4 m_location_2 [[color(2)]]; +}; + +struct main0_in +{ + float InC_0 [[user(locn0_1), flat]]; + float InA_0 [[user(locn1), flat]]; + float InC_1 [[user(locn1_1), flat]]; + float2 InB_0 [[user(locn1_2), flat]]; + float InA_1 [[user(locn2), flat]]; + float InC_2 [[user(locn2_1), flat]]; + float2 InB_1 [[user(locn2_2), flat]]; + float InD [[user(locn3_1), sample_perspective]]; + float InE [[user(locn4_2), center_no_perspective]]; + float InF [[user(locn5_3), centroid_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray A = {}; + spvUnsafeArray B = {}; + spvUnsafeArray C = {}; + float D = {}; + spvUnsafeArray InA = {}; + spvUnsafeArray InB = {}; + spvUnsafeArray InC = {}; + InA[0] = in.InA_0; + InA[1] = in.InA_1; + InB[0] = in.InB_0; + InB[1] = in.InB_1; + InC[0] = in.InC_0; + InC[1] = in.InC_1; + InC[2] = in.InC_2; + A = InA; + B = InB; + C = InC; + D = (in.InD + in.InE) + in.InF; + out.m_location_1.x = A[0]; + out.m_location_2.x = A[1]; + out.m_location_1.zw = B[0]; + out.m_location_2.zw = B[1]; + out.m_location_0.y = C[0]; + out.m_location_1.y = C[1]; + out.m_location_2.y 
= C[2]; + out.m_location_0.w = D; + return out; +} + diff --git a/reference/shaders-msl/frag/array-lut-no-loop-variable.frag b/reference/shaders-msl/frag/array-lut-no-loop-variable.frag index 87158849b6f..cdedd73a748 100644 --- a/reference/shaders-msl/frag/array-lut-no-loop-variable.frag +++ b/reference/shaders-msl/frag/array-lut-no-loop-variable.frag @@ -1,9 +1,50 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; -constant float _17[5] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _17 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0, 5.0 }); struct main0_out { diff --git a/reference/shaders-msl/frag/array-of-array-lut.frag b/reference/shaders-msl/frag/array-of-array-lut.frag new file mode 100644 index 00000000000..ba553824e79 --- /dev/null +++ b/reference/shaders-msl/frag/array-of-array-lut.frag @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _17 = spvUnsafeArray({ 1.0, 2.0, 3.0 }); +constant spvUnsafeArray _21 = spvUnsafeArray({ 4.0, 5.0, 6.0 }); +constant spvUnsafeArray, 2> _22 = spvUnsafeArray, 2>({ spvUnsafeArray({ 1.0, 2.0, 3.0 }), spvUnsafeArray({ 4.0, 5.0, 6.0 }) }); + +struct main0_out +{ + float vOutput [[color(0)]]; +}; + +struct main0_in +{ + int vIndex1 [[user(locn0)]]; + int vIndex2 [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.vOutput = _22[in.vIndex1][in.vIndex2]; + return out; +} + diff --git a/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag new file mode 100644 index 00000000000..9c02d302ed3 --- /dev/null +++ b/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag @@ -0,0 +1,117 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename 
spvRemoveReference::type& x) +{ + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +{ + return static_cast(x); +} + +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + +template +inline T spvGetSwizzle(vec x, T c, spvSwizzle s) +{ + switch (s) + { + case spvSwizzle::none: + return c; + case spvSwizzle::zero: + return 0; + case spvSwizzle::one: + return 1; + case spvSwizzle::red: + return x.r; + case spvSwizzle::green: + return x.g; + case spvSwizzle::blue: + return x.b; + case spvSwizzle::alpha: + return x.a; + } +} + +// Wrapper function that swizzles texture samples and fetches. +template +inline vec spvTextureSwizzle(vec x, uint s) +{ + if (!s) + return x; + return vec(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) & 0xFF)), spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF))); +} + +template +inline T spvTextureSwizzle(T x, uint s) +{ + return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; +} + +struct UBO +{ + uint index; +}; + +struct UBO2 +{ + uint index2; +}; + +struct spvDescriptorSetBuffer0 +{ + array, 4> uSampler [[id(0)]]; + array uSamplerSmplr [[id(4)]]; + constant UBO* uUBO [[id(8)]]; + constant UBO2* m_50 [[id(9)]]; + constant uint* spvSwizzleConstants [[id(10)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +static inline __attribute__((always_inline)) +float4 sample_in_func(constant array, 4>& uSampler, constant array& uSamplerSmplr, constant uint* uSamplerSwzl, constant UBO& uUBO, thread float2& vUV) +{ + return spvTextureSwizzle(uSampler[uUBO.index].sample(uSamplerSmplr[uUBO.index], vUV), uSamplerSwzl[uUBO.index]); +} + +static inline __attribute__((always_inline)) +float4 sample_single_in_func(texture2d s, sampler sSmplr, constant uint& sSwzl, thread float2& 
vUV) +{ + return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); +} + +fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(30)]]) +{ + main0_out out = {}; + constant uint* spvDescriptorSet0_uSamplerSwzl = &spvDescriptorSet0.spvSwizzleConstants[0]; + out.FragColor = sample_in_func(spvDescriptorSet0.uSampler, spvDescriptorSet0.uSamplerSmplr, spvDescriptorSet0_uSamplerSwzl, (*spvDescriptorSet0.uUBO), in.vUV); + out.FragColor += sample_single_in_func(spvDescriptorSet0.uSampler[(*spvDescriptorSet0.m_50).index2], spvDescriptorSet0.uSamplerSmplr[(*spvDescriptorSet0.m_50).index2], spvDescriptorSet0_uSamplerSwzl[(*spvDescriptorSet0.m_50).index2], in.vUV); + return out; +} + diff --git a/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag new file mode 100644 index 00000000000..978ecbe1efa --- /dev/null +++ b/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag @@ -0,0 +1,108 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +{ + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +{ + return static_cast(x); +} + +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + +template +inline T spvGetSwizzle(vec x, T c, spvSwizzle s) +{ + switch (s) + { + case spvSwizzle::none: + return c; + case spvSwizzle::zero: + return 0; + case spvSwizzle::one: + return 1; + case 
spvSwizzle::red: + return x.r; + case spvSwizzle::green: + return x.g; + case spvSwizzle::blue: + return x.b; + case spvSwizzle::alpha: + return x.a; + } +} + +// Wrapper function that swizzles texture samples and fetches. +template +inline vec spvTextureSwizzle(vec x, uint s) +{ + if (!s) + return x; + return vec(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) & 0xFF)), spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF))); +} + +template +inline T spvTextureSwizzle(T x, uint s) +{ + return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; +} + +struct UBO +{ + uint index; +}; + +struct UBO2 +{ + uint index2; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +static inline __attribute__((always_inline)) +float4 sample_in_func(thread const array, 4>& uSampler, thread const array& uSamplerSmplr, constant uint* uSamplerSwzl, constant UBO& uUBO, thread float2& vUV) +{ + return spvTextureSwizzle(uSampler[uUBO.index].sample(uSamplerSmplr[uUBO.index], vUV), uSamplerSwzl[uUBO.index]); +} + +static inline __attribute__((always_inline)) +float4 sample_single_in_func(texture2d s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV) +{ + return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); +} + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(30)]], constant UBO& uUBO [[buffer(0)]], constant UBO2& _50 [[buffer(1)]], array, 4> uSampler [[texture(0)]], array uSamplerSmplr [[sampler(0)]]) +{ + main0_out out = {}; + constant uint* uSamplerSwzl = &spvSwizzleConstants[0]; + out.FragColor = sample_in_func(uSampler, uSamplerSmplr, uSamplerSwzl, uUBO, in.vUV); + out.FragColor += sample_single_in_func(uSampler[_50.index2], uSamplerSmplr[_50.index2], uSamplerSwzl[_50.index2], in.vUV); + return out; +} + diff --git 
a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag index 702409ee7b3..43a61e1f053 100644 --- a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag +++ b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag @@ -5,22 +5,17 @@ using namespace metal; -struct spvDescriptorSetBuffer0 -{ - array, 4> uSampler0 [[id(0)]]; - array uSampler0Smplr [[id(4)]]; - constant uint* spvSwizzleConstants [[id(8)]]; -}; - -struct main0_out +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { - float4 FragColor [[color(0)]]; -}; - -struct main0_in + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { - float2 vUV [[user(locn0)]]; -}; + return static_cast(x); +} enum class spvSwizzle : uint { @@ -33,18 +28,6 @@ enum class spvSwizzle : uint alpha }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) -{ - return static_cast(x); -} -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) -{ - return static_cast(x); -} - template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -82,77 +65,37 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) +struct spvDescriptorSetBuffer0 { - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} + array, 4> uSampler0 [[id(0)]]; + array uSampler0Smplr [[id(4)]]; + constant uint* spvSwizzleConstants [[id(8)]]; +}; -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) +struct main0_out { - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; -float4 sample_in_func_1(thread const array, 4> uSampler0, thread const array uSampler0Smplr, constant uint* uSampler0Swzl, thread float2& vUV) +static inline __attribute__((always_inline)) +float4 sample_in_func_1(constant array, 4>& uSampler0, constant array& uSampler0Smplr, constant uint* uSampler0Swzl, thread float2& vUV) { return spvTextureSwizzle(uSampler0[2].sample(uSampler0Smplr[2], vUV), uSampler0Swzl[2]); } -float4 sample_in_func_2(thread float2& vUV, thread texture2d uSampler1, thread const sampler uSampler1Smplr, constant uint& uSampler1Swzl) +static inline __attribute__((always_inline)) +float4 sample_in_func_2(thread float2& vUV, texture2d uSampler1, sampler uSampler1Smplr, constant uint& uSampler1Swzl) { return spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, vUV), uSampler1Swzl); } -float4 sample_single_in_func(thread const texture2d s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV) +static inline __attribute__((always_inline)) +float4 sample_single_in_func(texture2d s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV) { return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); } diff --git a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag index ebaa28a57ea..1db803c5604 100644 --- a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag +++ b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag @@ -5,15 +5,17 @@ using 
namespace metal; -struct main0_out +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { - float4 FragColor [[color(0)]]; -}; - -struct main0_in + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { - float2 vUV [[user(locn0)]]; -}; + return static_cast(x); +} enum class spvSwizzle : uint { @@ -26,18 +28,6 @@ enum class spvSwizzle : uint alpha }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) -{ - return static_cast(x); -} -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) -{ - return static_cast(x); -} - template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -75,72 +65,24 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) +struct main0_out { - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} + float4 FragColor [[color(0)]]; +}; -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) +struct main0_in { - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} + float2 vUV [[user(locn0)]]; +}; -float4 sample_in_func(thread const array, 4> uSampler, thread const array uSamplerSmplr, constant uint* uSamplerSwzl, thread float2& vUV) +static inline __attribute__((always_inline)) +float4 sample_in_func(thread const array, 4>& uSampler, thread const array& uSamplerSmplr, constant uint* uSamplerSwzl, thread float2& vUV) { return spvTextureSwizzle(uSampler[2].sample(uSamplerSmplr[2], vUV), uSamplerSwzl[2]); } -float4 sample_single_in_func(thread const texture2d s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV) +static inline __attribute__((always_inline)) +float4 sample_single_in_func(texture2d s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV) { return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); } diff --git a/reference/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag b/reference/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag index ef19fbf8569..1259283caaa 100644 --- a/reference/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag +++ b/reference/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag @@ -15,7 +15,7 @@ struct main0_out struct main0_in { - float3 gl_BaryCoordNoPerspNV [[barycentric_coord, center_no_perspective]]; + float3 gl_BaryCoordNoPerspEXT [[barycentric_coord, center_no_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[buffer(0)]], uint gl_PrimitiveID [[primitive_id]]) @@ -25,7 +25,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[ float2 uv0 = _19.uvs[(3 * prim) + 0]; float2 uv1 = _19.uvs[(3 * prim) + 1]; 
float2 uv2 = _19.uvs[(3 * prim) + 2]; - out.value = ((uv0 * in.gl_BaryCoordNoPerspNV.x) + (uv1 * in.gl_BaryCoordNoPerspNV.y)) + (uv2 * in.gl_BaryCoordNoPerspNV.z); + out.value = ((uv0 * in.gl_BaryCoordNoPerspEXT.x) + (uv1 * in.gl_BaryCoordNoPerspEXT.y)) + (uv2 * in.gl_BaryCoordNoPerspEXT.z); return out; } diff --git a/reference/shaders-msl/frag/barycentric-nv.msl22.frag b/reference/shaders-msl/frag/barycentric-nv.msl22.frag index 1d2e4c2f21e..386d2d26f7f 100644 --- a/reference/shaders-msl/frag/barycentric-nv.msl22.frag +++ b/reference/shaders-msl/frag/barycentric-nv.msl22.frag @@ -15,7 +15,7 @@ struct main0_out struct main0_in { - float3 gl_BaryCoordNV [[barycentric_coord, center_perspective]]; + float3 gl_BaryCoordEXT [[barycentric_coord, center_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[buffer(0)]], uint gl_PrimitiveID [[primitive_id]]) @@ -25,7 +25,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[ float2 uv0 = _19.uvs[(3 * prim) + 0]; float2 uv1 = _19.uvs[(3 * prim) + 1]; float2 uv2 = _19.uvs[(3 * prim) + 2]; - out.value = ((uv0 * in.gl_BaryCoordNV.x) + (uv1 * in.gl_BaryCoordNV.y)) + (uv2 * in.gl_BaryCoordNV.z); + out.value = ((uv0 * in.gl_BaryCoordEXT.x) + (uv1 * in.gl_BaryCoordEXT.y)) + (uv2 * in.gl_BaryCoordEXT.z); return out; } diff --git a/reference/shaders-msl/frag/basic.force-sample.frag b/reference/shaders-msl/frag/basic.force-sample.frag new file mode 100644 index 00000000000..b9706b73f56 --- /dev/null +++ b/reference/shaders-msl/frag/basic.force-sample.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; + float2 vTex [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + out.FragColor = 
in.vColor * uTex.sample(uTexSmplr, in.vTex); + return out; +} + diff --git a/reference/shaders-msl/frag/bitcasting.1d-as-2d.frag b/reference/shaders-msl/frag/bitcasting.1d-as-2d.frag new file mode 100644 index 00000000000..ea49c067c21 --- /dev/null +++ b/reference/shaders-msl/frag/bitcasting.1d-as-2d.frag @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor0 [[color(0)]]; + float4 FragColor1 [[color(1)]]; +}; + +struct main0_in +{ + float4 VertGeom [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d TextureBase [[texture(0)]], texture2d TextureDetail [[texture(1)]], sampler TextureBaseSmplr [[sampler(0)]], sampler TextureDetailSmplr [[sampler(1)]]) +{ + main0_out out = {}; + float4 texSample0 = TextureBase.sample(TextureBaseSmplr, float2(in.VertGeom.x, 0.5)); + float4 texSample1 = TextureDetail.sample(TextureDetailSmplr, float2(in.VertGeom.x, 0.5), int2(3, 0)); + int4 iResult0 = as_type(texSample0); + int4 iResult1 = as_type(texSample1); + out.FragColor0 = as_type(iResult0) * as_type(iResult1); + uint4 uResult0 = as_type(texSample0); + uint4 uResult1 = as_type(texSample1); + out.FragColor1 = as_type(uResult0) * as_type(uResult1); + return out; +} + diff --git a/reference/shaders-msl/frag/buffer-read-write.frag b/reference/shaders-msl/frag/buffer-read-write.frag index 2b2ac7f0608..4f114ed7247 100644 --- a/reference/shaders-msl/frag/buffer-read-write.frag +++ b/reference/shaders-msl/frag/buffer-read-write.frag @@ -5,17 +5,18 @@ using namespace metal; -struct main0_out -{ - float4 FragColor [[color(0)]]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + fragment main0_out main0(texture2d buf [[texture(0)]], texture2d bufOut [[texture(1)]], float4 gl_FragCoord [[position]]) { 
main0_out out = {}; diff --git a/reference/shaders-msl/frag/clip-distance-varying.frag b/reference/shaders-msl/frag/clip-distance-varying.frag new file mode 100644 index 00000000000..9a72d5ba39f --- /dev/null +++ b/reference/shaders-msl/frag/clip-distance-varying.frag @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_ClipDistance = {}; + gl_ClipDistance[0] = in.gl_ClipDistance_0; + gl_ClipDistance[1] = in.gl_ClipDistance_1; + out.FragColor = float4((1.0 - gl_ClipDistance[0]) - gl_ClipDistance[1]); + return out; +} + diff --git a/reference/shaders-msl/frag/constant-array.frag b/reference/shaders-msl/frag/constant-array.frag index c4e87d98725..990860762c3 100644 --- a/reference/shaders-msl/frag/constant-array.frag +++ b/reference/shaders-msl/frag/constant-array.frag @@ -1,21 +1,60 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic 
ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Foobar { float a; float b; }; -constant float4 _37[3] = { float4(1.0), float4(2.0), float4(3.0) }; -constant float4 _49[2] = { float4(1.0), float4(2.0) }; -constant float4 _54[2] = { float4(8.0), float4(10.0) }; -constant float4 _55[2][2] = { { float4(1.0), float4(2.0) }, { float4(8.0), float4(10.0) } }; -constant Foobar _75[2] = { Foobar{ 10.0, 40.0 }, Foobar{ 90.0, 70.0 } }; +constant spvUnsafeArray _37 = spvUnsafeArray({ float4(1.0), float4(2.0), float4(3.0) }); +constant spvUnsafeArray _49 = spvUnsafeArray({ float4(1.0), float4(2.0) }); +constant spvUnsafeArray _54 = spvUnsafeArray({ float4(8.0), float4(10.0) }); +constant spvUnsafeArray, 2> _55 = spvUnsafeArray, 2>({ spvUnsafeArray({ float4(1.0), float4(2.0) }), spvUnsafeArray({ float4(8.0), float4(10.0) }) }); +constant spvUnsafeArray _75 = spvUnsafeArray({ Foobar{ 10.0, 40.0 }, Foobar{ 90.0, 70.0 } }); struct main0_out { @@ -27,6 +66,7 @@ struct main0_in int index [[user(locn0)]]; }; +static inline __attribute__((always_inline)) float4 resolve(thread const Foobar& f) { return float4(f.a + f.b); diff --git a/reference/shaders-msl/frag/constant-composites.frag b/reference/shaders-msl/frag/constant-composites.frag index 
504beaa067d..e0fa980fb85 100644 --- a/reference/shaders-msl/frag/constant-composites.frag +++ b/reference/shaders-msl/frag/constant-composites.frag @@ -1,18 +1,56 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Foo { float a; float b; }; -constant float _16[4] = { 1.0, 4.0, 3.0, 2.0 }; -constant Foo _28[2] = { Foo{ 10.0, 20.0 }, Foo{ 30.0, 40.0 } }; +constant spvUnsafeArray _16 = spvUnsafeArray({ 1.0, 4.0, 3.0, 2.0 }); struct main0_out { @@ -24,21 +62,10 @@ struct main0_in int line [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0(main0_in in [[stage_in]]) { + spvUnsafeArray _28 = spvUnsafeArray({ Foo{ 10.0, 20.0 }, Foo{ 30.0, 40.0 } }); + main0_out out = {}; out.FragColor = float4(_16[in.line]); out.FragColor += float4(_28[in.line].a * _28[1 - in.line].a); diff --git a/reference/shaders-msl/frag/cull-distance-varying.frag b/reference/shaders-msl/frag/cull-distance-varying.frag new file mode 100644 index 00000000000..708a295710d --- /dev/null +++ b/reference/shaders-msl/frag/cull-distance-varying.frag @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float gl_CullDistance_0 [[user(cull0)]]; + float gl_CullDistance_1 [[user(cull1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_CullDistance = {}; + gl_CullDistance[0] = in.gl_CullDistance_0; + gl_CullDistance[1] = in.gl_CullDistance_1; + out.FragColor = float4((1.0 - gl_CullDistance[0]) - gl_CullDistance[1]); + return out; +} + diff --git a/reference/shaders-msl/frag/depth-out-early-frag-tests.frag b/reference/shaders-msl/frag/depth-out-early-frag-tests.frag new file mode 100644 index 00000000000..21884d81c5b --- /dev/null +++ b/reference/shaders-msl/frag/depth-out-early-frag-tests.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color_out [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0() +{ + float gl_FragDepth; + main0_out out = {}; + out.color_out = float4(1.0, 0.0, 0.0, 1.0); + gl_FragDepth = 0.699999988079071044921875; + return out; +} + diff --git a/reference/shaders-msl/frag/depth-out-no-early-frag-tests.frag b/reference/shaders-msl/frag/depth-out-no-early-frag-tests.frag new file mode 100644 index 00000000000..57d810fafcb --- /dev/null +++ 
b/reference/shaders-msl/frag/depth-out-no-early-frag-tests.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color_out [[color(0)]]; + float gl_FragDepth [[depth(less)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.color_out = float4(1.0, 0.0, 0.0, 1.0); + out.gl_FragDepth = 0.699999988079071044921875; + return out; +} + diff --git a/reference/shaders-msl/frag/disable-frag-output.frag-output.frag b/reference/shaders-msl/frag/disable-frag-output.frag-output.frag new file mode 100644 index 00000000000..63bc45b8af7 --- /dev/null +++ b/reference/shaders-msl/frag/disable-frag-output.frag-output.frag @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 buf1 [[color(1)]]; + float4 buf3 [[color(3)]]; + float4 buf6 [[color(6)]]; + float4 buf7 [[color(7)]]; +}; + +fragment main0_out main0() +{ + float4 buf0; + float4 buf2; + float4 buf4; + float4 buf5; + float gl_FragDepth; + int gl_FragStencilRefARB; + main0_out out = {}; + buf0 = float4(0.0, 0.0, 0.0, 1.0); + out.buf1 = float4(1.0, 0.0, 0.0, 1.0); + buf2 = float4(0.0, 1.0, 0.0, 1.0); + out.buf3 = float4(0.0, 0.0, 1.0, 1.0); + buf4 = float4(1.0, 0.0, 1.0, 0.5); + buf5 = float4(0.25); + out.buf6 = float4(0.75); + out.buf7 = float4(1.0); + gl_FragDepth = 0.89999997615814208984375; + gl_FragStencilRefARB = uint(127); + return out; +} + diff --git a/reference/shaders-msl/frag/flush_params.frag b/reference/shaders-msl/frag/flush_params.frag index e2f2a48cb25..905a179bc6a 100644 --- a/reference/shaders-msl/frag/flush_params.frag +++ b/reference/shaders-msl/frag/flush_params.frag @@ -15,11 +15,13 @@ struct main0_out float4 FragColor [[color(0)]]; }; +static inline __attribute__((always_inline)) void foo2(thread Structy& f) { f.c = float4(10.0); } +static inline __attribute__((always_inline)) Structy foo() { Structy param; diff --git a/reference/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag 
b/reference/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag new file mode 100644 index 00000000000..648dc9cf89a --- /dev/null +++ b/reference/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag @@ -0,0 +1,71 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct foo_t +{ + float x; + uint y; +}; + +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 frag_body(device foo_t& foo, thread float4& gl_FragCoord, texture2d bar, device atomic_uint* bar_atomic, thread bool& gl_HelperInvocation) +{ + if (!gl_HelperInvocation) + { + foo.x = 1.0; + } + uint _25 = (!gl_HelperInvocation ? atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + if (int(gl_FragCoord.x) == 3) + { + gl_HelperInvocation = true, discard_fragment(); + } + (gl_HelperInvocation ? ((void)0) : bar.write(uint4(1u), uint2(int2(gl_FragCoord.xy)))); + uint _50 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _57 = (!gl_HelperInvocation ? 
atomic_fetch_or_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], 62u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], memory_order_relaxed)); + uint _60 = (!gl_HelperInvocation ? atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _63 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _65 = (!gl_HelperInvocation ? atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _71 = (!gl_HelperInvocation ? atomic_fetch_max_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], 100u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], memory_order_relaxed)); + uint _76; + if (!gl_HelperInvocation) + { + do + { + _76 = 100u; + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], &_76, 42u, memory_order_relaxed, memory_order_relaxed) && _76 == 100u); + } + else + { + _76 = atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], memory_order_relaxed); + } + bool _77 = gl_HelperInvocation; + return float4(1.0, float(_77), 0.0, 1.0); +} + +fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d bar [[texture(0)]], device atomic_uint* bar_atomic [[buffer(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + float4 _85 = frag_body(foo, 
gl_FragCoord, bar, bar_atomic, gl_HelperInvocation); + out.fragColor = _85; + return out; +} + diff --git a/reference/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag b/reference/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag new file mode 100644 index 00000000000..0c11898b089 --- /dev/null +++ b/reference/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag @@ -0,0 +1,49 @@ +#include +#include + +using namespace metal; + +struct foo +{ + int x; +}; + +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +fragment main0_out main0(device foo& _24 [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + if (gl_FragCoord.y == 7.0) + { + gl_HelperInvocation = true, discard_fragment(); + } + if (!gl_HelperInvocation) + { + _24.x = 0; + } + for (;;) + { + if (float(_24.x) < gl_FragCoord.x) + { + int _41 = _24.x; + int _43 = _41 + 1; + if (!gl_HelperInvocation) + { + _24.x = _43; + } + continue; + } + else + { + break; + } + } + out.fragColor = float4(float(_24.x), 0.0, 0.0, 1.0); + return out; +} + diff --git a/reference/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag b/reference/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag new file mode 100644 index 00000000000..c2ae69695fc --- /dev/null +++ b/reference/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag @@ -0,0 +1,70 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct foo_t +{ + float x; + uint y; +}; + +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 frag_body(device foo_t& foo, thread float4& gl_FragCoord, texture2d bar, device atomic_uint* bar_atomic, thread bool& gl_HelperInvocation) +{ + if (!gl_HelperInvocation) + { + foo.x = 1.0; + } + uint _25 = (!gl_HelperInvocation ? atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + if (int(gl_FragCoord.x) == 3) + { + gl_HelperInvocation = true, discard_fragment(); + } + (gl_HelperInvocation ? ((void)0) : bar.write(uint4(1u), uint2(int2(gl_FragCoord.xy)))); + uint _51 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _58 = (!gl_HelperInvocation ? atomic_fetch_or_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], 62u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], memory_order_relaxed)); + uint _61 = (!gl_HelperInvocation ? atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _64 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _66 = (!gl_HelperInvocation ? 
atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _72 = (!gl_HelperInvocation ? atomic_fetch_max_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], 100u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], memory_order_relaxed)); + uint _77; + if (!gl_HelperInvocation) + { + do + { + _77 = 100u; + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], &_77, 42u, memory_order_relaxed, memory_order_relaxed) && _77 == 100u); + } + else + { + _77 = atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], memory_order_relaxed); + } + return float4(1.0, 0.0, 0.0, 1.0); +} + +fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d bar [[texture(0)]], device atomic_uint* bar_atomic [[buffer(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + float4 _84 = frag_body(foo, gl_FragCoord, bar, bar_atomic, gl_HelperInvocation); + out.fragColor = _84; + return out; +} + diff --git a/reference/shaders-msl/frag/fragment-component-padding.pad-fragment.frag b/reference/shaders-msl/frag/fragment-component-padding.pad-fragment.frag index 2d339c47353..6420bb9f4e9 100644 --- a/reference/shaders-msl/frag/fragment-component-padding.pad-fragment.frag +++ b/reference/shaders-msl/frag/fragment-component-padding.pad-fragment.frag @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 FragColors_0 [[color(0)]]; @@ -18,7 +57,8 @@ struct main0_in float3 vColor [[user(locn0)]]; }; -void set_globals(thread float (&FragColors)[2], thread float3& vColor, thread float2& FragColor2, thread float3& FragColor3) +static inline __attribute__((always_inline)) +void set_globals(thread spvUnsafeArray& FragColors, thread float3& vColor, thread float2& FragColor2, thread float3& FragColor3) { FragColors[0] = vColor.x; FragColors[1] = vColor.y; @@ -29,14 +69,14 @@ void set_globals(thread float (&FragColors)[2], thread float3& vColor, thread fl fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float FragColors[2] = {}; + spvUnsafeArray FragColors = {}; float2 FragColor2 = {}; float3 FragColor3 = {}; set_globals(FragColors, in.vColor, FragColor2, FragColor3); out.FragColors_0 = float4(FragColors[0]); out.FragColors_1 = float4(FragColors[1]); - out.FragColor2 = FragColor2.xyyy; - out.FragColor3 = FragColor3.xyzz; + out.FragColor2.xy = FragColor2; + out.FragColor3.xyz = FragColor3; return out; } diff --git a/reference/shaders-msl/frag/helper-invocation.msl21.frag b/reference/shaders-msl/frag/helper-invocation.msl21.frag index 8d32f4860dc..36d14239093 100644 --- a/reference/shaders-msl/frag/helper-invocation.msl21.frag +++ 
b/reference/shaders-msl/frag/helper-invocation.msl21.frag @@ -15,10 +15,11 @@ struct main0_in float2 vUV [[user(locn0)]]; }; -float4 foo(thread bool& gl_HelperInvocation, thread texture2d uSampler, thread const sampler uSamplerSmplr, thread float2& vUV) +static inline __attribute__((always_inline)) +float4 foo(texture2d uSampler, sampler uSamplerSmplr, thread float2& vUV) { float4 color; - if (!gl_HelperInvocation) + if (!simd_is_helper_thread()) { color = uSampler.sample(uSamplerSmplr, vUV, level(0.0)); } @@ -32,8 +33,7 @@ float4 foo(thread bool& gl_HelperInvocation, thread texture2d uSampler, t fragment main0_out main0(main0_in in [[stage_in]], texture2d uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]]) { main0_out out = {}; - bool gl_HelperInvocation = simd_is_helper_thread(); - out.FragColor = foo(gl_HelperInvocation, uSampler, uSamplerSmplr, in.vUV); + out.FragColor = foo(uSampler, uSamplerSmplr, in.vUV); return out; } diff --git a/reference/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag b/reference/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag new file mode 100644 index 00000000000..a35835846d5 --- /dev/null +++ b/reference/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag @@ -0,0 +1,58 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct UBO +{ + float4 v; +}; + +struct spvDescriptorSetBuffer0 +{ + array, 10000> uSamplers [[id(0)]]; + array uSamplersSmplr [[id(10000)]]; +}; + +struct spvDescriptorSetBuffer1 +{ + constant UBO* vs [[id(0)]][10000]; +}; + +struct spvDescriptorSetBuffer2 +{ + texture2d uSampler [[id(0)]]; + sampler uSamplerSmplr [[id(1)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +static inline __attribute__((always_inline)) +float4 samp_array(const device array, 10000>& uSamplers, const device 
array& uSamplersSmplr, thread float2& vUV, constant UBO* const device (&vs)[10000]) +{ + return uSamplers[9999].sample(uSamplersSmplr[9999], vUV) + vs[5000]->v; +} + +static inline __attribute__((always_inline)) +float4 samp_single(thread float2& vUV, texture2d uSampler, sampler uSamplerSmplr) +{ + return uSampler.sample(uSamplerSmplr, vUV); +} + +fragment main0_out main0(main0_in in [[stage_in]], const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], const device spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant spvDescriptorSetBuffer2& spvDescriptorSet2 [[buffer(2)]]) +{ + main0_out out = {}; + out.FragColor = samp_array(spvDescriptorSet0.uSamplers, spvDescriptorSet0.uSamplersSmplr, in.vUV, spvDescriptorSet1.vs) + samp_single(in.vUV, spvDescriptorSet2.uSampler, spvDescriptorSet2.uSamplerSmplr); + return out; +} + diff --git a/reference/shaders-msl/frag/image-query-lod.msl22.frag b/reference/shaders-msl/frag/image-query-lod.msl22.frag index 4ec61c24d70..2362597fa51 100644 --- a/reference/shaders-msl/frag/image-query-lod.msl22.frag +++ b/reference/shaders-msl/frag/image-query-lod.msl22.frag @@ -15,7 +15,8 @@ struct main0_in float3 vUV [[user(locn0)]]; }; -void from_function(thread float2& FragColor, thread texture2d uSampler2D, thread const sampler uSampler2DSmplr, thread float3& vUV, thread texture3d uSampler3D, thread const sampler uSampler3DSmplr, thread texturecube uSamplerCube, thread const sampler uSamplerCubeSmplr, thread texture2d uTexture2D, thread sampler uSampler, thread texture3d uTexture3D, thread texturecube uTextureCube) +static inline __attribute__((always_inline)) +void from_function(thread float2& FragColor, texture2d uSampler2D, sampler uSampler2DSmplr, thread float3& vUV, texture3d uSampler3D, sampler uSampler3DSmplr, texturecube uSamplerCube, sampler uSamplerCubeSmplr, texture2d uTexture2D, sampler uSampler, texture3d uTexture3D, texturecube uTextureCube) { float2 _22; _22.x = 
uSampler2D.calculate_clamped_lod(uSampler2DSmplr, vUV.xy); diff --git a/reference/shaders-msl/frag/in_block.frag b/reference/shaders-msl/frag/in_block.frag index 8178c9a4ed6..efb0cbd4296 100644 --- a/reference/shaders-msl/frag/in_block.frag +++ b/reference/shaders-msl/frag/in_block.frag @@ -16,16 +16,16 @@ struct main0_out struct main0_in { - float4 VertexOut_color [[user(locn2)]]; - float4 VertexOut_color2 [[user(locn3)]]; + float4 inputs_color [[user(locn2)]]; + float4 inputs_color2 [[user(locn3)]]; }; fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; VertexOut inputs = {}; - inputs.color = in.VertexOut_color; - inputs.color2 = in.VertexOut_color2; + inputs.color = in.inputs_color; + inputs.color2 = in.inputs_color2; out.FragColor = inputs.color + inputs.color2; return out; } diff --git a/reference/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag b/reference/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag new file mode 100644 index 00000000000..7b011ffb580 --- /dev/null +++ b/reference/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag @@ -0,0 +1,105 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Foo +{ + float a; + float b; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float foos_0_a [[user(locn1)]]; + float foos_0_b [[user(locn2)]]; + float foos_1_a [[user(locn3)]]; + float foos_1_b [[user(locn4)]]; + float foos_2_a [[user(locn5)]]; + float foos_2_b [[user(locn6)]]; + float foos_3_a [[user(locn7)]]; + float foos_3_b [[user(locn8)]]; + float bars_0_a [[user(locn10)]]; + float bars_0_b [[user(locn11)]]; + float bars_1_a [[user(locn12)]]; + float bars_1_b [[user(locn13)]]; + float bars_2_a [[user(locn14)]]; + float bars_2_b [[user(locn15)]]; + float bars_3_a [[user(locn16)]]; + float bars_3_b [[user(locn17)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray foos = {}; + spvUnsafeArray bars = {}; + foos[0].a = in.foos_0_a; + foos[0].b = in.foos_0_b; + foos[1].a = in.foos_1_a; + foos[1].b = in.foos_1_b; + foos[2].a = in.foos_2_a; + foos[2].b = in.foos_2_b; + foos[3].a = in.foos_3_a; + foos[3].b = in.foos_3_b; + bars[0].a = in.bars_0_a; + bars[0].b = in.bars_0_b; + bars[1].a = in.bars_1_a; + bars[1].b = in.bars_1_b; + bars[2].a = in.bars_2_a; + bars[2].b = in.bars_2_b; + bars[3].a = in.bars_3_a; + bars[3].b = in.bars_3_b; + out.FragColor.x = foos[0].a; + out.FragColor.y = foos[1].b; + out.FragColor.z = 
foos[2].a; + out.FragColor.w = bars[3].b; + return out; +} + diff --git a/reference/shaders-msl/frag/in_mat.frag b/reference/shaders-msl/frag/in_mat.frag index 70ff4860b80..cf7da2ef292 100644 --- a/reference/shaders-msl/frag/in_mat.frag +++ b/reference/shaders-msl/frag/in_mat.frag @@ -27,8 +27,8 @@ fragment main0_out main0(main0_in in [[stage_in]], texturecube samplerCol inInvModelView[1] = in.inInvModelView_1; inInvModelView[2] = in.inInvModelView_2; inInvModelView[3] = in.inInvModelView_3; - float3 cI = normalize(in.inPos); - float3 cR = reflect(cI, normalize(in.inNormal)); + float3 cI = fast::normalize(in.inPos); + float3 cR = reflect(cI, fast::normalize(in.inNormal)); cR = float3((inInvModelView * float4(cR, 0.0)).xyz); cR.x *= (-1.0); out.outFragColor = samplerColor.sample(samplerColorSmplr, cR, bias(in.inLodBias)); diff --git a/reference/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag b/reference/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag new file mode 100644 index 00000000000..3f91c2c409d --- /dev/null +++ b/reference/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag @@ -0,0 +1,27 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d_ms_array uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord, thread uint& gl_Layer) +{ + float4 _24 = uInput.read(uint2(gl_FragCoord.xy), gl_Layer, gl_SampleID); + return _24; +} + +fragment main0_out main0(texture2d_ms_array uSubpass0 [[texture(0)]], texture2d_ms_array uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), gl_Layer, 1) + 
uSubpass1.read(uint2(gl_FragCoord.xy), gl_Layer, 2)) + load_subpasses(uSubpass0, gl_SampleID, gl_FragCoord, gl_Layer); + return out; +} + diff --git a/reference/shaders-msl/frag/input-attachment-ms.frag b/reference/shaders-msl/frag/input-attachment-ms.frag index d38712e91cd..97629d8e443 100644 --- a/reference/shaders-msl/frag/input-attachment-ms.frag +++ b/reference/shaders-msl/frag/input-attachment-ms.frag @@ -10,14 +10,17 @@ struct main0_out float4 FragColor [[color(0)]]; }; -float4 load_subpasses(thread const texture2d_ms uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord) +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d_ms uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord) { - return uInput.read(uint2(gl_FragCoord.xy), gl_SampleID); + float4 _24 = uInput.read(uint2(gl_FragCoord.xy), gl_SampleID); + return _24; } fragment main0_out main0(texture2d_ms uSubpass0 [[texture(0)]], texture2d_ms uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]]) { main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), 1) + uSubpass1.read(uint2(gl_FragCoord.xy), 2)) + load_subpasses(uSubpass0, gl_SampleID, gl_FragCoord); return out; } diff --git a/reference/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag b/reference/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag new file mode 100644 index 00000000000..1b6b9f6aaea --- /dev/null +++ b/reference/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag @@ -0,0 +1,28 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d_ms_array uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord, thread uint& gl_ViewIndex) +{ + float4 _24 = 
uInput.read(uint2(gl_FragCoord.xy), gl_ViewIndex, gl_SampleID); + return _24; +} + +fragment main0_out main0(constant uint* spvViewMask [[buffer(24)]], texture2d_ms_array uSubpass0 [[texture(0)]], texture2d_ms_array uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]], uint gl_ViewIndex [[render_target_array_index]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + gl_ViewIndex += spvViewMask[0]; + out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), gl_ViewIndex, 1) + uSubpass1.read(uint2(gl_FragCoord.xy), gl_ViewIndex, 2)) + load_subpasses(uSubpass0, gl_SampleID, gl_FragCoord, gl_ViewIndex); + return out; +} + diff --git a/reference/shaders-msl/frag/input-attachment.arrayed-subpass.frag b/reference/shaders-msl/frag/input-attachment.arrayed-subpass.frag new file mode 100644 index 00000000000..934abd585f6 --- /dev/null +++ b/reference/shaders-msl/frag/input-attachment.arrayed-subpass.frag @@ -0,0 +1,25 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d_array uInput, thread float4& gl_FragCoord, thread uint& gl_Layer) +{ + return uInput.read(uint2(gl_FragCoord.xy), gl_Layer); +} + +fragment main0_out main0(texture2d_array uSubpass0 [[texture(0)]], texture2d_array uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy), gl_Layer) + load_subpasses(uSubpass1, gl_FragCoord, gl_Layer); + return out; +} + diff --git a/reference/shaders-msl/frag/input-attachment.frag b/reference/shaders-msl/frag/input-attachment.frag index 3cc929182b0..0643acfa72d 100644 --- a/reference/shaders-msl/frag/input-attachment.frag +++ b/reference/shaders-msl/frag/input-attachment.frag @@ -10,15 
+10,16 @@ struct main0_out float4 FragColor [[color(0)]]; }; -float4 load_subpasses(thread const texture2d uInput, thread float4& gl_FragCoord) +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d uInput, thread float4& gl_FragCoord) { - return uInput.read(uint2(gl_FragCoord.xy), 0); + return uInput.read(uint2(gl_FragCoord.xy)); } fragment main0_out main0(texture2d uSubpass0 [[texture(0)]], texture2d uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; - out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy), 0) + load_subpasses(uSubpass1, gl_FragCoord); + out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy)) + load_subpasses(uSubpass1, gl_FragCoord); return out; } diff --git a/reference/shaders-msl/frag/input-attachment.multiview.frag b/reference/shaders-msl/frag/input-attachment.multiview.frag new file mode 100644 index 00000000000..6ba2421659f --- /dev/null +++ b/reference/shaders-msl/frag/input-attachment.multiview.frag @@ -0,0 +1,26 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d_array uInput, thread float4& gl_FragCoord, thread uint& gl_ViewIndex) +{ + return uInput.read(uint2(gl_FragCoord.xy), gl_ViewIndex); +} + +fragment main0_out main0(constant uint* spvViewMask [[buffer(24)]], texture2d_array uSubpass0 [[texture(0)]], texture2d_array uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]], uint gl_ViewIndex [[render_target_array_index]]) +{ + main0_out out = {}; + gl_ViewIndex += spvViewMask[0]; + out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy), gl_ViewIndex) + load_subpasses(uSubpass1, gl_FragCoord, gl_ViewIndex); + return out; +} + diff --git a/reference/shaders-msl/frag/interpolation-qualifiers-block.frag b/reference/shaders-msl/frag/interpolation-qualifiers-block.frag index 
2b420195ffc..6148b8915c1 100644 --- a/reference/shaders-msl/frag/interpolation-qualifiers-block.frag +++ b/reference/shaders-msl/frag/interpolation-qualifiers-block.frag @@ -21,26 +21,26 @@ struct main0_out struct main0_in { - float2 Input_v0 [[user(locn0), centroid_no_perspective]]; - float2 Input_v1 [[user(locn1), centroid_no_perspective]]; - float3 Input_v2 [[user(locn2), centroid_no_perspective]]; - float4 Input_v3 [[user(locn3), centroid_no_perspective]]; - float Input_v4 [[user(locn4), centroid_no_perspective]]; - float Input_v5 [[user(locn5), centroid_no_perspective]]; - float Input_v6 [[user(locn6), centroid_no_perspective]]; + float2 inp_v0 [[user(locn0), centroid_no_perspective]]; + float2 inp_v1 [[user(locn1), centroid_no_perspective]]; + float3 inp_v2 [[user(locn2), centroid_no_perspective]]; + float4 inp_v3 [[user(locn3), centroid_no_perspective]]; + float inp_v4 [[user(locn4), centroid_no_perspective]]; + float inp_v5 [[user(locn5), centroid_no_perspective]]; + float inp_v6 [[user(locn6), centroid_no_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; Input inp = {}; - inp.v0 = in.Input_v0; - inp.v1 = in.Input_v1; - inp.v2 = in.Input_v2; - inp.v3 = in.Input_v3; - inp.v4 = in.Input_v4; - inp.v5 = in.Input_v5; - inp.v6 = in.Input_v6; + inp.v0 = in.inp_v0; + inp.v1 = in.inp_v1; + inp.v2 = in.inp_v2; + inp.v3 = in.inp_v3; + inp.v4 = in.inp_v4; + inp.v5 = in.inp_v5; + inp.v6 = in.inp_v6; out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, ((inp.v3.w * inp.v4) + inp.v5) - inp.v6); return out; } diff --git a/reference/shaders-msl/frag/lut-promotion.frag b/reference/shaders-msl/frag/lut-promotion.frag index f7e51edb84c..b1e0e7311dc 100644 --- a/reference/shaders-msl/frag/lut-promotion.frag +++ b/reference/shaders-msl/frag/lut-promotion.frag @@ -1,13 +1,52 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -constant 
float _16[16] = { 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }; -constant float4 _60[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; -constant float4 _104[4] = { float4(20.0), float4(30.0), float4(50.0), float4(60.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _16 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }); +constant spvUnsafeArray _60 = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); +constant spvUnsafeArray _104 = spvUnsafeArray({ float4(20.0), float4(30.0), float4(50.0), float4(60.0) }); struct main0_out { @@ -19,19 +58,6 @@ struct main0_in int index [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; @@ -52,14 +78,14 @@ fragment main0_out main0(main0_in in [[stage_in]]) { out.FragColor += _60[in.index & 1].x; } - float4 foobar[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; + spvUnsafeArray foobar = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); if (in.index > 30) { foobar[1].z = 20.0; } out.FragColor += foobar[in.index & 3].z; - float4 baz[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; - spvArrayCopyFromConstant1(baz, _104); + spvUnsafeArray baz = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); + baz = _104; out.FragColor += baz[in.index & 3].z; return out; } diff --git a/reference/shaders-msl/frag/mix.frag b/reference/shaders-msl/frag/mix.frag index ad7c5adeeb9..ee28bf92616 100644 --- a/reference/shaders-msl/frag/mix.frag +++ b/reference/shaders-msl/frag/mix.frag @@ -20,11 +20,10 @@ fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; bool4 l = bool4(false, true, false, false); - out.FragColor = float4(l.x ? in.vIn1.x : in.vIn0.x, l.y ? in.vIn1.y : in.vIn0.y, l.z ? in.vIn1.z : in.vIn0.z, l.w ? in.vIn1.w : in.vIn0.w); + out.FragColor = select(in.vIn0, in.vIn1, l); bool f = true; out.FragColor = float4(f ? in.vIn3 : in.vIn2); - bool4 _37 = bool4(f); - out.FragColor = float4(_37.x ? in.vIn0.x : in.vIn1.x, _37.y ? in.vIn0.y : in.vIn1.y, _37.z ? in.vIn0.z : in.vIn1.z, _37.w ? in.vIn0.w : in.vIn1.w); + out.FragColor = select(in.vIn1, in.vIn0, bool4(f)); out.FragColor = float4(f ? 
in.vIn2 : in.vIn3); return out; } diff --git a/reference/shaders-msl/frag/modf-access-tracking-function.frag b/reference/shaders-msl/frag/modf-access-tracking-function.frag new file mode 100644 index 00000000000..934561e809f --- /dev/null +++ b/reference/shaders-msl/frag/modf-access-tracking-function.frag @@ -0,0 +1,33 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 vo0 [[color(0)]]; + float4 vo1 [[color(1)]]; +}; + +struct main0_in +{ + float4 v [[user(locn0)]]; +}; + +static inline __attribute__((always_inline)) +float4 modf_inner(thread float4& v, thread float4& vo1) +{ + float4 _16 = modf(v, vo1); + return _16; +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 _20 = modf_inner(in.v, out.vo1); + out.vo0 = _20; + return out; +} + diff --git a/reference/shaders-msl/frag/mrt-array.frag b/reference/shaders-msl/frag/mrt-array.frag index daf7edb4ccf..bc268284417 100644 --- a/reference/shaders-msl/frag/mrt-array.frag +++ b/reference/shaders-msl/frag/mrt-array.frag @@ -1,10 +1,56 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() +template +inline Tx mod(Tx x, Ty y) +{ + return x - y * floor(x / y); +} + struct main0_out { float4 FragColor_0 [[color(0)]]; @@ -19,19 +65,14 @@ struct main0_in float4 vB [[user(locn1)]]; }; -// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() -template -Tx mod(Tx x, Ty y) -{ - return x - y * floor(x / y); -} - -void write_deeper_in_function(thread float4 (&FragColor)[4], thread float4& vA, thread float4& vB) +static inline __attribute__((always_inline)) +void write_deeper_in_function(thread spvUnsafeArray& FragColor, thread float4& vA, thread float4& vB) { FragColor[3] = vA * vB; } -void write_in_function(thread float4 (&FragColor)[4], thread float4& vA, thread float4& vB) +static inline __attribute__((always_inline)) +void write_in_function(thread spvUnsafeArray& FragColor, thread float4& vA, thread float4& vB) { FragColor[2] = vA - vB; write_deeper_in_function(FragColor, vA, vB); @@ -40,7 +81,7 @@ void write_in_function(thread float4 (&FragColor)[4], thread float4& vA, thread fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 FragColor[4] = {}; + spvUnsafeArray FragColor = {}; FragColor[0] = mod(in.vA, in.vB); FragColor[1] = in.vA + in.vB; 
write_in_function(FragColor, in.vA, in.vB); diff --git a/reference/shaders-msl/frag/nonuniform-qualifier.msl2.frag b/reference/shaders-msl/frag/nonuniform-qualifier.msl2.frag index 34f2bde1096..401703cbafa 100644 --- a/reference/shaders-msl/frag/nonuniform-qualifier.msl2.frag +++ b/reference/shaders-msl/frag/nonuniform-qualifier.msl2.frag @@ -40,12 +40,17 @@ fragment main0_out main0(main0_in in [[stage_in]], constant UBO* ubos_0 [[buffer main0_out out = {}; int i = in.vIndex; - int _24 = i + 10; - out.FragColor = uSamplers[_24].sample(uSamps[i + 40], in.vUV); - int _50 = i + 10; - out.FragColor = uCombinedSamplers[_50].sample(uCombinedSamplersSmplr[_50], in.vUV); - out.FragColor += ubos[(i + 20)]->v[i + 40]; - out.FragColor += ssbos[(i + 50)]->v[i + 60]; + int _25 = i + 10; + int _37 = i + 40; + out.FragColor = uSamplers[_25].sample(uSamps[_37], in.vUV); + int _53 = i + 10; + out.FragColor = uCombinedSamplers[_53].sample(uCombinedSamplersSmplr[_53], in.vUV); + int _69 = i + 20; + int _73 = i + 40; + out.FragColor += ubos[_69]->v[_73]; + int _87 = i + 50; + int _91 = i + 60; + out.FragColor += ssbos[_87]->v[_91]; return out; } diff --git a/reference/shaders-msl/frag/packed-expression-vector-shuffle.frag b/reference/shaders-msl/frag/packed-expression-vector-shuffle.frag index dc8947425a7..2ed369353fb 100644 --- a/reference/shaders-msl/frag/packed-expression-vector-shuffle.frag +++ b/reference/shaders-msl/frag/packed-expression-vector-shuffle.frag @@ -18,7 +18,9 @@ fragment main0_out main0(constant UBO& _15 [[buffer(0)]]) { main0_out out = {}; float4 f = float4(1.0); - f = float4(_15.color[0], _15.color[1], _15.color[2], f.w); + f.x = _15.color[0]; + f.y = _15.color[1]; + f.z = _15.color[2]; out.FragColor = f; return out; } diff --git a/reference/shaders-msl/frag/packing-test-3.frag b/reference/shaders-msl/frag/packing-test-3.frag index d8310d6ac76..f82d8a57970 100644 --- a/reference/shaders-msl/frag/packing-test-3.frag +++ 
b/reference/shaders-msl/frag/packing-test-3.frag @@ -32,23 +32,24 @@ struct main0_out float4 _entryPointOutput [[color(0)]]; }; -float4 _main(thread const VertexOutput& IN, constant CB0& v_26) +static inline __attribute__((always_inline)) +float4 _main(thread const VertexOutput& IN, constant CB0& _RESERVED_IDENTIFIER_FIXUP_24) { TestStruct st; - st.position = float3(v_26.CB0[1].position); - st.radius = v_26.CB0[1].radius; + st.position = float3(_RESERVED_IDENTIFIER_FIXUP_24.CB0[1].position); + st.radius = _RESERVED_IDENTIFIER_FIXUP_24.CB0[1].radius; float4 col = float4(st.position, st.radius); return col; } -fragment main0_out main0(constant CB0& v_26 [[buffer(0)]], float4 gl_FragCoord [[position]]) +fragment main0_out main0(constant CB0& _RESERVED_IDENTIFIER_FIXUP_24 [[buffer(0)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; VertexOutput IN; IN.HPosition = gl_FragCoord; VertexOutput param = IN; VertexOutput param_1 = param; - out._entryPointOutput = _main(param_1, v_26); + out._entryPointOutput = _main(param_1, _RESERVED_IDENTIFIER_FIXUP_24); return out; } diff --git a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag new file mode 100644 index 00000000000..1bfaff53bf8 --- /dev/null +++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -0,0 +1,53 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +struct spvDescriptorSetBuffer0 +{ + device Buffer3* m_9 [[id(0)]]; + texture2d img4 [[id(1)]]; + texture2d img [[id(2), raster_order_group(0)]]; + texture2d img3 [[id(3), raster_order_group(0)]]; + texture2d img2 [[id(4), raster_order_group(0)]]; + device atomic_uint* img2_atomic [[id(5), raster_order_group(0)]]; + volatile device Buffer* m_42 [[id(6), raster_order_group(0)]]; + device Buffer2* m_52 [[id(7), raster_order_group(0)]]; +}; + +fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) +{ + (*spvDescriptorSet0.m_9).baz = 0; + spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0))); + uint _39 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.img2_atomic[spvImage2DAtomicCoord(int2(0), spvDescriptorSet0.img2)], 1u, memory_order_relaxed); + (*spvDescriptorSet0.m_42).foo += 42; + uint _55 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_42).bar, (*spvDescriptorSet0.m_52).quux, memory_order_relaxed); +} + diff --git a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag new file mode 100644 index 00000000000..6a300e8c589 --- /dev/null +++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -0,0 +1,41 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The 
required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _42 [[buffer(2), raster_order_group(0)]], device Buffer2& _52 [[buffer(3), raster_order_group(0)]], texture2d img4 [[texture(0)]], texture2d img [[texture(1), raster_order_group(0)]], texture2d img3 [[texture(2), raster_order_group(0)]], texture2d img2 [[texture(3), raster_order_group(0)]], device atomic_uint* img2_atomic [[buffer(1), raster_order_group(0)]]) +{ + _9.baz = 0; + img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + img.write(img3.read(uint2(int2(0))), uint2(int2(0))); + uint _39 = atomic_fetch_add_explicit((device atomic_uint*)&img2_atomic[spvImage2DAtomicCoord(int2(0), img2)], 1u, memory_order_relaxed); + _42.foo += 42; + uint _55 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_42.bar, _52.quux, memory_order_relaxed); +} + diff --git a/reference/shaders-msl/frag/post-depth-coverage.ios.msl2.frag b/reference/shaders-msl/frag/post-depth-coverage.ios.msl2.frag new file mode 100644 index 00000000000..3b2885e2e2a --- /dev/null +++ b/reference/shaders-msl/frag/post-depth-coverage.ios.msl2.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask, 
post_depth_coverage]]) +{ + main0_out out = {}; + out.FragColor = float4(float(gl_SampleMaskIn)); + return out; +} + diff --git a/reference/shaders-msl/frag/post-depth-coverage.msl23.frag b/reference/shaders-msl/frag/post-depth-coverage.msl23.frag new file mode 100644 index 00000000000..3b2885e2e2a --- /dev/null +++ b/reference/shaders-msl/frag/post-depth-coverage.msl23.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask, post_depth_coverage]]) +{ + main0_out out = {}; + out.FragColor = float4(float(gl_SampleMaskIn)); + return out; +} + diff --git a/reference/shaders-msl/frag/private-variable-prototype-declaration.frag b/reference/shaders-msl/frag/private-variable-prototype-declaration.frag index d014623bce1..7c11a937a21 100644 --- a/reference/shaders-msl/frag/private-variable-prototype-declaration.frag +++ b/reference/shaders-msl/frag/private-variable-prototype-declaration.frag @@ -15,11 +15,13 @@ struct main0_out float3 FragColor [[color(0)]]; }; +static inline __attribute__((always_inline)) void someFunction(thread AStruct& s) { s.foobar = float4(1.0); } +static inline __attribute__((always_inline)) void otherFunction(thread float3& global_variable) { global_variable = float3(1.0); diff --git a/reference/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag b/reference/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag new file mode 100644 index 00000000000..3ab6a47161b --- /dev/null +++ b/reference/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag @@ -0,0 +1,60 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include +#if __METAL_VERSION__ >= 230 +#include +using namespace metal::raytracing; +#endif + +using namespace metal; + +struct main0_out +{ + float4 outColor [[color(0)]]; +}; + +struct main0_in +{ + float4 inPos 
[[user(locn0)]]; +}; + +static inline __attribute__((always_inline)) +uint doRay(thread const float3& rayOrigin, thread const float3& rayDirection, thread const float& rayDistance, thread raytracing::intersection_query& rayQuery, thread const raytracing::acceleration_structure& topLevelAS) +{ + rayQuery.reset(ray(rayOrigin, rayDirection, 0.001000000047497451305389404296875, rayDistance), topLevelAS, intersection_params()); + for (;;) + { + bool _36 = rayQuery.next(); + if (_36) + { + continue; + } + else + { + break; + } + } + uint _40 = uint(rayQuery.get_committed_intersection_type()); + return _40; +} + +fragment main0_out main0(main0_in in [[stage_in]], raytracing::acceleration_structure topLevelAS [[buffer(0)]]) +{ + main0_out out = {}; + float3 rayOrigin = float3((in.inPos.xy * 4.0) - float2(2.0), 1.0); + float3 rayDirection = float3(0.0, 0.0, -1.0); + float rayDistance = 2.0; + float3 param = rayOrigin; + float3 param_1 = rayDirection; + float param_2 = rayDistance; + raytracing::intersection_query rayQuery; + uint _70 = doRay(param, param_1, param_2, rayQuery, topLevelAS); + if (_70 == 0u) + { + discard_fragment(); + } + out.outColor = in.inPos; + return out; +} + diff --git a/reference/shaders-msl/frag/read-cull-clip-distance-in-function.frag b/reference/shaders-msl/frag/read-cull-clip-distance-in-function.frag new file mode 100644 index 00000000000..02d57d9beea --- /dev/null +++ b/reference/shaders-msl/frag/read-cull-clip-distance-in-function.frag @@ -0,0 +1,78 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; + float gl_CullDistance_0 [[user(cull0)]]; + float gl_CullDistance_1 [[user(cull1)]]; +}; + +static inline __attribute__((always_inline)) +float4 read_in_func(thread spvUnsafeArray& gl_CullDistance, thread spvUnsafeArray& gl_ClipDistance) +{ + return float4(gl_CullDistance[0], gl_CullDistance[1], gl_ClipDistance[0], gl_ClipDistance[1]); +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_CullDistance = {}; + spvUnsafeArray gl_ClipDistance = {}; + gl_CullDistance[0] = in.gl_CullDistance_0; + gl_CullDistance[1] = in.gl_CullDistance_1; + gl_ClipDistance[0] = in.gl_ClipDistance_0; + gl_ClipDistance[1] = in.gl_ClipDistance_1; + out.FragColor = read_in_func(gl_CullDistance, gl_ClipDistance); + return out; +} + diff --git a/reference/shaders-msl/frag/readonly-ssbo.frag b/reference/shaders-msl/frag/readonly-ssbo.frag index 771c225d6ee..7d73da79b2f 100644 --- a/reference/shaders-msl/frag/readonly-ssbo.frag +++ b/reference/shaders-msl/frag/readonly-ssbo.frag @@ -15,6 +15,7 @@ struct main0_out float4 FragColor [[color(0)]]; }; +static inline __attribute__((always_inline)) float4 read_from_function(const 
device SSBO& v_13) { return v_13.v; diff --git a/reference/shaders-msl/frag/return-value-after-discard-terminator.frag b/reference/shaders-msl/frag/return-value-after-discard-terminator.frag new file mode 100644 index 00000000000..d8895e0e7d2 --- /dev/null +++ b/reference/shaders-msl/frag/return-value-after-discard-terminator.frag @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct buff_t +{ + int m0[1024]; +}; + +struct main0_out +{ + float4 frag_clr [[color(0)]]; +}; + +fragment main0_out main0(device buff_t& buff [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + int2 frag_coord = int2(int4(gl_FragCoord).xy); + int buff_idx = (frag_coord.y * 32) + frag_coord.x; + out.frag_clr = float4(0.0, 0.0, 1.0, 1.0); + buff.m0[buff_idx] = 1; + discard_fragment(); + return out; +} + diff --git a/reference/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag b/reference/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag new file mode 100644 index 00000000000..f0bf396c50b --- /dev/null +++ b/reference/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag @@ -0,0 +1,43 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vUV [[user(locn0)]]; +}; + +static inline __attribute__((always_inline)) +float sample_normal2(depth2d tex, sampler uSampler, thread float3& vUV) +{ + return float4(tex.sample(uSampler, vUV.xy)).x; +} + +static inline __attribute__((always_inline)) +float sample_normal(depth2d tex, sampler uSampler, thread float3& vUV) +{ + return sample_normal2(tex, uSampler, vUV); +} + +static inline __attribute__((always_inline)) +float sample_comp(depth2d tex, thread float3& vUV, sampler uSamplerShadow) +{ + return tex.sample_compare(uSamplerShadow, vUV.xy, vUV.z); +} + +fragment main0_out main0(main0_in in [[stage_in]], depth2d uTexture 
[[texture(0)]], sampler uSampler [[sampler(0)]], sampler uSamplerShadow [[sampler(1)]]) +{ + main0_out out = {}; + out.FragColor = sample_normal(uTexture, uSampler, in.vUV); + out.FragColor += sample_comp(uTexture, in.vUV, uSamplerShadow); + return out; +} + diff --git a/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag b/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag index a9c0f8b41bb..27653a06a43 100644 --- a/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag +++ b/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag @@ -10,12 +10,14 @@ struct main0_out float FragColor [[color(0)]]; }; -float sample_depth_from_function(thread const depth2d uT, thread const sampler uS) +static inline __attribute__((always_inline)) +float sample_depth_from_function(depth2d uT, sampler uS) { - return uT.sample_compare(uS, float3(0.5).xy, float3(0.5).z); + return uT.sample_compare(uS, float3(0.5).xy, 0.5); } -float sample_color_from_function(thread const texture2d uT, thread const sampler uS) +static inline __attribute__((always_inline)) +float sample_color_from_function(texture2d uT, sampler uS) { return uT.sample(uS, float2(0.5)).x; } diff --git a/reference/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag b/reference/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag new file mode 100644 index 00000000000..626fe4c79c2 --- /dev/null +++ b/reference/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = (gl_SampleMaskIn & 0x22 & (1 << gl_SampleID)); + out.gl_SampleMask &= 0x22; + return out; +} + diff --git 
a/reference/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag b/reference/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag new file mode 100644 index 00000000000..f478901b6be --- /dev/null +++ b/reference/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask]]) +{ + main0_out out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = (gl_SampleMaskIn & 0x22); + out.gl_SampleMask &= 0x22; + return out; +} + diff --git a/reference/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag b/reference/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag new file mode 100644 index 00000000000..d04f2033bb5 --- /dev/null +++ b/reference/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = 0x22; + return out; +} + diff --git a/reference/shaders-msl/frag/sample-mask.fixed-sample-mask.frag b/reference/shaders-msl/frag/sample-mask.fixed-sample-mask.frag new file mode 100644 index 00000000000..76306b5ade8 --- /dev/null +++ b/reference/shaders-msl/frag/sample-mask.fixed-sample-mask.frag @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = 0; + out.gl_SampleMask &= 0x22; + return out; +} + diff --git a/reference/shaders-msl/frag/sample-position-func.frag b/reference/shaders-msl/frag/sample-position-func.frag index 
06fa53063ac..025f874d266 100644 --- a/reference/shaders-msl/frag/sample-position-func.frag +++ b/reference/shaders-msl/frag/sample-position-func.frag @@ -15,6 +15,7 @@ struct main0_in int index [[user(locn0)]]; }; +static inline __attribute__((always_inline)) float4 getColor(thread const int& i, thread float2& gl_SamplePosition) { return float4(gl_SamplePosition, float(i), 1.0); diff --git a/reference/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag b/reference/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag new file mode 100644 index 00000000000..5df60f909e5 --- /dev/null +++ b/reference/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d_array tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + float3 _28 = float3(gl_FragCoord.xy, float(gl_SampleID)); + out.FragColor = tex.sample(texSmplr, _28.xy, uint(round(_28.z))); + return out; +} + diff --git a/reference/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag b/reference/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag new file mode 100644 index 00000000000..386230ef0c3 --- /dev/null +++ b/reference/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float foo [[user(locn0), sample_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d_array tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + float3 _26 = float3(gl_FragCoord.xy, in.foo); + 
out.FragColor = tex.sample(texSmplr, _26.xy, uint(round(_26.z))); + return out; +} + diff --git a/reference/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag b/reference/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag new file mode 100644 index 00000000000..f8f357fe7c9 --- /dev/null +++ b/reference/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + float2 gl_SamplePosition = get_sample_position(gl_SampleID); + out.FragColor = tex.sample(texSmplr, (gl_FragCoord.xy - gl_SamplePosition)); + return out; +} + diff --git a/reference/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag b/reference/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag new file mode 100644 index 00000000000..1ed8148d4c0 --- /dev/null +++ b/reference/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + out.FragColor = tex.sample(texSmplr, gl_FragCoord.xy); + return out; +} + diff --git a/reference/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag b/reference/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag new file mode 100644 index 00000000000..70278b12907 --- /dev/null +++ b/reference/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor 
[[color(0)]]; +}; + +struct main0_in +{ + float vTex [[user(locn0), flat]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor += ((uSampler.sample(uSamplerSmplr, float2(in.vTex, 0.5), bias(2.0)) + uSampler.sample(uSamplerSmplr, float2(in.vTex, 0.5), level(3.0))) + uSampler.sample(uSamplerSmplr, float2(in.vTex, 0.5), gradient2d(5.0, 8.0))); + return out; +} + diff --git a/reference/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag b/reference/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag new file mode 100644 index 00000000000..6aaffe532ba --- /dev/null +++ b/reference/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], depth2d_array uTex [[texture(0)]], sampler uShadow [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = uTex.sample_compare(uShadow, float2(in.vUV.x, 0.5), uint(round(in.vUV.y)), in.vUV.z, bias(1.0)); + return out; +} + diff --git a/reference/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag b/reference/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag new file mode 100644 index 00000000000..07845691942 --- /dev/null +++ b/reference/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], depth2d_array uTex [[texture(0)]], sampler uShadow [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = uTex.sample_compare(uShadow, in.vUV.xy, uint(round(in.vUV.z)), in.vUV.w, level(0)) + uTex.sample_compare(uShadow, in.vUV.xy, 
uint(round(in.vUV.z)), in.vUV.w, gradient2d(float2(1.0), float2(1.0))); + return out; +} + diff --git a/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag b/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag index 10a7716261c..8d1934d4efc 100644 --- a/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag +++ b/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag @@ -16,17 +16,20 @@ struct main0_in int vIndex [[user(locn1)]]; }; -float4 sample_from_global(thread int& vIndex, thread float2& vTex, thread const array, 4> uSampler, thread const array uSamplerSmplr) +static inline __attribute__((always_inline)) +float4 sample_from_global(thread int& vIndex, thread float2& vTex, thread const array, 4>& uSampler, thread const array& uSamplerSmplr) { return uSampler[vIndex].sample(uSamplerSmplr[vIndex], (vTex + float2(0.100000001490116119384765625))); } -float4 sample_from_argument(thread const array, 4> samplers, thread const array samplersSmplr, thread int& vIndex, thread float2& vTex) +static inline __attribute__((always_inline)) +float4 sample_from_argument(thread const array, 4>& samplers, thread const array& samplersSmplr, thread int& vIndex, thread float2& vTex) { return samplers[vIndex].sample(samplersSmplr[vIndex], (vTex + float2(0.20000000298023223876953125))); } -float4 sample_single_from_argument(thread const texture2d samp, thread const sampler sampSmplr, thread float2& vTex) +static inline __attribute__((always_inline)) +float4 sample_single_from_argument(texture2d samp, sampler sampSmplr, thread float2& vTex) { return samp.sample(sampSmplr, (vTex + float2(0.300000011920928955078125))); } diff --git a/reference/shaders-msl/frag/sampler.frag b/reference/shaders-msl/frag/sampler.frag index 395854699e3..84743fbd6e0 100644 --- a/reference/shaders-msl/frag/sampler.frag +++ b/reference/shaders-msl/frag/sampler.frag @@ -16,7 +16,8 @@ struct main0_in float2 vTex [[user(locn1)]]; }; -float4 sample_texture(thread const texture2d tex, thread const 
sampler texSmplr, thread const float2& uv) +static inline __attribute__((always_inline)) +float4 sample_texture(texture2d tex, sampler texSmplr, thread const float2& uv) { return tex.sample(texSmplr, uv); } diff --git a/reference/shaders-msl/frag/scalar-refract-reflect.frag b/reference/shaders-msl/frag/scalar-refract-reflect.frag index 592d445810f..fc908cb3e18 100644 --- a/reference/shaders-msl/frag/scalar-refract-reflect.frag +++ b/reference/shaders-msl/frag/scalar-refract-reflect.frag @@ -5,18 +5,8 @@ using namespace metal; -struct main0_out -{ - float FragColor [[color(0)]]; -}; - -struct main0_in -{ - float3 vRefract [[user(locn0)]]; -}; - template -inline T spvReflect(T i, T n) +[[clang::optnone]] T spvReflect(T i, T n) { return i - T(2) * i * n * n; } @@ -37,6 +27,16 @@ inline T spvRefract(T i, T n, T eta) } } +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vRefract [[user(locn0)]]; +}; + fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; diff --git a/reference/shaders-msl/frag/separate-image-sampler-argument.frag b/reference/shaders-msl/frag/separate-image-sampler-argument.frag index 46c0524ab72..208f5d90951 100644 --- a/reference/shaders-msl/frag/separate-image-sampler-argument.frag +++ b/reference/shaders-msl/frag/separate-image-sampler-argument.frag @@ -10,7 +10,8 @@ struct main0_out float4 FragColor [[color(0)]]; }; -float4 samp(thread const texture2d t, thread const sampler s) +static inline __attribute__((always_inline)) +float4 samp(texture2d t, sampler s) { return t.sample(s, float2(0.5)); } diff --git a/reference/shaders-msl/frag/shader-arithmetic-8bit.frag b/reference/shaders-msl/frag/shader-arithmetic-8bit.frag index 28ef0437268..e9694aa196c 100644 --- a/reference/shaders-msl/frag/shader-arithmetic-8bit.frag +++ b/reference/shaders-msl/frag/shader-arithmetic-8bit.frag @@ -34,9 +34,10 @@ struct main0_in int4 vColor [[user(locn0)]]; }; +static inline __attribute__((always_inline)) void 
packing_int8(device SSBO& ssbo) { - short i16 = 10; + short i16 = short(10); int i32 = 20; char2 i8_2 = as_type(i16); char4 i8_4 = as_type(i32); @@ -48,9 +49,10 @@ void packing_int8(device SSBO& ssbo) ssbo.i8[3] = i8_4.w; } +static inline __attribute__((always_inline)) void packing_uint8(device SSBO& ssbo) { - ushort u16 = 10u; + ushort u16 = ushort(10); uint u32 = 20u; uchar2 u8_2 = as_type(u16); uchar4 u8_4 = as_type(u32); @@ -62,6 +64,7 @@ void packing_uint8(device SSBO& ssbo) ssbo.u8[3] = u8_4.w; } +static inline __attribute__((always_inline)) void compute_int8(device SSBO& ssbo, thread int4& vColor, constant Push& registers, constant UBO& ubo, thread int4& FragColorInt) { char4 tmp = char4(vColor); @@ -74,6 +77,7 @@ void compute_int8(device SSBO& ssbo, thread int4& vColor, constant Push& registe FragColorInt = int4(tmp); } +static inline __attribute__((always_inline)) void compute_uint8(device SSBO& ssbo, thread int4& vColor, constant Push& registers, constant UBO& ubo, thread uint4& FragColorUint) { uchar4 tmp = uchar4(char4(vColor)); diff --git a/reference/shaders-msl/frag/stencil-export.msl21.frag b/reference/shaders-msl/frag/stencil-export.msl21.frag index eb85a2158d9..f3629e18beb 100644 --- a/reference/shaders-msl/frag/stencil-export.msl21.frag +++ b/reference/shaders-msl/frag/stencil-export.msl21.frag @@ -12,6 +12,7 @@ struct main0_out uint gl_FragStencilRefARB [[stencil]]; }; +static inline __attribute__((always_inline)) void update_stencil(thread uint& gl_FragStencilRefARB) { gl_FragStencilRefARB = uint(int(gl_FragStencilRefARB) + 10); diff --git a/reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag b/reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag new file mode 100644 index 00000000000..ec0911aed6e --- /dev/null +++ b/reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +inline uint 
spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +struct main0_out +{ + uint2 FragColor 
[[color(0)]]; +}; + +static inline __attribute__((always_inline)) +uint sub1(thread uint& gl_SubgroupSize) +{ + return spvSubgroupBallotFindLSB(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize); +} + +static inline __attribute__((always_inline)) +uint sub2(thread uint& gl_SubgroupSize) +{ + return spvSubgroupBallotFindMSB(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize); +} + +static inline __attribute__((always_inline)) +uint sub3(thread uint& gl_SubgroupSize) +{ + return spvSubgroupBallotBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize); +} + +static inline __attribute__((always_inline)) +uint sub4(thread uint& gl_SubgroupInvocationID) +{ + return spvSubgroupBallotInclusiveBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupInvocationID); +} + +static inline __attribute__((always_inline)) +uint sub5(thread uint& gl_SubgroupInvocationID) +{ + return spvSubgroupBallotExclusiveBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupInvocationID); +} + +fragment main0_out main0(uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]], uint gl_SubgroupSize [[threads_per_simdgroup]]) +{ + main0_out out = {}; + out.FragColor.x = (((sub1(gl_SubgroupSize) + sub2(gl_SubgroupSize)) + sub3(gl_SubgroupSize)) + sub4(gl_SubgroupInvocationID)) + sub5(gl_SubgroupInvocationID); + return out; +} + diff --git a/reference/shaders-msl/frag/switch-unreachable-break.frag b/reference/shaders-msl/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..8d7903b79b4 --- /dev/null +++ b/reference/shaders-msl/frag/switch-unreachable-break.frag @@ -0,0 +1,43 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + int cond; + int cond2; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant UBO& _15 [[buffer(0)]]) +{ + main0_out out = {}; + out.FragColor = float4(10.0); + switch (_15.cond) + { + case 1: + { + if (_15.cond2 < 50) + { + break; + } + else + { + discard_fragment(); + } + break; // unreachable workaround + } + default: + { + out.FragColor = 
float4(20.0); + break; + } + } + return out; +} + diff --git a/reference/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag b/reference/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag new file mode 100644 index 00000000000..98b9bb7ef80 --- /dev/null +++ b/reference/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d uTexture [[texture(0)]], texture2d uTexture2 [[texture(1)]], sampler uTextureSmplr [[sampler(0)]], sampler uTexture2Smplr [[sampler(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + out.FragColor = uTexture.read(uint2(int2(gl_FragCoord.xy)) + uint2(int2(1)), 0); + out.FragColor += uTexture2.read(uint2(uint(int(gl_FragCoord.x)), 0) + uint2(uint(-1), 0), 0); + return out; +} + diff --git a/reference/shaders-msl/frag/texture-cube-array.frag b/reference/shaders-msl/frag/texture-cube-array.frag new file mode 100644 index 00000000000..9f1832ec04b --- /dev/null +++ b/reference/shaders-msl/frag/texture-cube-array.frag @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texturecube cubeSampler [[texture(0)]], texturecube_array cubeArraySampler [[texture(1)]], texture2d_array texArraySampler [[texture(2)]], sampler cubeSamplerSmplr [[sampler(0)]], sampler cubeArraySamplerSmplr [[sampler(1)]], sampler texArraySamplerSmplr [[sampler(2)]]) +{ + main0_out out = {}; + float4 a = cubeSampler.sample(cubeSamplerSmplr, in.vUV.xyz); + float4 b = cubeArraySampler.sample(cubeArraySamplerSmplr, in.vUV.xyz, uint(round(in.vUV.w))); + float4 c = texArraySampler.sample(texArraySamplerSmplr, in.vUV.xyz.xy, uint(round(in.vUV.xyz.z))); + out.FragColor = (a + b) + c; + return out; +} + diff --git 
a/reference/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag b/reference/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag new file mode 100644 index 00000000000..02541f3ce8a --- /dev/null +++ b/reference/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag @@ -0,0 +1,61 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +static inline __attribute__((always_inline)) +float3 spvCubemapTo2DArrayFace(float3 P) +{ + float3 Coords = abs(P.xyz); + float CubeFace = 0; + float ProjectionAxis = 0; + float u = 0; + float v = 0; + if (Coords.x >= Coords.y && Coords.x >= Coords.z) + { + CubeFace = P.x >= 0 ? 0 : 1; + ProjectionAxis = Coords.x; + u = P.x >= 0 ? -P.z : P.z; + v = -P.y; + } + else if (Coords.y >= Coords.x && Coords.y >= Coords.z) + { + CubeFace = P.y >= 0 ? 2 : 3; + ProjectionAxis = Coords.y; + u = P.x; + v = P.y >= 0 ? P.z : -P.z; + } + else + { + CubeFace = P.z >= 0 ? 4 : 5; + ProjectionAxis = Coords.z; + u = P.z >= 0 ? 
P.x : -P.x; + v = -P.y; + } + u = 0.5 * (u/ProjectionAxis + 1); + v = 0.5 * (v/ProjectionAxis + 1); + return float3(u, v, CubeFace); +} + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texturecube cubeSampler [[texture(0)]], texture2d_array cubeArraySampler [[texture(1)]], texture2d_array texArraySampler [[texture(2)]], sampler cubeSamplerSmplr [[sampler(0)]], sampler cubeArraySamplerSmplr [[sampler(1)]], sampler texArraySamplerSmplr [[sampler(2)]]) +{ + main0_out out = {}; + float4 a = cubeSampler.sample(cubeSamplerSmplr, in.vUV.xyz); + float4 b = cubeArraySampler.sample(cubeArraySamplerSmplr, spvCubemapTo2DArrayFace(in.vUV.xyz).xy, uint(spvCubemapTo2DArrayFace(in.vUV.xyz).z) + (uint(round(in.vUV.w)) * 6u)); + float4 c = texArraySampler.sample(texArraySamplerSmplr, in.vUV.xyz.xy, uint(round(in.vUV.xyz.z))); + out.FragColor = (a + b) + c; + return out; +} + diff --git a/reference/shaders-msl/frag/texture-proj-shadow.frag b/reference/shaders-msl/frag/texture-proj-shadow.frag index 52d4a026d2d..1ef450a2b32 100644 --- a/reference/shaders-msl/frag/texture-proj-shadow.frag +++ b/reference/shaders-msl/frag/texture-proj-shadow.frag @@ -18,9 +18,10 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], depth2d uShadow2D [[texture(0)]], texture1d uSampler1D [[texture(1)]], texture2d uSampler2D [[texture(2)]], texture3d uSampler3D [[texture(3)]], sampler uShadow2DSmplr [[sampler(0)]], sampler uSampler1DSmplr [[sampler(1)]], sampler uSampler2DSmplr [[sampler(2)]], sampler uSampler3DSmplr [[sampler(3)]]) { main0_out out = {}; - float4 _20 = in.vClip4; - _20.z = in.vClip4.w; - out.FragColor = uShadow2D.sample_compare(uShadow2DSmplr, _20.xy / _20.z, in.vClip4.z / _20.z); + float4 _17 = in.vClip4; + float4 _20 = _17; + _20.z = _17.w; + out.FragColor = uShadow2D.sample_compare(uShadow2DSmplr, _20.xy / _20.z, _17.z / _20.z); out.FragColor = 
uSampler1D.sample(uSampler1DSmplr, in.vClip2.x / in.vClip2.y).x; out.FragColor = uSampler2D.sample(uSampler2DSmplr, in.vClip3.xy / in.vClip3.z).x; out.FragColor = uSampler3D.sample(uSampler3DSmplr, in.vClip4.xyz / in.vClip4.w).x; diff --git a/reference/shaders-msl/frag/ubo_layout.frag b/reference/shaders-msl/frag/ubo_layout.frag index 0bc27462b2e..4ca603d6431 100644 --- a/reference/shaders-msl/frag/ubo_layout.frag +++ b/reference/shaders-msl/frag/ubo_layout.frag @@ -31,7 +31,7 @@ struct main0_out fragment main0_out main0(constant UBO1& ubo1 [[buffer(0)]], constant UBO2& ubo0 [[buffer(1)]]) { main0_out out = {}; - out.FragColor = transpose(ubo1.foo.foo)[0] + ubo0.foo.foo[0]; + out.FragColor = float4(ubo1.foo.foo[0][0], ubo1.foo.foo[1][0], ubo1.foo.foo[2][0], ubo1.foo.foo[3][0]) + ubo0.foo.foo[0]; return out; } diff --git a/reference/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag b/reference/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag new file mode 100644 index 00000000000..1cb7aa70328 --- /dev/null +++ b/reference/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag @@ -0,0 +1,75 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + ushort2 a [[user(locn0)]]; + uint3 b [[user(locn1)]]; + ushort c_0 [[user(locn2)]]; + ushort c_1 [[user(locn3)]]; + uint4 e_0 [[user(locn4)]]; + uint4 e_1 [[user(locn5)]]; + float4 d [[user(locn6)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray c = {}; + spvUnsafeArray e = {}; + c[0] = in.c_0; + c[1] = in.c_1; + e[0] = in.e_0; + e[1] = in.e_1; + out.FragColor = float4(float(int(short(in.a.x))), float(int(in.b.x)), float2(float(uint(c[1])), float(e[0].w)) + in.d.xy); + return out; +} + diff --git a/reference/shaders-msl/frag/write-depth-in-function.frag b/reference/shaders-msl/frag/write-depth-in-function.frag index 6837a9b3ea2..c3ab221fc32 100644 --- a/reference/shaders-msl/frag/write-depth-in-function.frag +++ b/reference/shaders-msl/frag/write-depth-in-function.frag @@ -11,6 +11,7 @@ struct main0_out float gl_FragDepth [[depth(any)]]; }; +static inline __attribute__((always_inline)) void set_output_depth(thread float& gl_FragDepth) { gl_FragDepth = 0.20000000298023223876953125; diff --git a/reference/shaders-msl/intel/shader-integer-functions2.asm.comp b/reference/shaders-msl/intel/shader-integer-functions2.asm.comp new file mode 100644 index 00000000000..1e5d889d462 --- 
/dev/null +++ b/reference/shaders-msl/intel/shader-integer-functions2.asm.comp @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct foo +{ + uint a; + uint b; + int c; + int d; +}; + +kernel void main0(device foo& _4 [[buffer(0)]]) +{ + _4.a = clz(_4.a); + _4.a = ctz(_4.a); + _4.a = absdiff(_4.c, _4.d); + _4.a = absdiff(_4.a, _4.b); + _4.c = addsat(_4.c, _4.d); + _4.a = addsat(_4.a, _4.b); + _4.c = hadd(_4.c, _4.d); + _4.a = hadd(_4.a, _4.b); + _4.c = rhadd(_4.c, _4.d); + _4.a = rhadd(_4.a, _4.b); + _4.c = subsat(_4.c, _4.d); + _4.a = subsat(_4.a, _4.b); + _4.c = int(short(_4.c)) * int(short(_4.d)); + _4.a = uint(ushort(_4.a)) * uint(ushort(_4.b)); +} + diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc new file mode 100644 index 00000000000..24928da01df --- /dev/null +++ b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc @@ -0,0 +1,188 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; 
i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + spvUnsafeArray iFoo; + float4 ipFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup float4 spvStorageFoo[8][4][2]; + threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements); + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = 
spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..a08364e2b34 --- /dev/null +++ b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc @@ -0,0 +1,191 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void 
spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + float4 iFoo_0 [[attribute(0)]]; + float4 iFoo_1 [[attribute(1)]]; + float4 ipFoo [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) 
+{ + threadgroup float4 Foo[4][2]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvUnsafeArray _38 = spvUnsafeArray({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 }); + spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements); + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc new file mode 100644 index 00000000000..abc95ca899e --- /dev/null +++ b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc @@ -0,0 +1,79 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray Foo; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + spvUnsafeArray iFoo; + float4 ipFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].Foo = gl_in[gl_InvocationID].iFoo; + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc 
b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..3da1d18c61d --- /dev/null +++ b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc @@ -0,0 +1,83 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray Foo; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + float4 iFoo_0 [[attribute(0)]]; + float4 iFoo_1 [[attribute(1)]]; + float4 ipFoo [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + 
threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvUnsafeArray _38 = spvUnsafeArray({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 }); + gl_out[gl_InvocationID].Foo = _38; + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert new file mode 100644 index 00000000000..2bf5c257d6b --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_b; + float4 m_22_c; + float4 m_22_d; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + V _22 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_b = _22.b; + out.m_22_c = _22.c; + out.m_22_d = _22.d; +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..cc6364fcd7d --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc @@ -0,0 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + 
+struct main0_out +{ + float c_a; + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_b; +}; + +static inline __attribute__((always_inline)) +void write_in_function(threadgroup P& _11, device main0_patchOut& patchOut, device main0_out* thread & gl_out, thread uint& gl_InvocationID) +{ + _11.a = 1.0; + patchOut.m_11_b = 2.0; + gl_out[gl_InvocationID].c_a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup P _11; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_function(_11, patchOut, gl_out, gl_InvocationID); +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc new file mode 100644 index 00000000000..7336d094c84 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc @@ -0,0 +1,52 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_a; + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_b; +}; + +static inline __attribute__((always_inline)) +void write_in_function(threadgroup P& _11, device main0_patchOut& patchOut, device main0_out* thread & gl_out, thread uint& gl_InvocationID) +{ + _11.a = 1.0; + patchOut.m_11_b = 2.0; + gl_out[gl_InvocationID].c_a = 3.0; + 
gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + threadgroup P spvStorage_11[8]; + threadgroup P (&_11) = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_function(_11, patchOut, gl_out, gl_InvocationID); +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-0.vert b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.vert new file mode 100644 index 00000000000..ad6079061ec --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.vert @@ -0,0 +1,34 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_b [[user(locn1)]]; + float4 m_22_c [[user(locn2)]]; + float4 m_22_d [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + V _22 = {}; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_b = _22.b; + out.m_22_c = _22.c; + out.m_22_d = _22.d; + return out; +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert new file mode 100644 index 00000000000..2b535c312ef --- /dev/null +++ 
b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_a; + float4 m_22_c; + float4 m_22_d; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + V _22 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_a = _22.a; + out.m_22_c = _22.c; + out.m_22_d = _22.d; +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..c3b54c7dff7 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc @@ -0,0 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_a; + float m_11_b; +}; + +static inline __attribute__((always_inline)) +void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], device main0_out* thread & gl_out, thread uint& gl_InvocationID) +{ + patchOut.m_11_a = 1.0; + patchOut.m_11_b = 2.0; + c[gl_InvocationID].a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], 
constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup C c[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_function(patchOut, c, gl_out, gl_InvocationID); +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc new file mode 100644 index 00000000000..a881a682a74 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc @@ -0,0 +1,52 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_a; + float m_11_b; +}; + +static inline __attribute__((always_inline)) +void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], device main0_out* thread & gl_out, thread uint& gl_InvocationID) +{ + patchOut.m_11_a = 1.0; + patchOut.m_11_b = 2.0; + c[gl_InvocationID].a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup C spvStoragec[8][4]; + threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; 
+ uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_function(patchOut, c, gl_out, gl_InvocationID); +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-1.vert b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.vert new file mode 100644 index 00000000000..3b830290f7c --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.vert @@ -0,0 +1,34 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_a [[user(locn0)]]; + float4 m_22_c [[user(locn2)]]; + float4 m_22_d [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + V _22 = {}; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_a = _22.a; + out.m_22_c = _22.c; + out.m_22_d = _22.d; + return out; +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-clip-distance.vert b/reference/shaders-msl/masking/write-outputs.mask-clip-distance.vert new file mode 100644 index 00000000000..2a0508361eb --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-clip-distance.vert @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v0 [[user(locn0)]]; + float4 v1 [[user(locn1)]]; + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; +}; + +static inline __attribute__((always_inline)) +void write_in_func(thread float4& v0, thread float4& v1, thread float4& gl_Position, thread float& gl_PointSize, thread spvUnsafeArray& gl_ClipDistance) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +vertex main0_out main0() +{ + main0_out out = {}; + spvUnsafeArray gl_ClipDistance = {}; + write_in_func(out.v0, out.v1, out.gl_Position, out.gl_PointSize, gl_ClipDistance); + return out; +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert b/reference/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert new file mode 100644 index 00000000000..53f76b575e7 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert @@ -0,0 +1,74 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v1; + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; +}; + +static inline __attribute__((always_inline)) +void write_in_func(thread float4& v0, device float4& v1, device float4& gl_Position, device float& gl_PointSize, device spvUnsafeArray& gl_ClipDistance) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + float4 v0 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + write_in_func(v0, out.v1, out.gl_Position, out.gl_PointSize, out.gl_ClipDistance); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..e4f047d3e3f --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc @@ -0,0 +1,42 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; 
+ +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 v1; +}; + +static inline __attribute__((always_inline)) +void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device float4& v1, device main0_out* thread & gl_out) +{ + v0[gl_InvocationID] = float4(1.0); + v0[gl_InvocationID].x = 2.0; + if (gl_InvocationID == 0) + { + v1 = float4(2.0); + ((device float*)&v1)[3u] = 4.0; + } + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; + gl_out[gl_InvocationID].gl_Position.z = 5.0; + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v0[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(v0, gl_InvocationID, patchOut.v1, gl_out); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc new file mode 100644 index 00000000000..7465cc64db9 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc @@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device spvUnsafeArray& v1, device float4& v3, device main0_out* thread & gl_out) +{ + v0[gl_InvocationID] = float4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((device float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((device float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup float4 spvStoragev0[8][4]; + threadgroup float4 (&v0)[4] = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint 
gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_func(v0, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-0.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-0.tesc new file mode 100644 index 00000000000..0ae265e9e7b --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-0.tesc @@ -0,0 +1,84 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device spvUnsafeArray& v1, device float4& v3, device main0_out* thread & gl_out) +{ + v0[gl_InvocationID] = float4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((device float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((device float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + 
gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v0[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(v0, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-0.vert b/reference/shaders-msl/masking/write-outputs.mask-location-0.vert new file mode 100644 index 00000000000..12c111aef43 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-0.vert @@ -0,0 +1,38 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v1 [[user(locn1)]]; + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +static inline __attribute__((always_inline)) +void write_in_func(thread float4& v0, thread float4& v1, thread float4& gl_Position, thread float& gl_PointSize, thread float (&gl_ClipDistance)[2]) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +vertex main0_out main0() +{ + main0_out out = {}; + float4 v0 = {}; + write_in_func(v0, out.v1, out.gl_Position, out.gl_PointSize, out.gl_ClipDistance); + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return 
out; +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert b/reference/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert new file mode 100644 index 00000000000..8f9cfce5362 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert @@ -0,0 +1,74 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device float4& v0, thread float4& v1, device float4& gl_Position, device float& gl_PointSize, device spvUnsafeArray& gl_ClipDistance) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + float4 v1 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if 
(any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + write_in_func(out.v0, v1, out.gl_Position, out.gl_PointSize, out.gl_ClipDistance); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..85160a9dee1 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc @@ -0,0 +1,41 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ +}; +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup float4& v1) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.x = 2.0; + if (gl_InvocationID == 0) + { + v1 = float4(2.0); + ((threadgroup float*)&v1)[3u] = 4.0; + } + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; + gl_out[gl_InvocationID].gl_Position.z = 5.0; + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v1; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(gl_out, gl_InvocationID, v1); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc new file mode 100644 index 00000000000..df057861eee --- /dev/null +++ 
b/reference/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc @@ -0,0 +1,48 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup float4 (&v1)[2], device float4& v3) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((threadgroup float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((threadgroup float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + threadgroup float4 spvStoragev1[8][2]; + threadgroup float4 (&v1)[2] = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_func(gl_out, gl_InvocationID, v1, patchOut.v3); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-1.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-1.tesc new file mode 100644 index 00000000000..0ad2727673d --- /dev/null +++ 
b/reference/shaders-msl/masking/write-outputs.mask-location-1.tesc @@ -0,0 +1,45 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup float4 (&v1)[2], device float4& v3) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((threadgroup float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((threadgroup float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v1[2]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(gl_out, gl_InvocationID, v1, patchOut.v3); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-1.vert b/reference/shaders-msl/masking/write-outputs.mask-location-1.vert new file mode 100644 index 00000000000..2290e5cca49 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-1.vert @@ -0,0 +1,38 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0 [[user(locn0)]]; + float4 gl_Position [[position]]; + float gl_PointSize 
[[point_size]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +static inline __attribute__((always_inline)) +void write_in_func(thread float4& v0, thread float4& v1, thread float4& gl_Position, thread float& gl_PointSize, thread float (&gl_ClipDistance)[2]) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +vertex main0_out main0() +{ + main0_out out = {}; + float4 v1 = {}; + write_in_func(out.v0, v1, out.gl_Position, out.gl_PointSize, out.gl_ClipDistance); + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert b/reference/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert new file mode 100644 index 00000000000..07494ea399a --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert @@ -0,0 +1,74 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v0; + float4 v1; + float4 gl_Position; + spvUnsafeArray gl_ClipDistance; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device float4& v0, device float4& v1, device float4& gl_Position, thread float& gl_PointSize, device spvUnsafeArray& gl_ClipDistance) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + float gl_PointSize = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + write_in_func(out.v0, out.v1, out.gl_Position, gl_PointSize, out.gl_ClipDistance); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc b/reference/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc new file mode 100644 index 00000000000..05d58634ef2 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc @@ -0,0 +1,95 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic 
ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4]) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((device float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((device float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out_masked[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) 
+{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-point-size.tesc b/reference/shaders-msl/masking/write-outputs.mask-point-size.tesc new file mode 100644 index 00000000000..8ec2a663b7c --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-point-size.tesc @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4]) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((device float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((device float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out_masked[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = 
&spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-point-size.vert b/reference/shaders-msl/masking/write-outputs.mask-point-size.vert new file mode 100644 index 00000000000..b37b92dd4a6 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-point-size.vert @@ -0,0 +1,38 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0 [[user(locn0)]]; + float4 v1 [[user(locn1)]]; + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +static inline __attribute__((always_inline)) +void write_in_func(thread float4& v0, thread float4& v1, thread float4& gl_Position, thread float& gl_PointSize, thread float (&gl_ClipDistance)[2]) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +vertex main0_out main0() +{ + main0_out out = {}; + float gl_PointSize = {}; + write_in_func(out.v0, out.v1, out.gl_Position, gl_PointSize, out.gl_ClipDistance); + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc b/reference/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc new file mode 100644 index 00000000000..86bc7d37be2 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc @@ -0,0 +1,95 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ 
+ T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4]) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((device float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((device float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(10.0); + gl_out_masked[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + 
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-position.tesc b/reference/shaders-msl/masking/write-outputs.mask-position.tesc new file mode 100644 index 00000000000..da0d2a2d10f --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-position.tesc @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* 
thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4]) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((device float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((device float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(10.0); + gl_out_masked[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked); +} + diff --git a/reference/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc b/reference/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc new file mode 100644 index 00000000000..c11c7410c65 --- /dev/null +++ b/reference/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc @@ -0,0 +1,111 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct S +{ + int x; + float4 y; + spvUnsafeArray z; +}; + +struct TheBlock +{ + spvUnsafeArray blockFa; + spvUnsafeArray blockSa; + float blockF; +}; + +struct main0_patchOut +{ + float2 in_te_positionScale; + float2 in_te_positionOffset; + spvUnsafeArray tcBlock; +}; + +struct main0_in +{ + float3 in_tc_attr; + ushort2 m_179; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 5]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 5, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 5, spvIndirectParams[1] - 1); + float v = 1.2999999523162841796875; + for (int i0 = 0; i0 < 2; i0++) + { + for (int i1 = 0; i1 < 3; i1++) + { + patchOut.tcBlock[i0].blockFa[i1] = v; + v += 0.4000000059604644775390625; + } + for (int i1_1 = 0; i1_1 < 2; i1_1++) + { + patchOut.tcBlock[i0].blockSa[i1_1].x = int(v); + v += 0.4000000059604644775390625; + patchOut.tcBlock[i0].blockSa[i1_1].y = float4(v, v + 0.800000011920928955078125, v + 
1.60000002384185791015625, v + 2.400000095367431640625); + v += 0.4000000059604644775390625; + for (int i2 = 0; i2 < 2; i2++) + { + patchOut.tcBlock[i0].blockSa[i1_1].z[i2] = v; + v += 0.4000000059604644775390625; + } + } + patchOut.tcBlock[i0].blockF = v; + v += 0.4000000059604644775390625; + } + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(gl_in[0].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(gl_in[1].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(gl_in[2].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(gl_in[3].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(gl_in[4].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(gl_in[5].in_tc_attr.x); + patchOut.in_te_positionScale = float2(gl_in[6].in_tc_attr.x, gl_in[7].in_tc_attr.x); + patchOut.in_te_positionOffset = float2(gl_in[8].in_tc_attr.x, gl_in[9].in_tc_attr.x); +} + diff --git a/reference/shaders-msl/tesc/basic.multi-patch.tesc b/reference/shaders-msl/tesc/basic.multi-patch.tesc new file mode 100644 index 00000000000..fe268316737 --- /dev/null +++ b/reference/shaders-msl/tesc/basic.multi-patch.tesc @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_patchOut +{ + float3 vFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 1]; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625); + 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625); + patchOut.vFoo = float3(1.0); +} + diff --git a/reference/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc b/reference/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc new file mode 100644 index 00000000000..d266d2512a9 --- /dev/null +++ b/reference/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc @@ -0,0 +1,134 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct Block_1 +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct main0_out +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray B_a; + 
float B_b; + float2x2 B_m; + Meep B_meep; + spvUnsafeArray B_meeps; + float4 gl_Position; +}; + +struct main0_in +{ + spvUnsafeArray in_a; + float in_b; + float2x2 in_m; + Meep in_meep; + spvUnsafeArray in_meeps; + spvUnsafeArray in_B_a; + float in_B_b; + float2x2 in_B_m; + Meep in_B_meep; + spvUnsafeArray in_B_meeps; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device main0_in* thread & gl_in) +{ + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].a[0] = gl_in[gl_InvocationID].in_a[0]; + gl_out[gl_InvocationID].a[1] = gl_in[gl_InvocationID].in_a[1]; + gl_out[gl_InvocationID].b = gl_in[gl_InvocationID].in_b; + gl_out[gl_InvocationID].m = gl_in[gl_InvocationID].in_m; + gl_out[gl_InvocationID].meep.a = gl_in[gl_InvocationID].in_meep.a; + gl_out[gl_InvocationID].meep.b = gl_in[gl_InvocationID].in_meep.b; + gl_out[gl_InvocationID].meeps[0].a = gl_in[gl_InvocationID].in_meeps[0].a; + gl_out[gl_InvocationID].meeps[0].b = gl_in[gl_InvocationID].in_meeps[0].b; + gl_out[gl_InvocationID].meeps[1].a = gl_in[gl_InvocationID].in_meeps[1].a; + gl_out[gl_InvocationID].meeps[1].b = gl_in[gl_InvocationID].in_meeps[1].b; + gl_out[gl_InvocationID].B_a[0] = gl_in[gl_InvocationID].in_B_a[0]; + gl_out[gl_InvocationID].B_a[1] = gl_in[gl_InvocationID].in_B_a[1]; + gl_out[gl_InvocationID].B_b = gl_in[gl_InvocationID].in_B_b; + gl_out[gl_InvocationID].B_m = gl_in[gl_InvocationID].in_B_m; + gl_out[gl_InvocationID].B_meep.a = gl_in[gl_InvocationID].in_B_meep.a; + gl_out[gl_InvocationID].B_meep.b = gl_in[gl_InvocationID].in_B_meep.b; + gl_out[gl_InvocationID].B_meeps[0].a = gl_in[gl_InvocationID].in_B_meeps[0].a; + gl_out[gl_InvocationID].B_meeps[0].b = gl_in[gl_InvocationID].in_B_meeps[0].b; + gl_out[gl_InvocationID].B_meeps[1].a = gl_in[gl_InvocationID].in_B_meeps[1].a; + gl_out[gl_InvocationID].B_meeps[1].b = gl_in[gl_InvocationID].in_B_meeps[1].b; +} + +kernel void 
main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_func(gl_out, gl_InvocationID, gl_in); +} + diff --git a/reference/shaders-msl/tesc/complex-control-point-inout-types.tesc b/reference/shaders-msl/tesc/complex-control-point-inout-types.tesc new file mode 100644 index 00000000000..48f10baec85 --- /dev/null +++ b/reference/shaders-msl/tesc/complex-control-point-inout-types.tesc @@ -0,0 +1,138 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct Block_1 +{ + spvUnsafeArray a; + float b; + float2x2 m; +}; + +struct main0_out +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray B_a; + float B_b; + float2x2 B_m; + Meep B_meep; + spvUnsafeArray B_meeps; + float4 gl_Position; +}; + +struct main0_in +{ + float in_a_0 [[attribute(0)]]; + float in_a_1 [[attribute(1)]]; + float in_b [[attribute(2)]]; + float2 in_m_0 [[attribute(3)]]; + float2 in_m_1 [[attribute(4)]]; + float in_meep_a [[attribute(5)]]; + float in_meep_b [[attribute(6)]]; + float in_B_a_0 [[attribute(11)]]; + float in_B_a_1 [[attribute(12)]]; + float in_B_b [[attribute(13)]]; + float2 in_B_m_0 [[attribute(14)]]; + float2 in_B_m_1 [[attribute(15)]]; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup main0_in* thread & gl_in) +{ + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].a[0] = gl_in[gl_InvocationID].in_a_0; + gl_out[gl_InvocationID].a[1] = gl_in[gl_InvocationID].in_a_1; + gl_out[gl_InvocationID].b = gl_in[gl_InvocationID].in_b; + float2x2 _72 = 
float2x2(gl_in[gl_InvocationID].in_m_0, gl_in[gl_InvocationID].in_m_1); + gl_out[gl_InvocationID].m = _72; + gl_out[gl_InvocationID].meep.a = gl_in[gl_InvocationID].in_meep_a; + gl_out[gl_InvocationID].meep.b = gl_in[gl_InvocationID].in_meep_b; + gl_out[gl_InvocationID].meeps[0].a = 1.0; + gl_out[gl_InvocationID].meeps[0].b = 2.0; + gl_out[gl_InvocationID].meeps[1].a = 3.0; + gl_out[gl_InvocationID].meeps[1].b = 4.0; + gl_out[gl_InvocationID].B_a[0] = gl_in[gl_InvocationID].in_B_a_0; + gl_out[gl_InvocationID].B_a[1] = gl_in[gl_InvocationID].in_B_a_1; + gl_out[gl_InvocationID].B_b = gl_in[gl_InvocationID].in_B_b; + float2x2 _134 = float2x2(gl_in[gl_InvocationID].in_B_m_0, gl_in[gl_InvocationID].in_B_m_1); + gl_out[gl_InvocationID].B_m = _134; + gl_out[gl_InvocationID].B_meep.a = 10.0; + gl_out[gl_InvocationID].B_meep.b = 20.0; + gl_out[gl_InvocationID].B_meeps[0].a = 5.0; + gl_out[gl_InvocationID].B_meeps[0].b = 6.0; + gl_out[gl_InvocationID].B_meeps[1].a = 7.0; + gl_out[gl_InvocationID].B_meeps[1].b = 8.0; +} + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + write_in_func(gl_out, gl_InvocationID, gl_in); +} + diff --git a/reference/shaders-msl/tesc/complex-patch-out-types.tesc b/reference/shaders-msl/tesc/complex-patch-out-types.tesc new file mode 100644 index 00000000000..bd24f58029b --- /dev/null +++ b/reference/shaders-msl/tesc/complex-patch-out-types.tesc @@ -0,0 +1,113 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" 
+#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray B_a; + float B_b; + float2x2 B_m; + Meep B_meep; + spvUnsafeArray B_meeps; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray& a, device float& b, device float2x2& m, device Meep& meep, device spvUnsafeArray& meeps, device main0_patchOut& patchOut) +{ + gl_out[gl_InvocationID].gl_Position = float4(1.0); + a[0] = 1.0; + a[1] = 2.0; + b = 3.0; + m = float2x2(float2(2.0, 0.0), float2(0.0, 2.0)); + meep.a = 4.0; + meep.b = 5.0; + meeps[0].a = 6.0; + meeps[0].b = 7.0; + meeps[1].a = 8.0; + meeps[1].b = 9.0; + patchOut.B_a[0] = 1.0; + patchOut.B_a[1] = 2.0; + patchOut.B_b = 3.0; + patchOut.B_m = float2x2(float2(4.0, 0.0), float2(0.0, 4.0)); + patchOut.B_meep.a = 4.0; + patchOut.B_meep.b = 5.0; + patchOut.B_meeps[0].a = 6.0; + 
patchOut.B_meeps[0].b = 7.0; + patchOut.B_meeps[1].a = 8.0; + patchOut.B_meeps[1].b = 9.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(gl_out, gl_InvocationID, patchOut.a, patchOut.b, patchOut.m, patchOut.meep, patchOut.meeps, patchOut); +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc b/reference/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc new file mode 100644 index 00000000000..28effad215f --- /dev/null +++ b/reference/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4x4 vOutputs; +}; + +struct main0_in +{ + float4x4 vInputs; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + spvUnsafeArray _16 = spvUnsafeArray({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, 
gl_in[30].vInputs, gl_in[31].vInputs }); + spvUnsafeArray tmp; + tmp = _16; + gl_out[gl_InvocationID].vOutputs = tmp[gl_InvocationID]; +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array-of-matrix.tesc b/reference/shaders-msl/tesc/load-control-point-array-of-matrix.tesc new file mode 100644 index 00000000000..46d4b4ad588 --- /dev/null +++ b/reference/shaders-msl/tesc/load-control-point-array-of-matrix.tesc @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4x4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs_0 [[attribute(0)]]; + float4 vInputs_1 [[attribute(1)]]; + float4 vInputs_2 [[attribute(2)]]; + float4 vInputs_3 [[attribute(3)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < 
spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + spvUnsafeArray _16 = spvUnsafeArray({ float4x4(gl_in[0].vInputs_0, gl_in[0].vInputs_1, gl_in[0].vInputs_2, gl_in[0].vInputs_3), float4x4(gl_in[1].vInputs_0, gl_in[1].vInputs_1, gl_in[1].vInputs_2, gl_in[1].vInputs_3), float4x4(gl_in[2].vInputs_0, gl_in[2].vInputs_1, gl_in[2].vInputs_2, gl_in[2].vInputs_3), float4x4(gl_in[3].vInputs_0, gl_in[3].vInputs_1, gl_in[3].vInputs_2, gl_in[3].vInputs_3), float4x4(gl_in[4].vInputs_0, gl_in[4].vInputs_1, gl_in[4].vInputs_2, gl_in[4].vInputs_3), float4x4(gl_in[5].vInputs_0, gl_in[5].vInputs_1, gl_in[5].vInputs_2, gl_in[5].vInputs_3), float4x4(gl_in[6].vInputs_0, gl_in[6].vInputs_1, gl_in[6].vInputs_2, gl_in[6].vInputs_3), float4x4(gl_in[7].vInputs_0, gl_in[7].vInputs_1, gl_in[7].vInputs_2, gl_in[7].vInputs_3), float4x4(gl_in[8].vInputs_0, gl_in[8].vInputs_1, gl_in[8].vInputs_2, gl_in[8].vInputs_3), float4x4(gl_in[9].vInputs_0, gl_in[9].vInputs_1, gl_in[9].vInputs_2, gl_in[9].vInputs_3), float4x4(gl_in[10].vInputs_0, gl_in[10].vInputs_1, gl_in[10].vInputs_2, gl_in[10].vInputs_3), float4x4(gl_in[11].vInputs_0, gl_in[11].vInputs_1, gl_in[11].vInputs_2, gl_in[11].vInputs_3), float4x4(gl_in[12].vInputs_0, gl_in[12].vInputs_1, gl_in[12].vInputs_2, gl_in[12].vInputs_3), float4x4(gl_in[13].vInputs_0, gl_in[13].vInputs_1, gl_in[13].vInputs_2, gl_in[13].vInputs_3), float4x4(gl_in[14].vInputs_0, gl_in[14].vInputs_1, gl_in[14].vInputs_2, gl_in[14].vInputs_3), float4x4(gl_in[15].vInputs_0, gl_in[15].vInputs_1, gl_in[15].vInputs_2, gl_in[15].vInputs_3), float4x4(gl_in[16].vInputs_0, gl_in[16].vInputs_1, gl_in[16].vInputs_2, gl_in[16].vInputs_3), float4x4(gl_in[17].vInputs_0, gl_in[17].vInputs_1, gl_in[17].vInputs_2, gl_in[17].vInputs_3), float4x4(gl_in[18].vInputs_0, gl_in[18].vInputs_1, gl_in[18].vInputs_2, gl_in[18].vInputs_3), float4x4(gl_in[19].vInputs_0, gl_in[19].vInputs_1, gl_in[19].vInputs_2, 
gl_in[19].vInputs_3), float4x4(gl_in[20].vInputs_0, gl_in[20].vInputs_1, gl_in[20].vInputs_2, gl_in[20].vInputs_3), float4x4(gl_in[21].vInputs_0, gl_in[21].vInputs_1, gl_in[21].vInputs_2, gl_in[21].vInputs_3), float4x4(gl_in[22].vInputs_0, gl_in[22].vInputs_1, gl_in[22].vInputs_2, gl_in[22].vInputs_3), float4x4(gl_in[23].vInputs_0, gl_in[23].vInputs_1, gl_in[23].vInputs_2, gl_in[23].vInputs_3), float4x4(gl_in[24].vInputs_0, gl_in[24].vInputs_1, gl_in[24].vInputs_2, gl_in[24].vInputs_3), float4x4(gl_in[25].vInputs_0, gl_in[25].vInputs_1, gl_in[25].vInputs_2, gl_in[25].vInputs_3), float4x4(gl_in[26].vInputs_0, gl_in[26].vInputs_1, gl_in[26].vInputs_2, gl_in[26].vInputs_3), float4x4(gl_in[27].vInputs_0, gl_in[27].vInputs_1, gl_in[27].vInputs_2, gl_in[27].vInputs_3), float4x4(gl_in[28].vInputs_0, gl_in[28].vInputs_1, gl_in[28].vInputs_2, gl_in[28].vInputs_3), float4x4(gl_in[29].vInputs_0, gl_in[29].vInputs_1, gl_in[29].vInputs_2, gl_in[29].vInputs_3), float4x4(gl_in[30].vInputs_0, gl_in[30].vInputs_1, gl_in[30].vInputs_2, gl_in[30].vInputs_3), float4x4(gl_in[31].vInputs_0, gl_in[31].vInputs_1, gl_in[31].vInputs_2, gl_in[31].vInputs_3) }); + spvUnsafeArray tmp; + tmp = _16; + gl_out[gl_InvocationID].vOutputs = tmp[gl_InvocationID]; +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc b/reference/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc new file mode 100644 index 00000000000..d24c271dd4c --- /dev/null +++ b/reference/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc @@ -0,0 +1,76 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct VertexData +{ + float4x4 a; + spvUnsafeArray b; + float4 c; +}; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + VertexData vInputs; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + spvUnsafeArray _19 = spvUnsafeArray({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, 
gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, gl_in[30].vInputs, gl_in[31].vInputs }); + spvUnsafeArray tmp; + tmp = _19; + VertexData tmp_single = gl_in[gl_InvocationID ^ 1].vInputs; + gl_out[gl_InvocationID].vOutputs = ((tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1]) + tmp[gl_InvocationID].c) + tmp_single.c; +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array-of-struct.tesc b/reference/shaders-msl/tesc/load-control-point-array-of-struct.tesc new file mode 100644 index 00000000000..08392cabbca --- /dev/null +++ b/reference/shaders-msl/tesc/load-control-point-array-of-struct.tesc @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct VertexData +{ + float4x4 a; + spvUnsafeArray b; + float4 c; +}; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs_a_0 [[attribute(0)]]; + float4 vInputs_a_1 [[attribute(1)]]; + float4 vInputs_a_2 [[attribute(2)]]; + float4 vInputs_a_3 [[attribute(3)]]; + float4 vInputs_b_0 [[attribute(4)]]; + float4 vInputs_b_1 [[attribute(5)]]; + float4 vInputs_c [[attribute(6)]]; +}; + +kernel void main0(main0_in in 
[[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + spvUnsafeArray _19 = spvUnsafeArray({ VertexData{ float4x4(gl_in[0].vInputs_a_0, gl_in[0].vInputs_a_1, gl_in[0].vInputs_a_2, gl_in[0].vInputs_a_3), spvUnsafeArray({ gl_in[0].vInputs_b_0, gl_in[0].vInputs_b_1 }), gl_in[0].vInputs_c }, VertexData{ float4x4(gl_in[1].vInputs_a_0, gl_in[1].vInputs_a_1, gl_in[1].vInputs_a_2, gl_in[1].vInputs_a_3), spvUnsafeArray({ gl_in[1].vInputs_b_0, gl_in[1].vInputs_b_1 }), gl_in[1].vInputs_c }, VertexData{ float4x4(gl_in[2].vInputs_a_0, gl_in[2].vInputs_a_1, gl_in[2].vInputs_a_2, gl_in[2].vInputs_a_3), spvUnsafeArray({ gl_in[2].vInputs_b_0, gl_in[2].vInputs_b_1 }), gl_in[2].vInputs_c }, VertexData{ float4x4(gl_in[3].vInputs_a_0, gl_in[3].vInputs_a_1, gl_in[3].vInputs_a_2, gl_in[3].vInputs_a_3), spvUnsafeArray({ gl_in[3].vInputs_b_0, gl_in[3].vInputs_b_1 }), gl_in[3].vInputs_c }, VertexData{ float4x4(gl_in[4].vInputs_a_0, gl_in[4].vInputs_a_1, gl_in[4].vInputs_a_2, gl_in[4].vInputs_a_3), spvUnsafeArray({ gl_in[4].vInputs_b_0, gl_in[4].vInputs_b_1 }), gl_in[4].vInputs_c }, VertexData{ float4x4(gl_in[5].vInputs_a_0, gl_in[5].vInputs_a_1, gl_in[5].vInputs_a_2, gl_in[5].vInputs_a_3), spvUnsafeArray({ gl_in[5].vInputs_b_0, gl_in[5].vInputs_b_1 }), gl_in[5].vInputs_c }, VertexData{ float4x4(gl_in[6].vInputs_a_0, gl_in[6].vInputs_a_1, gl_in[6].vInputs_a_2, gl_in[6].vInputs_a_3), spvUnsafeArray({ gl_in[6].vInputs_b_0, gl_in[6].vInputs_b_1 }), gl_in[6].vInputs_c }, VertexData{ 
float4x4(gl_in[7].vInputs_a_0, gl_in[7].vInputs_a_1, gl_in[7].vInputs_a_2, gl_in[7].vInputs_a_3), spvUnsafeArray({ gl_in[7].vInputs_b_0, gl_in[7].vInputs_b_1 }), gl_in[7].vInputs_c }, VertexData{ float4x4(gl_in[8].vInputs_a_0, gl_in[8].vInputs_a_1, gl_in[8].vInputs_a_2, gl_in[8].vInputs_a_3), spvUnsafeArray({ gl_in[8].vInputs_b_0, gl_in[8].vInputs_b_1 }), gl_in[8].vInputs_c }, VertexData{ float4x4(gl_in[9].vInputs_a_0, gl_in[9].vInputs_a_1, gl_in[9].vInputs_a_2, gl_in[9].vInputs_a_3), spvUnsafeArray({ gl_in[9].vInputs_b_0, gl_in[9].vInputs_b_1 }), gl_in[9].vInputs_c }, VertexData{ float4x4(gl_in[10].vInputs_a_0, gl_in[10].vInputs_a_1, gl_in[10].vInputs_a_2, gl_in[10].vInputs_a_3), spvUnsafeArray({ gl_in[10].vInputs_b_0, gl_in[10].vInputs_b_1 }), gl_in[10].vInputs_c }, VertexData{ float4x4(gl_in[11].vInputs_a_0, gl_in[11].vInputs_a_1, gl_in[11].vInputs_a_2, gl_in[11].vInputs_a_3), spvUnsafeArray({ gl_in[11].vInputs_b_0, gl_in[11].vInputs_b_1 }), gl_in[11].vInputs_c }, VertexData{ float4x4(gl_in[12].vInputs_a_0, gl_in[12].vInputs_a_1, gl_in[12].vInputs_a_2, gl_in[12].vInputs_a_3), spvUnsafeArray({ gl_in[12].vInputs_b_0, gl_in[12].vInputs_b_1 }), gl_in[12].vInputs_c }, VertexData{ float4x4(gl_in[13].vInputs_a_0, gl_in[13].vInputs_a_1, gl_in[13].vInputs_a_2, gl_in[13].vInputs_a_3), spvUnsafeArray({ gl_in[13].vInputs_b_0, gl_in[13].vInputs_b_1 }), gl_in[13].vInputs_c }, VertexData{ float4x4(gl_in[14].vInputs_a_0, gl_in[14].vInputs_a_1, gl_in[14].vInputs_a_2, gl_in[14].vInputs_a_3), spvUnsafeArray({ gl_in[14].vInputs_b_0, gl_in[14].vInputs_b_1 }), gl_in[14].vInputs_c }, VertexData{ float4x4(gl_in[15].vInputs_a_0, gl_in[15].vInputs_a_1, gl_in[15].vInputs_a_2, gl_in[15].vInputs_a_3), spvUnsafeArray({ gl_in[15].vInputs_b_0, gl_in[15].vInputs_b_1 }), gl_in[15].vInputs_c }, VertexData{ float4x4(gl_in[16].vInputs_a_0, gl_in[16].vInputs_a_1, gl_in[16].vInputs_a_2, gl_in[16].vInputs_a_3), spvUnsafeArray({ gl_in[16].vInputs_b_0, gl_in[16].vInputs_b_1 }), gl_in[16].vInputs_c }, 
VertexData{ float4x4(gl_in[17].vInputs_a_0, gl_in[17].vInputs_a_1, gl_in[17].vInputs_a_2, gl_in[17].vInputs_a_3), spvUnsafeArray({ gl_in[17].vInputs_b_0, gl_in[17].vInputs_b_1 }), gl_in[17].vInputs_c }, VertexData{ float4x4(gl_in[18].vInputs_a_0, gl_in[18].vInputs_a_1, gl_in[18].vInputs_a_2, gl_in[18].vInputs_a_3), spvUnsafeArray({ gl_in[18].vInputs_b_0, gl_in[18].vInputs_b_1 }), gl_in[18].vInputs_c }, VertexData{ float4x4(gl_in[19].vInputs_a_0, gl_in[19].vInputs_a_1, gl_in[19].vInputs_a_2, gl_in[19].vInputs_a_3), spvUnsafeArray({ gl_in[19].vInputs_b_0, gl_in[19].vInputs_b_1 }), gl_in[19].vInputs_c }, VertexData{ float4x4(gl_in[20].vInputs_a_0, gl_in[20].vInputs_a_1, gl_in[20].vInputs_a_2, gl_in[20].vInputs_a_3), spvUnsafeArray({ gl_in[20].vInputs_b_0, gl_in[20].vInputs_b_1 }), gl_in[20].vInputs_c }, VertexData{ float4x4(gl_in[21].vInputs_a_0, gl_in[21].vInputs_a_1, gl_in[21].vInputs_a_2, gl_in[21].vInputs_a_3), spvUnsafeArray({ gl_in[21].vInputs_b_0, gl_in[21].vInputs_b_1 }), gl_in[21].vInputs_c }, VertexData{ float4x4(gl_in[22].vInputs_a_0, gl_in[22].vInputs_a_1, gl_in[22].vInputs_a_2, gl_in[22].vInputs_a_3), spvUnsafeArray({ gl_in[22].vInputs_b_0, gl_in[22].vInputs_b_1 }), gl_in[22].vInputs_c }, VertexData{ float4x4(gl_in[23].vInputs_a_0, gl_in[23].vInputs_a_1, gl_in[23].vInputs_a_2, gl_in[23].vInputs_a_3), spvUnsafeArray({ gl_in[23].vInputs_b_0, gl_in[23].vInputs_b_1 }), gl_in[23].vInputs_c }, VertexData{ float4x4(gl_in[24].vInputs_a_0, gl_in[24].vInputs_a_1, gl_in[24].vInputs_a_2, gl_in[24].vInputs_a_3), spvUnsafeArray({ gl_in[24].vInputs_b_0, gl_in[24].vInputs_b_1 }), gl_in[24].vInputs_c }, VertexData{ float4x4(gl_in[25].vInputs_a_0, gl_in[25].vInputs_a_1, gl_in[25].vInputs_a_2, gl_in[25].vInputs_a_3), spvUnsafeArray({ gl_in[25].vInputs_b_0, gl_in[25].vInputs_b_1 }), gl_in[25].vInputs_c }, VertexData{ float4x4(gl_in[26].vInputs_a_0, gl_in[26].vInputs_a_1, gl_in[26].vInputs_a_2, gl_in[26].vInputs_a_3), spvUnsafeArray({ gl_in[26].vInputs_b_0, 
gl_in[26].vInputs_b_1 }), gl_in[26].vInputs_c }, VertexData{ float4x4(gl_in[27].vInputs_a_0, gl_in[27].vInputs_a_1, gl_in[27].vInputs_a_2, gl_in[27].vInputs_a_3), spvUnsafeArray({ gl_in[27].vInputs_b_0, gl_in[27].vInputs_b_1 }), gl_in[27].vInputs_c }, VertexData{ float4x4(gl_in[28].vInputs_a_0, gl_in[28].vInputs_a_1, gl_in[28].vInputs_a_2, gl_in[28].vInputs_a_3), spvUnsafeArray({ gl_in[28].vInputs_b_0, gl_in[28].vInputs_b_1 }), gl_in[28].vInputs_c }, VertexData{ float4x4(gl_in[29].vInputs_a_0, gl_in[29].vInputs_a_1, gl_in[29].vInputs_a_2, gl_in[29].vInputs_a_3), spvUnsafeArray({ gl_in[29].vInputs_b_0, gl_in[29].vInputs_b_1 }), gl_in[29].vInputs_c }, VertexData{ float4x4(gl_in[30].vInputs_a_0, gl_in[30].vInputs_a_1, gl_in[30].vInputs_a_2, gl_in[30].vInputs_a_3), spvUnsafeArray({ gl_in[30].vInputs_b_0, gl_in[30].vInputs_b_1 }), gl_in[30].vInputs_c }, VertexData{ float4x4(gl_in[31].vInputs_a_0, gl_in[31].vInputs_a_1, gl_in[31].vInputs_a_2, gl_in[31].vInputs_a_3), spvUnsafeArray({ gl_in[31].vInputs_b_0, gl_in[31].vInputs_b_1 }), gl_in[31].vInputs_c } }); + spvUnsafeArray tmp; + tmp = _19; + int _27 = gl_InvocationID ^ 1; + VertexData _30 = VertexData{ float4x4(gl_in[_27].vInputs_a_0, gl_in[_27].vInputs_a_1, gl_in[_27].vInputs_a_2, gl_in[_27].vInputs_a_3), spvUnsafeArray({ gl_in[_27].vInputs_b_0, gl_in[_27].vInputs_b_1 }), gl_in[_27].vInputs_c }; + VertexData tmp_single = _30; + gl_out[gl_InvocationID].vOutputs = ((tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1]) + tmp[gl_InvocationID].c) + tmp_single.c; +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array.multi-patch.tesc b/reference/shaders-msl/tesc/load-control-point-array.multi-patch.tesc new file mode 100644 index 00000000000..45baadb6f26 --- /dev/null +++ b/reference/shaders-msl/tesc/load-control-point-array.multi-patch.tesc @@ -0,0 +1,69 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace 
metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs; + ushort2 m_43; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + spvUnsafeArray _15 = spvUnsafeArray({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, 
gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, gl_in[30].vInputs, gl_in[31].vInputs }); + spvUnsafeArray tmp; + tmp = _15; + gl_out[gl_InvocationID].vOutputs = tmp[gl_InvocationID]; +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array.tesc b/reference/shaders-msl/tesc/load-control-point-array.tesc new file mode 100644 index 00000000000..d04571ae364 --- /dev/null +++ b/reference/shaders-msl/tesc/load-control-point-array.tesc @@ -0,0 +1,70 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + 
threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + spvUnsafeArray _15 = spvUnsafeArray({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, gl_in[30].vInputs, gl_in[31].vInputs }); + spvUnsafeArray tmp; + tmp = _15; + gl_out[gl_InvocationID].vOutputs = tmp[gl_InvocationID]; +} + diff --git a/reference/shaders-msl/tesc/matrix-output.multi-patch.tesc b/reference/shaders-msl/tesc/matrix-output.multi-patch.tesc new file mode 100644 index 00000000000..98b9dd05245 --- /dev/null +++ b/reference/shaders-msl/tesc/matrix-output.multi-patch.tesc @@ -0,0 +1,39 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float in_te_attr; + float4x3 in_te_data0; + float4x3 in_te_data1; +}; + +struct main0_in +{ + float3 in_tc_attr; + ushort2 m_103; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 3]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 3; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1); + float _15 = float(gl_InvocationID); + float4x3 d = 
float4x3(float3(_15, 0.0, 0.0), float3(0.0, _15, 0.0), float3(0.0, 0.0, _15), float3(0.0)); + gl_out[gl_InvocationID].in_te_data0 = d; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + int _42 = (gl_InvocationID + 1) % 3; + gl_out[gl_InvocationID].in_te_data1 = float4x3(d[0] + gl_out[_42].in_te_data0[0], d[1] + gl_out[_42].in_te_data0[1], d[2] + gl_out[_42].in_te_data0[2], d[3] + gl_out[_42].in_te_data0[3]); + gl_out[gl_InvocationID].in_te_attr = gl_in[gl_InvocationID].in_tc_attr.x; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(1.0); +} + diff --git a/reference/shaders-msl/tesc/reload-tess-level.multi-patch.tesc b/reference/shaders-msl/tesc/reload-tess-level.multi-patch.tesc new file mode 100644 index 00000000000..ae33de517a3 --- /dev/null +++ b/reference/shaders-msl/tesc/reload-tess-level.multi-patch.tesc @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_in +{ + uint3 m_82; + ushort2 m_86; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = 
min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + if (gl_InvocationID == 0) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(2.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(4.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(5.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]), 0.5)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 0.5)); + } + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; +} + diff --git a/reference/shaders-msl/tesc/reload-tess-level.tesc b/reference/shaders-msl/tesc/reload-tess-level.tesc new file mode 100644 index 00000000000..eafc50607d7 --- /dev/null +++ b/reference/shaders-msl/tesc/reload-tess-level.tesc @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_in +{ + float4 gl_Position [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + if (gl_InvocationID == 0) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(2.0); + 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(4.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(5.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]), 0.5)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 0.5)); + } + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; +} + diff --git a/reference/shaders-msl/tesc/struct-output.multi-patch.tesc b/reference/shaders-msl/tesc/struct-output.multi-patch.tesc new file mode 100644 index 00000000000..eaab245c1c8 --- /dev/null +++ b/reference/shaders-msl/tesc/struct-output.multi-patch.tesc @@ -0,0 +1,45 @@ +#include +#include + +using namespace metal; + +struct te_data +{ + float a; + float b; + uint c; +}; + +struct main0_out +{ + float in_te_attr; + te_data in_te_data0; + te_data in_te_data1; +}; + +struct main0_in +{ + float3 in_tc_attr; + ushort2 m_107; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 3]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 3; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1); + te_data d = te_data{ float(gl_InvocationID), float(gl_InvocationID + 1), uint(gl_InvocationID) }; + 
gl_out[gl_InvocationID].in_te_data0 = d; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + te_data e = gl_out[(gl_InvocationID + 1) % 3].in_te_data0; + gl_out[gl_InvocationID].in_te_data1 = te_data{ d.a + e.a, d.b + e.b, d.c + e.c }; + gl_out[gl_InvocationID].in_te_attr = gl_in[gl_InvocationID].in_tc_attr.x; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(1.0); +} + diff --git a/reference/shaders-msl/tesc/water_tess.multi-patch.tesc b/reference/shaders-msl/tesc/water_tess.multi-patch.tesc new file mode 100644 index 00000000000..f302302ae27 --- /dev/null +++ b/reference/shaders-msl/tesc/water_tess.multi-patch.tesc @@ -0,0 +1,135 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct UBO +{ + float4 uScale; + float3 uCamPos; + float2 uPatchSize; + float2 uMaxTessLevel; + float uDistanceMod; + float4 uFrustum[6]; +}; + +struct main0_patchOut +{ + float2 vOutPatchPosBase; + float4 vPatchLods; +}; + +struct main0_in +{ + float3 vPatchPosBase; + ushort2 m_430; +}; + +static inline __attribute__((always_inline)) +bool frustum_cull(thread const float2& p0, constant UBO& v_41) +{ + float2 min_xz = (p0 - float2(10.0)) * v_41.uScale.xy; + float2 max_xz = ((p0 + v_41.uPatchSize) + float2(10.0)) * v_41.uScale.xy; + float3 bb_min = float3(min_xz.x, -10.0, min_xz.y); + float3 bb_max = float3(max_xz.x, 10.0, max_xz.y); + float3 center = (bb_min + bb_max) * 0.5; + float radius = 0.5 * length(bb_max - bb_min); + float3 f0 = float3(dot(v_41.uFrustum[0], float4(center, 1.0)), dot(v_41.uFrustum[1], float4(center, 1.0)), 
dot(v_41.uFrustum[2], float4(center, 1.0))); + float3 f1 = float3(dot(v_41.uFrustum[3], float4(center, 1.0)), dot(v_41.uFrustum[4], float4(center, 1.0)), dot(v_41.uFrustum[5], float4(center, 1.0))); + bool _205 = any(f0 <= float3(-radius)); + bool _215; + if (!_205) + { + _215 = any(f1 <= float3(-radius)); + } + else + { + _215 = _205; + } + return !_215; +} + +static inline __attribute__((always_inline)) +float lod_factor(thread const float2& pos_, constant UBO& v_41) +{ + float2 pos = pos_ * v_41.uScale.xy; + float3 dist_to_cam = v_41.uCamPos - float3(pos.x, 0.0, pos.y); + float level0 = log2((length(dist_to_cam) + 9.9999997473787516355514526367188e-05) * v_41.uDistanceMod); + return fast::clamp(level0, 0.0, v_41.uMaxTessLevel.x); +} + +static inline __attribute__((always_inline)) +float4 tess_level(thread const float4& lod, constant UBO& v_41) +{ + return exp2(-lod) * v_41.uMaxTessLevel.y; +} + +static inline __attribute__((always_inline)) +float tess_level(thread const float& lod, constant UBO& v_41) +{ + return v_41.uMaxTessLevel.y * exp2(-lod); +} + +static inline __attribute__((always_inline)) +void compute_tess_levels(thread const float2& p0, constant UBO& v_41, device float2& vOutPatchPosBase, device float4& vPatchLods, device half (&gl_TessLevelOuter)[4], device half (&gl_TessLevelInner)[2]) +{ + vOutPatchPosBase = p0; + float2 param = p0 + (float2(-0.5) * v_41.uPatchSize); + float l00 = lod_factor(param, v_41); + float2 param_1 = p0 + (float2(0.5, -0.5) * v_41.uPatchSize); + float l10 = lod_factor(param_1, v_41); + float2 param_2 = p0 + (float2(1.5, -0.5) * v_41.uPatchSize); + float l20 = lod_factor(param_2, v_41); + float2 param_3 = p0 + (float2(-0.5, 0.5) * v_41.uPatchSize); + float l01 = lod_factor(param_3, v_41); + float2 param_4 = p0 + (float2(0.5) * v_41.uPatchSize); + float l11 = lod_factor(param_4, v_41); + float2 param_5 = p0 + (float2(1.5, 0.5) * v_41.uPatchSize); + float l21 = lod_factor(param_5, v_41); + float2 param_6 = p0 + (float2(-0.5, 
1.5) * v_41.uPatchSize); + float l02 = lod_factor(param_6, v_41); + float2 param_7 = p0 + (float2(0.5, 1.5) * v_41.uPatchSize); + float l12 = lod_factor(param_7, v_41); + float2 param_8 = p0 + (float2(1.5) * v_41.uPatchSize); + float l22 = lod_factor(param_8, v_41); + float4 lods = float4(dot(float4(l01, l11, l02, l12), float4(0.25)), dot(float4(l00, l10, l01, l11), float4(0.25)), dot(float4(l10, l20, l11, l21), float4(0.25)), dot(float4(l11, l21, l12, l22), float4(0.25))); + vPatchLods = lods; + float4 outer_lods = fast::min(lods, lods.yzwx); + float4 param_9 = outer_lods; + float4 levels = tess_level(param_9, v_41); + gl_TessLevelOuter[0] = half(levels.x); + gl_TessLevelOuter[1] = half(levels.y); + gl_TessLevelOuter[2] = half(levels.z); + gl_TessLevelOuter[3] = half(levels.w); + float min_lod = fast::min(fast::min(lods.x, lods.y), fast::min(lods.z, lods.w)); + float param_10 = fast::min(min_lod, l11); + float inner = tess_level(param_10, v_41); + gl_TessLevelInner[0] = half(inner); + gl_TessLevelInner[1] = half(inner); +} + +kernel void main0(constant UBO& v_41 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 1]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + float2 p0 = gl_in[0].vPatchPosBase.xy; + float2 param = p0; + if (!frustum_cull(param, v_41)) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(-1.0); + 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(-1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(-1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(-1.0); + } + else + { + float2 param_1 = p0; + compute_tess_levels(param_1, v_41, patchOut.vOutPatchPosBase, patchOut.vPatchLods, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor, spvTessLevel[gl_PrimitiveID].insideTessellationFactor); + } +} + diff --git a/reference/shaders-msl/tesc/water_tess.tesc b/reference/shaders-msl/tesc/water_tess.tesc index ccd59bbdf0a..0287df0cd79 100644 --- a/reference/shaders-msl/tesc/water_tess.tesc +++ b/reference/shaders-msl/tesc/water_tess.tesc @@ -26,6 +26,7 @@ struct main0_in float2 vPatchPosBase [[attribute(0)]]; }; +static inline __attribute__((always_inline)) bool frustum_cull(thread const float2& p0, constant UBO& v_41) { float2 min_xz = (p0 - float2(10.0)) * v_41.uScale.xy; @@ -36,9 +37,7 @@ bool frustum_cull(thread const float2& p0, constant UBO& v_41) float radius = 0.5 * length(bb_max - bb_min); float3 f0 = float3(dot(v_41.uFrustum[0], float4(center, 1.0)), dot(v_41.uFrustum[1], float4(center, 1.0)), dot(v_41.uFrustum[2], float4(center, 1.0))); float3 f1 = float3(dot(v_41.uFrustum[3], float4(center, 1.0)), dot(v_41.uFrustum[4], float4(center, 1.0)), dot(v_41.uFrustum[5], float4(center, 1.0))); - float3 _199 = f0; - float _200 = radius; - bool _205 = any(_199 <= float3(-_200)); + bool _205 = any(f0 <= float3(-radius)); bool _215; if (!_205) { @@ -51,24 +50,28 @@ bool frustum_cull(thread const float2& p0, constant UBO& v_41) return !_215; } +static inline __attribute__((always_inline)) float lod_factor(thread const float2& pos_, constant UBO& v_41) { float2 pos = pos_ * v_41.uScale.xy; float3 dist_to_cam = v_41.uCamPos - float3(pos.x, 0.0, pos.y); - float level = log2((length(dist_to_cam) + 9.9999997473787516355514526367188e-05) * v_41.uDistanceMod); - return fast::clamp(level, 0.0, v_41.uMaxTessLevel.x); + float 
level0 = log2((length(dist_to_cam) + 9.9999997473787516355514526367188e-05) * v_41.uDistanceMod); + return fast::clamp(level0, 0.0, v_41.uMaxTessLevel.x); } +static inline __attribute__((always_inline)) float4 tess_level(thread const float4& lod, constant UBO& v_41) { return exp2(-lod) * v_41.uMaxTessLevel.y; } +static inline __attribute__((always_inline)) float tess_level(thread const float& lod, constant UBO& v_41) { return v_41.uMaxTessLevel.y * exp2(-lod); } +static inline __attribute__((always_inline)) void compute_tess_levels(thread const float2& p0, constant UBO& v_41, device float2& vOutPatchPosBase, device float4& vPatchLods, device half (&gl_TessLevelOuter)[4], device half (&gl_TessLevelInner)[2]) { vOutPatchPosBase = p0; diff --git a/reference/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese b/reference/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese new file mode 100644 index 00000000000..44e495fb32c --- /dev/null +++ b/reference/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _35 +{ + float dummy; + float4 variableInStruct; +}; + +struct main0_out +{ + float outResult [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + spvUnsafeArray<_35, 3> testStructArray; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(float3 gl_TessCoord [[position_in_patch]], uint gl_PrimitiveID [[patch_id]], const device main0_in* spvIn [[buffer(22)]]) +{ + main0_out out = {}; + const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 0]; + out.gl_Position = float4((gl_TessCoord.xy * 2.0) - float2(1.0), 0.0, 1.0); + float result = ((float(abs(gl_in[0].testStructArray[2].variableInStruct.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(gl_in[0].testStructArray[2].variableInStruct.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].testStructArray[2].variableInStruct.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].testStructArray[2].variableInStruct.w - 7.0) < 0.001000000047497451305389404296875); + out.outResult = result; + return out; +} + diff --git a/reference/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese b/reference/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese new file mode 100644 index 00000000000..23c2cc3ecc4 --- /dev/null +++ 
b/reference/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese @@ -0,0 +1,39 @@ +#include +#include + +using namespace metal; + +struct t35 +{ + float2 m0; + float4 m1; +}; + +struct t36 +{ + float2 m0; + t35 m1; +}; + +struct main0_out +{ + float v80 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float2 v40_m0; + t35 v40_m1; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(float3 gl_TessCoord [[position_in_patch]], uint gl_PrimitiveID [[patch_id]], const device main0_in* spvIn [[buffer(22)]]) +{ + main0_out out = {}; + const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 0]; + out.gl_Position = float4((gl_TessCoord.xy * 2.0) - float2(1.0), 0.0, 1.0); + float v34 = ((float(abs(gl_in[0].v40_m1.m1.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(gl_in[0].v40_m1.m1.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].v40_m1.m1.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].v40_m1.m1.w - 7.0) < 0.001000000047497451305389404296875); + out.v80 = v34; + return out; +} + diff --git a/reference/shaders-msl/tese/in-block-with-nested-struct.tese b/reference/shaders-msl/tese/in-block-with-nested-struct.tese new file mode 100644 index 00000000000..711580d16a9 --- /dev/null +++ b/reference/shaders-msl/tese/in-block-with-nested-struct.tese @@ -0,0 +1,44 @@ +#include +#include + +using namespace metal; + +struct t35 +{ + float2 m0; + float4 m1; +}; + +struct t36 +{ + float2 m0; + t35 m1; +}; + +struct main0_out +{ + float v80 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float2 v40_m0 [[attribute(0)]]; + float2 v40_m1_m0 [[attribute(1)]]; + float4 v40_m1_m1 [[attribute(2)]]; +}; + +struct main0_patchIn +{ + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + out.gl_Position = float4((gl_TessCoord.xy * 2.0) 
- float2(1.0), 0.0, 1.0); + float v34 = ((float(abs(patchIn.gl_in[0].v40_m1_m1.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(patchIn.gl_in[0].v40_m1_m1.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(patchIn.gl_in[0].v40_m1_m1.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(patchIn.gl_in[0].v40_m1_m1.w - 7.0) < 0.001000000047497451305389404296875); + out.v80 = v34; + return out; +} + diff --git a/reference/shaders-msl/tese/input-array.tese b/reference/shaders-msl/tese/input-array.tese index 2ac5731fe44..c94619602aa 100644 --- a/reference/shaders-msl/tese/input-array.tese +++ b/reference/shaders-msl/tese/input-array.tese @@ -21,14 +21,16 @@ struct main0_patchIn patch_control_point gl_in; }; -void set_position(thread float4& gl_Position, thread patch_control_point& gl_in, thread float2& gl_TessCoord) +static inline __attribute__((always_inline)) +void set_position(thread float4& gl_Position, thread patch_control_point& gl_in, thread float3& gl_TessCoord) { gl_Position = (gl_in[0].Floats * gl_TessCoord.x) + (gl_in[1].Floats2 * gl_TessCoord.y); } -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); set_position(out.gl_Position, patchIn.gl_in, gl_TessCoord); return out; } diff --git a/reference/shaders-msl/tese/input-types.raw-tess-in.tese b/reference/shaders-msl/tese/input-types.raw-tess-in.tese new file mode 100644 index 00000000000..52952220968 --- /dev/null +++ b/reference/shaders-msl/tese/input-types.raw-tess-in.tese @@ -0,0 +1,81 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct Block +{ + float4 a; + float4 b; +}; + +struct PatchBlock +{ + float4 a; + float4 b; +}; 
+ +struct Foo +{ + float4 a; + float4 b; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vColor; + float4 blocks_a; + float4 blocks_b; + Foo vFoos; +}; + +struct main0_patchIn +{ + float4 vColors; + float4 patch_block_a; + float4 patch_block_b; + Foo vFoo; +}; + +static inline __attribute__((always_inline)) +void set_from_function(thread float4& gl_Position, const device main0_in* thread & gl_in, thread PatchBlock& patch_block, const device float4& vColors, const device Foo& vFoo) +{ + gl_Position = gl_in[0].blocks_a; + gl_Position += gl_in[0].blocks_b; + gl_Position += gl_in[1].blocks_a; + gl_Position += gl_in[1].blocks_b; + gl_Position += patch_block.a; + gl_Position += patch_block.b; + gl_Position += gl_in[0].vColor; + gl_Position += gl_in[1].vColor; + gl_Position += vColors; + Foo foo = vFoo; + gl_Position += foo.a; + gl_Position += foo.b; + foo = gl_in[0].vFoos; + gl_Position += foo.a; + gl_Position += foo.b; + foo = gl_in[1].vFoos; + gl_Position += foo.a; + gl_Position += foo.b; +} + +[[ patch(quad, 0) ]] vertex main0_out main0(uint gl_PrimitiveID [[patch_id]], const device main0_patchIn* spvPatchIn [[buffer(20)]], const device main0_in* spvIn [[buffer(22)]]) +{ + main0_out out = {}; + PatchBlock patch_block = {}; + const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 0]; + const device main0_patchIn& patchIn = spvPatchIn[gl_PrimitiveID]; + patch_block.a = patchIn.patch_block_a; + patch_block.b = patchIn.patch_block_b; + set_from_function(out.gl_Position, gl_in, patch_block, patchIn.vColors, patchIn.vFoo); + return out; +} + diff --git a/reference/shaders-msl/tese/input-types.tese b/reference/shaders-msl/tese/input-types.tese index 9012a7e1a70..d25235a2a37 100644 --- a/reference/shaders-msl/tese/input-types.tese +++ b/reference/shaders-msl/tese/input-types.tese @@ -31,28 +31,29 @@ struct main0_out struct main0_in { float4 vColor [[attribute(0)]]; - float4 Block_a [[attribute(4)]]; - float4 Block_b 
[[attribute(5)]]; - float4 Foo_a [[attribute(14)]]; - float4 Foo_b [[attribute(15)]]; + float4 blocks_a [[attribute(4)]]; + float4 blocks_b [[attribute(5)]]; + float4 vFoos_a [[attribute(14)]]; + float4 vFoos_b [[attribute(15)]]; }; struct main0_patchIn { float4 vColors [[attribute(1)]]; - float4 PatchBlock_a [[attribute(6)]]; - float4 PatchBlock_b [[attribute(7)]]; - float4 Foo_a [[attribute(8)]]; - float4 Foo_b [[attribute(9)]]; + float4 patch_block_a [[attribute(6)]]; + float4 patch_block_b [[attribute(7)]]; + float4 vFoo_a [[attribute(8)]]; + float4 vFoo_b [[attribute(9)]]; patch_control_point gl_in; }; +static inline __attribute__((always_inline)) void set_from_function(thread float4& gl_Position, thread patch_control_point& gl_in, thread PatchBlock& patch_block, thread float4& vColors, thread Foo& vFoo) { - gl_Position = gl_in[0].Block_a; - gl_Position += gl_in[0].Block_b; - gl_Position += gl_in[1].Block_a; - gl_Position += gl_in[1].Block_b; + gl_Position = gl_in[0].blocks_a; + gl_Position += gl_in[0].blocks_b; + gl_Position += gl_in[1].blocks_a; + gl_Position += gl_in[1].blocks_b; gl_Position += patch_block.a; gl_Position += patch_block.b; gl_Position += gl_in[0].vColor; @@ -61,16 +62,12 @@ void set_from_function(thread float4& gl_Position, thread patch_control_point +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vInputs_0 [[attribute(0)]]; + float4 vInputs_1 [[attribute(1)]]; + float4 vInputs_2 [[attribute(2)]]; + float4 vInputs_3 [[attribute(3)]]; +}; + +struct main0_patchIn +{ + float4 vBoo_0 [[attribute(4)]]; + float4 vBoo_1 [[attribute(5)]]; + float4 vBoo_2 [[attribute(6)]]; + float4 vBoo_3 [[attribute(7)]]; + int vIndex [[attribute(8)]]; + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray vBoo = {}; + vBoo[0] = patchIn.vBoo_0; + vBoo[1] = patchIn.vBoo_1; + vBoo[2] = patchIn.vBoo_2; + vBoo[3] = patchIn.vBoo_3; + spvUnsafeArray _16 = spvUnsafeArray({ float4x4(patchIn.gl_in[0].vInputs_0, patchIn.gl_in[0].vInputs_1, patchIn.gl_in[0].vInputs_2, patchIn.gl_in[0].vInputs_3), float4x4(patchIn.gl_in[1].vInputs_0, patchIn.gl_in[1].vInputs_1, patchIn.gl_in[1].vInputs_2, patchIn.gl_in[1].vInputs_3), float4x4(patchIn.gl_in[2].vInputs_0, patchIn.gl_in[2].vInputs_1, patchIn.gl_in[2].vInputs_2, patchIn.gl_in[2].vInputs_3), float4x4(patchIn.gl_in[3].vInputs_0, patchIn.gl_in[3].vInputs_1, patchIn.gl_in[3].vInputs_2, patchIn.gl_in[3].vInputs_3), float4x4(patchIn.gl_in[4].vInputs_0, patchIn.gl_in[4].vInputs_1, patchIn.gl_in[4].vInputs_2, 
patchIn.gl_in[4].vInputs_3), float4x4(patchIn.gl_in[5].vInputs_0, patchIn.gl_in[5].vInputs_1, patchIn.gl_in[5].vInputs_2, patchIn.gl_in[5].vInputs_3), float4x4(patchIn.gl_in[6].vInputs_0, patchIn.gl_in[6].vInputs_1, patchIn.gl_in[6].vInputs_2, patchIn.gl_in[6].vInputs_3), float4x4(patchIn.gl_in[7].vInputs_0, patchIn.gl_in[7].vInputs_1, patchIn.gl_in[7].vInputs_2, patchIn.gl_in[7].vInputs_3), float4x4(patchIn.gl_in[8].vInputs_0, patchIn.gl_in[8].vInputs_1, patchIn.gl_in[8].vInputs_2, patchIn.gl_in[8].vInputs_3), float4x4(patchIn.gl_in[9].vInputs_0, patchIn.gl_in[9].vInputs_1, patchIn.gl_in[9].vInputs_2, patchIn.gl_in[9].vInputs_3), float4x4(patchIn.gl_in[10].vInputs_0, patchIn.gl_in[10].vInputs_1, patchIn.gl_in[10].vInputs_2, patchIn.gl_in[10].vInputs_3), float4x4(patchIn.gl_in[11].vInputs_0, patchIn.gl_in[11].vInputs_1, patchIn.gl_in[11].vInputs_2, patchIn.gl_in[11].vInputs_3), float4x4(patchIn.gl_in[12].vInputs_0, patchIn.gl_in[12].vInputs_1, patchIn.gl_in[12].vInputs_2, patchIn.gl_in[12].vInputs_3), float4x4(patchIn.gl_in[13].vInputs_0, patchIn.gl_in[13].vInputs_1, patchIn.gl_in[13].vInputs_2, patchIn.gl_in[13].vInputs_3), float4x4(patchIn.gl_in[14].vInputs_0, patchIn.gl_in[14].vInputs_1, patchIn.gl_in[14].vInputs_2, patchIn.gl_in[14].vInputs_3), float4x4(patchIn.gl_in[15].vInputs_0, patchIn.gl_in[15].vInputs_1, patchIn.gl_in[15].vInputs_2, patchIn.gl_in[15].vInputs_3), float4x4(patchIn.gl_in[16].vInputs_0, patchIn.gl_in[16].vInputs_1, patchIn.gl_in[16].vInputs_2, patchIn.gl_in[16].vInputs_3), float4x4(patchIn.gl_in[17].vInputs_0, patchIn.gl_in[17].vInputs_1, patchIn.gl_in[17].vInputs_2, patchIn.gl_in[17].vInputs_3), float4x4(patchIn.gl_in[18].vInputs_0, patchIn.gl_in[18].vInputs_1, patchIn.gl_in[18].vInputs_2, patchIn.gl_in[18].vInputs_3), float4x4(patchIn.gl_in[19].vInputs_0, patchIn.gl_in[19].vInputs_1, patchIn.gl_in[19].vInputs_2, patchIn.gl_in[19].vInputs_3), float4x4(patchIn.gl_in[20].vInputs_0, patchIn.gl_in[20].vInputs_1, patchIn.gl_in[20].vInputs_2, 
patchIn.gl_in[20].vInputs_3), float4x4(patchIn.gl_in[21].vInputs_0, patchIn.gl_in[21].vInputs_1, patchIn.gl_in[21].vInputs_2, patchIn.gl_in[21].vInputs_3), float4x4(patchIn.gl_in[22].vInputs_0, patchIn.gl_in[22].vInputs_1, patchIn.gl_in[22].vInputs_2, patchIn.gl_in[22].vInputs_3), float4x4(patchIn.gl_in[23].vInputs_0, patchIn.gl_in[23].vInputs_1, patchIn.gl_in[23].vInputs_2, patchIn.gl_in[23].vInputs_3), float4x4(patchIn.gl_in[24].vInputs_0, patchIn.gl_in[24].vInputs_1, patchIn.gl_in[24].vInputs_2, patchIn.gl_in[24].vInputs_3), float4x4(patchIn.gl_in[25].vInputs_0, patchIn.gl_in[25].vInputs_1, patchIn.gl_in[25].vInputs_2, patchIn.gl_in[25].vInputs_3), float4x4(patchIn.gl_in[26].vInputs_0, patchIn.gl_in[26].vInputs_1, patchIn.gl_in[26].vInputs_2, patchIn.gl_in[26].vInputs_3), float4x4(patchIn.gl_in[27].vInputs_0, patchIn.gl_in[27].vInputs_1, patchIn.gl_in[27].vInputs_2, patchIn.gl_in[27].vInputs_3), float4x4(patchIn.gl_in[28].vInputs_0, patchIn.gl_in[28].vInputs_1, patchIn.gl_in[28].vInputs_2, patchIn.gl_in[28].vInputs_3), float4x4(patchIn.gl_in[29].vInputs_0, patchIn.gl_in[29].vInputs_1, patchIn.gl_in[29].vInputs_2, patchIn.gl_in[29].vInputs_3), float4x4(patchIn.gl_in[30].vInputs_0, patchIn.gl_in[30].vInputs_1, patchIn.gl_in[30].vInputs_2, patchIn.gl_in[30].vInputs_3), float4x4(patchIn.gl_in[31].vInputs_0, patchIn.gl_in[31].vInputs_1, patchIn.gl_in[31].vInputs_2, patchIn.gl_in[31].vInputs_3) }); + spvUnsafeArray tmp; + tmp = _16; + out.gl_Position = (tmp[0][patchIn.vIndex] + tmp[1][patchIn.vIndex]) + vBoo[patchIn.vIndex]; + return out; +} + diff --git a/reference/shaders-msl/tese/load-control-point-array.tese b/reference/shaders-msl/tese/load-control-point-array.tese new file mode 100644 index 00000000000..09c19cb47f4 --- /dev/null +++ b/reference/shaders-msl/tese/load-control-point-array.tese @@ -0,0 +1,81 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; 
+ +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vInputs [[attribute(0)]]; +}; + +struct main0_patchIn +{ + float4 vBoo_0 [[attribute(1)]]; + float4 vBoo_1 [[attribute(2)]]; + float4 vBoo_2 [[attribute(3)]]; + float4 vBoo_3 [[attribute(4)]]; + int vIndex [[attribute(5)]]; + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray vBoo = {}; + vBoo[0] = patchIn.vBoo_0; + vBoo[1] = patchIn.vBoo_1; + vBoo[2] = patchIn.vBoo_2; + vBoo[3] = patchIn.vBoo_3; + spvUnsafeArray _15 = spvUnsafeArray({ patchIn.gl_in[0].vInputs, patchIn.gl_in[1].vInputs, patchIn.gl_in[2].vInputs, patchIn.gl_in[3].vInputs, patchIn.gl_in[4].vInputs, patchIn.gl_in[5].vInputs, patchIn.gl_in[6].vInputs, patchIn.gl_in[7].vInputs, patchIn.gl_in[8].vInputs, patchIn.gl_in[9].vInputs, patchIn.gl_in[10].vInputs, patchIn.gl_in[11].vInputs, patchIn.gl_in[12].vInputs, patchIn.gl_in[13].vInputs, patchIn.gl_in[14].vInputs, patchIn.gl_in[15].vInputs, patchIn.gl_in[16].vInputs, patchIn.gl_in[17].vInputs, patchIn.gl_in[18].vInputs, patchIn.gl_in[19].vInputs, patchIn.gl_in[20].vInputs, patchIn.gl_in[21].vInputs, patchIn.gl_in[22].vInputs, patchIn.gl_in[23].vInputs, 
patchIn.gl_in[24].vInputs, patchIn.gl_in[25].vInputs, patchIn.gl_in[26].vInputs, patchIn.gl_in[27].vInputs, patchIn.gl_in[28].vInputs, patchIn.gl_in[29].vInputs, patchIn.gl_in[30].vInputs, patchIn.gl_in[31].vInputs }); + spvUnsafeArray tmp; + tmp = _15; + out.gl_Position = (tmp[0] + tmp[1]) + vBoo[patchIn.vIndex]; + return out; +} + diff --git a/reference/shaders-msl/tese/quad.domain.tese b/reference/shaders-msl/tese/quad.domain.tese index 78b58ab9975..10cdf5f10ba 100644 --- a/reference/shaders-msl/tese/quad.domain.tese +++ b/reference/shaders-msl/tese/quad.domain.tese @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -10,15 +51,24 @@ struct main0_out struct main0_patchIn { - float2 gl_TessLevelInner [[attribute(0)]]; - float4 gl_TessLevelOuter [[attribute(1)]]; + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn 
[[position_in_patch]]) { main0_out out = {}; + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter[3]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); gl_TessCoord.y = 1.0 - gl_TessCoord.y; - out.gl_Position = float4(((gl_TessCoord.x * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.x) + (((1.0 - gl_TessCoord.x) * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.z), ((gl_TessCoord.y * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.w) + (((1.0 - gl_TessCoord.y) * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.y), 0.0, 1.0); + out.gl_Position = float4(((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0]) + (((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), ((gl_TessCoord.y * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]) + (((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[1]), 0.0, 1.0); return out; } diff --git a/reference/shaders-msl/tese/quad.tese b/reference/shaders-msl/tese/quad.tese index df3d260fa89..e0c7944394c 100644 --- a/reference/shaders-msl/tese/quad.tese +++ b/reference/shaders-msl/tese/quad.tese @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -12,19 +51,29 @@ struct main0_out struct main0_patchIn { - float2 gl_TessLevelInner [[attribute(0)]]; - float4 gl_TessLevelOuter [[attribute(1)]]; + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; }; -void set_position(thread float4& gl_Position, thread float2& gl_TessCoord, thread float2& gl_TessLevelInner, thread float4& gl_TessLevelOuter) +static inline __attribute__((always_inline)) +void set_position(thread float4& gl_Position, thread float3& gl_TessCoord, thread spvUnsafeArray& gl_TessLevelInner, thread spvUnsafeArray& gl_TessLevelOuter) { - gl_Position = float4(((gl_TessCoord.x * gl_TessLevelInner.x) * gl_TessLevelOuter.x) + (((1.0 - gl_TessCoord.x) * gl_TessLevelInner.x) * gl_TessLevelOuter.z), ((gl_TessCoord.y * gl_TessLevelInner.y) * gl_TessLevelOuter.y) + (((1.0 - gl_TessCoord.y) * gl_TessLevelInner.y) * gl_TessLevelOuter.w), 0.0, 1.0); + gl_Position = float4(((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0]) + (((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), ((gl_TessCoord.y * gl_TessLevelInner[1]) * gl_TessLevelOuter[1]) + (((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]), 0.0, 1.0); } -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn 
patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; - set_position(out.gl_Position, gl_TessCoord, patchIn.gl_TessLevelInner, patchIn.gl_TessLevelOuter); + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter[3]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + set_position(out.gl_Position, gl_TessCoord, gl_TessLevelInner, gl_TessLevelOuter); return out; } diff --git a/reference/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese b/reference/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese new file mode 100644 index 00000000000..7048546cea5 --- /dev/null +++ b/reference/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese @@ -0,0 +1,78 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; +}; + +static inline __attribute__((always_inline)) +float4 read_tess_levels(thread spvUnsafeArray& gl_TessLevelOuter, thread spvUnsafeArray& gl_TessLevelInner) +{ + return float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter[3]; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + out.gl_Position = read_tess_levels(gl_TessLevelOuter, gl_TessLevelInner); + return out; +} + diff --git a/reference/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese b/reference/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese new file mode 100644 index 
00000000000..f8f81b7574a --- /dev/null +++ b/reference/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese @@ -0,0 +1,72 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +static inline __attribute__((always_inline)) +float4 read_tess_levels(thread spvUnsafeArray& gl_TessLevelOuter, thread spvUnsafeArray& gl_TessLevelInner) +{ + return float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +[[ patch(quad, 0) ]] vertex main0_out main0(uint gl_PrimitiveID [[patch_id]], const device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]; + gl_TessLevelOuter[1] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]; + gl_TessLevelOuter[2] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]; + gl_TessLevelOuter[3] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]; + 
gl_TessLevelInner[0] = spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0]; + gl_TessLevelInner[1] = spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1]; + out.gl_Position = read_tess_levels(gl_TessLevelOuter, gl_TessLevelInner); + return out; +} + diff --git a/reference/shaders-msl/tese/read-tess-level-in-func.msl2.tese b/reference/shaders-msl/tese/read-tess-level-in-func.msl2.tese new file mode 100644 index 00000000000..432ad7cc2d0 --- /dev/null +++ b/reference/shaders-msl/tese/read-tess-level-in-func.msl2.tese @@ -0,0 +1,75 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float4 gl_TessLevel [[attribute(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 read_tess_levels(thread spvUnsafeArray& gl_TessLevelOuter, thread spvUnsafeArray& gl_TessLevelInner) +{ + return float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + 
spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = patchIn.gl_TessLevel[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevel[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevel[2]; + gl_TessLevelInner[0] = patchIn.gl_TessLevel[3]; + out.gl_Position = read_tess_levels(gl_TessLevelOuter, gl_TessLevelInner); + return out; +} + diff --git a/reference/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese b/reference/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese new file mode 100644 index 00000000000..5be7c40174d --- /dev/null +++ b/reference/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese @@ -0,0 +1,70 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +static inline __attribute__((always_inline)) +float4 read_tess_levels(thread spvUnsafeArray& gl_TessLevelOuter, thread spvUnsafeArray& gl_TessLevelInner) +{ + return float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +[[ patch(triangle, 0) ]] 
vertex main0_out main0(uint gl_PrimitiveID [[patch_id]], const device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]; + gl_TessLevelOuter[1] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]; + gl_TessLevelOuter[2] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]; + gl_TessLevelInner[0] = spvTessLevel[gl_PrimitiveID].insideTessellationFactor; + out.gl_Position = read_tess_levels(gl_TessLevelOuter, gl_TessLevelInner); + return out; +} + diff --git a/reference/shaders-msl/tese/set-from-function.tese b/reference/shaders-msl/tese/set-from-function.tese index f6e41ee5c8f..dfbbe0b7028 100644 --- a/reference/shaders-msl/tese/set-from-function.tese +++ b/reference/shaders-msl/tese/set-from-function.tese @@ -25,24 +25,25 @@ struct main0_out struct main0_in { float4 vColor [[attribute(0)]]; - float4 Block_a [[attribute(2)]]; - float4 Block_b [[attribute(3)]]; + float4 blocks_a [[attribute(2)]]; + float4 blocks_b [[attribute(3)]]; }; struct main0_patchIn { float4 vColors [[attribute(1)]]; - float4 Foo_a [[attribute(4)]]; - float4 Foo_b [[attribute(5)]]; + float4 vFoo_a [[attribute(4)]]; + float4 vFoo_b [[attribute(5)]]; patch_control_point gl_in; }; +static inline __attribute__((always_inline)) void set_from_function(thread float4& gl_Position, thread patch_control_point& gl_in, thread float4& vColors, thread Foo& vFoo) { - gl_Position = gl_in[0].Block_a; - gl_Position += gl_in[0].Block_b; - gl_Position += gl_in[1].Block_a; - gl_Position += gl_in[1].Block_b; + gl_Position = gl_in[0].blocks_a; + gl_Position += gl_in[0].blocks_b; + gl_Position += gl_in[1].blocks_a; + gl_Position += gl_in[1].blocks_b; gl_Position += gl_in[0].vColor; gl_Position += gl_in[1].vColor; gl_Position += vColors; @@ -54,8 +55,8 @@ void set_from_function(thread float4& gl_Position, thread 
patch_control_point #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -16,12 +57,12 @@ struct main0_patchIn [[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float3 gl_TessCoord [[position_in_patch]]) { main0_out out = {}; - float gl_TessLevelInner[2] = {}; - float gl_TessLevelOuter[4] = {}; - gl_TessLevelInner[0] = patchIn.gl_TessLevel.w; - gl_TessLevelOuter[0] = patchIn.gl_TessLevel.x; - gl_TessLevelOuter[1] = patchIn.gl_TessLevel.y; - gl_TessLevelOuter[2] = patchIn.gl_TessLevel.z; + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevel[3]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevel[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevel[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevel[2]; out.gl_Position = float4((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0], (gl_TessCoord.y * gl_TessLevelInner[0]) * gl_TessLevelOuter[1], (gl_TessCoord.z * gl_TessLevelInner[0]) * gl_TessLevelOuter[2], 1.0); return out; } diff --git a/reference/shaders-msl/tese/water_tess.raw-tess-in.tese b/reference/shaders-msl/tese/water_tess.raw-tess-in.tese new file mode 100644 index 00000000000..bf93456484c --- /dev/null +++ 
b/reference/shaders-msl/tese/water_tess.raw-tess-in.tese @@ -0,0 +1,77 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct UBO +{ + float4x4 uMVP; + float4 uScale; + float2 uInvScale; + float3 uCamPos; + float2 uPatchSize; + float2 uInvHeightmapSize; +}; + +struct main0_out +{ + float3 vWorld [[user(locn0)]]; + float4 vGradNormalTex [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float2 vOutPatchPosBase; + float4 vPatchLods; +}; + +static inline __attribute__((always_inline)) +float2 lerp_vertex(thread const float2& tess_coord, const device float2& vOutPatchPosBase, constant UBO& v_31) +{ + return vOutPatchPosBase + (tess_coord * v_31.uPatchSize); +} + +static inline __attribute__((always_inline)) +float2 lod_factor(thread const float2& tess_coord, const device float4& vPatchLods) +{ + float2 x = mix(vPatchLods.yx, vPatchLods.zw, float2(tess_coord.x)); + float level0 = mix(x.x, x.y, tess_coord.y); + float floor_level = floor(level0); + float fract_level = level0 - floor_level; + return float2(floor_level, fract_level); +} + +static inline __attribute__((always_inline)) +float3 sample_height_displacement(thread const float2& uv, thread const float2& off, thread const float2& lod, texture2d uHeightmapDisplacement, sampler uHeightmapDisplacementSmplr) +{ + return mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (uv + (off * 0.5)), level(lod.x)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (uv + (off * 1.0)), level(lod.x + 1.0)).xyz, float3(lod.y)); +} + +[[ patch(quad, 0) ]] vertex main0_out main0(constant UBO& v_31 [[buffer(0)]], texture2d uHeightmapDisplacement [[texture(0)]], sampler uHeightmapDisplacementSmplr [[sampler(0)]], float2 gl_TessCoordIn [[position_in_patch]], uint gl_PrimitiveID [[patch_id]], const device main0_patchIn* spvPatchIn [[buffer(20)]]) +{ + main0_out out = {}; + const device main0_patchIn& patchIn = 
spvPatchIn[gl_PrimitiveID]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + float2 tess_coord = gl_TessCoord.xy; + float2 param = tess_coord; + float2 pos = lerp_vertex(param, patchIn.vOutPatchPosBase, v_31); + float2 param_1 = tess_coord; + float2 lod = lod_factor(param_1, patchIn.vPatchLods); + float2 tex = pos * v_31.uInvHeightmapSize; + pos *= v_31.uScale.xy; + float delta_mod = exp2(lod.x); + float2 off = v_31.uInvHeightmapSize * delta_mod; + out.vGradNormalTex = float4(tex + (v_31.uInvHeightmapSize * 0.5), tex * v_31.uScale.zw); + float2 param_2 = tex; + float2 param_3 = off; + float2 param_4 = lod; + float3 height_displacement = sample_height_displacement(param_2, param_3, param_4, uHeightmapDisplacement, uHeightmapDisplacementSmplr); + pos += height_displacement.yz; + out.vWorld = float3(pos.x, height_displacement.x, pos.y); + out.gl_Position = v_31.uMVP * float4(out.vWorld, 1.0); + return out; +} + diff --git a/reference/shaders-msl/tese/water_tess.tese b/reference/shaders-msl/tese/water_tess.tese index 9b6c0aca843..5f63d94f16f 100644 --- a/reference/shaders-msl/tese/water_tess.tese +++ b/reference/shaders-msl/tese/water_tess.tese @@ -28,29 +28,33 @@ struct main0_patchIn float4 vPatchLods [[attribute(1)]]; }; +static inline __attribute__((always_inline)) float2 lerp_vertex(thread const float2& tess_coord, thread float2& vOutPatchPosBase, constant UBO& v_31) { return vOutPatchPosBase + (tess_coord * v_31.uPatchSize); } +static inline __attribute__((always_inline)) float2 lod_factor(thread const float2& tess_coord, thread float4& vPatchLods) { float2 x = mix(vPatchLods.yx, vPatchLods.zw, float2(tess_coord.x)); - float level = mix(x.x, x.y, tess_coord.y); - float floor_level = floor(level); - float fract_level = level - floor_level; + float level0 = mix(x.x, x.y, tess_coord.y); + float floor_level = floor(level0); + float fract_level = level0 - floor_level; return float2(floor_level, fract_level); } -float3 
sample_height_displacement(thread const float2& uv, thread const float2& off, thread const float2& lod, thread texture2d uHeightmapDisplacement, thread const sampler uHeightmapDisplacementSmplr) +static inline __attribute__((always_inline)) +float3 sample_height_displacement(thread const float2& uv, thread const float2& off, thread const float2& lod, texture2d uHeightmapDisplacement, sampler uHeightmapDisplacementSmplr) { return mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (uv + (off * 0.5)), level(lod.x)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (uv + (off * 1.0)), level(lod.x + 1.0)).xyz, float3(lod.y)); } -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant UBO& v_31 [[buffer(0)]], texture2d uHeightmapDisplacement [[texture(0)]], sampler uHeightmapDisplacementSmplr [[sampler(0)]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant UBO& v_31 [[buffer(0)]], texture2d uHeightmapDisplacement [[texture(0)]], sampler uHeightmapDisplacementSmplr [[sampler(0)]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; - float2 tess_coord = float3(gl_TessCoord, 0).xy; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + float2 tess_coord = gl_TessCoord.xy; float2 param = tess_coord; float2 pos = lerp_vertex(param, patchIn.vOutPatchPosBase, v_31); float2 param_1 = tess_coord; diff --git a/reference/shaders-msl/vert/array-component-io.for-tess.vert b/reference/shaders-msl/vert/array-component-io.for-tess.vert new file mode 100644 index 00000000000..24958eb50db --- /dev/null +++ b/reference/shaders-msl/vert/array-component-io.for-tess.vert @@ -0,0 +1,98 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 m_location_0; + float4 m_location_1; + float4 m_location_2; + float4 gl_Position; +}; + +struct main0_in +{ + float4 m_location_0 [[attribute(0)]]; + float4 m_location_1 [[attribute(1)]]; + float4 m_location_2 [[attribute(2)]]; + float4 Pos [[attribute(4)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + spvUnsafeArray A = {}; + spvUnsafeArray B = {}; + spvUnsafeArray C = {}; + float D = {}; + spvUnsafeArray InA = {}; + spvUnsafeArray InB = {}; + spvUnsafeArray InC = {}; + float InD = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + InA[0] = in.m_location_1.x; + InA[1] = in.m_location_2.x; + InB[0] = in.m_location_1.zw; + InB[1] = in.m_location_2.zw; + InC[0] = in.m_location_0.y; + InC[1] = in.m_location_1.y; + InC[2] = in.m_location_2.y; + InD = in.m_location_0.w; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = in.Pos; + A = InA; + B = InB; + C = InC; + D = InD; + out.m_location_1.x = A[0]; + out.m_location_2.x = A[1]; + out.m_location_1.zw = B[0]; + out.m_location_2.zw = B[1]; + out.m_location_0.y = C[0]; + out.m_location_1.y = C[1]; + 
out.m_location_2.y = C[2]; + out.m_location_0.w = D; +} + diff --git a/reference/shaders-msl/vert/array-component-io.vert b/reference/shaders-msl/vert/array-component-io.vert new file mode 100644 index 00000000000..352c9d2ef0f --- /dev/null +++ b/reference/shaders-msl/vert/array-component-io.vert @@ -0,0 +1,100 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float C_0 [[user(locn0_1)]]; + float D [[user(locn0_3)]]; + float A_0 [[user(locn1)]]; + float C_1 [[user(locn1_1)]]; + float2 B_0 [[user(locn1_2)]]; + float A_1 [[user(locn2)]]; + float C_2 [[user(locn2_1)]]; + float2 B_1 [[user(locn2_2)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 m_location_0 [[attribute(0)]]; + float4 m_location_1 [[attribute(1)]]; + float4 m_location_2 [[attribute(2)]]; + float4 Pos [[attribute(4)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray A = {}; + spvUnsafeArray B = {}; + spvUnsafeArray C = {}; + spvUnsafeArray InA = {}; + spvUnsafeArray InB = {}; + spvUnsafeArray InC = {}; + float InD = {}; + InA[0] = in.m_location_1.x; + InA[1] = in.m_location_2.x; + InB[0] 
= in.m_location_1.zw; + InB[1] = in.m_location_2.zw; + InC[0] = in.m_location_0.y; + InC[1] = in.m_location_1.y; + InC[2] = in.m_location_2.y; + InD = in.m_location_0.w; + out.gl_Position = in.Pos; + A = InA; + B = InB; + C = InC; + out.D = InD; + out.A_0 = A[0]; + out.A_1 = A[1]; + out.B_0 = B[0]; + out.B_1 = B[1]; + out.C_0 = C[0]; + out.C_1 = C[1]; + out.C_2 = C[2]; + return out; +} + diff --git a/reference/shaders-msl/vert/basic.for-tess.vert b/reference/shaders-msl/vert/basic.for-tess.vert new file mode 100644 index 00000000000..c99a95ac898 --- /dev/null +++ b/reference/shaders-msl/vert/basic.for-tess.vert @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4x4 uMVP; +}; + +struct main0_out +{ + float3 vNormal; + float4 gl_Position; +}; + +struct main0_in +{ + float4 aVertex [[attribute(0)]]; + float3 aNormal [[attribute(1)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = _16.uMVP * in.aVertex; + out.vNormal = in.aNormal; +} + diff --git a/reference/shaders-msl/vert/buffer_device_address.msl2.vert b/reference/shaders-msl/vert/buffer_device_address.msl2.vert new file mode 100644 index 00000000000..9d856c48114 --- /dev/null +++ b/reference/shaders-msl/vert/buffer_device_address.msl2.vert @@ -0,0 +1,49 @@ +#include +#include + +using namespace metal; + +struct Position; +struct PositionReferences; + +struct Position +{ + float2 positions[1]; +}; + +struct Registers +{ + float4x4 view_projection; + device PositionReferences* references; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +struct main0_out +{ + float4 out_color [[user(locn0)]]; + 
float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant Registers& registers [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + int slice = int(gl_InstanceIndex); + const device Position* __restrict positions = registers.references->buffers[slice]; + float2 pos = positions->positions[int(gl_VertexIndex)] * 2.5; + pos += ((float2(float(slice % 8), float(slice / 8)) - float2(3.5)) * 3.0); + out.gl_Position = registers.view_projection * float4(pos, 0.0, 1.0); + int index_x = int(gl_VertexIndex) % 16; + int index_y = int(gl_VertexIndex) / 16; + float r = 0.5 + (0.300000011920928955078125 * sin(float(index_x))); + float g = 0.5 + (0.300000011920928955078125 * sin(float(index_y))); + int checkerboard = (index_x ^ index_y) & 1; + r *= ((float(checkerboard) * 0.800000011920928955078125) + 0.20000000298023223876953125); + g *= ((float(checkerboard) * 0.800000011920928955078125) + 0.20000000298023223876953125); + out.out_color = float4(r, g, 0.1500000059604644775390625, 1.0); + return out; +} + diff --git a/reference/shaders-msl/vert/clip-distance-block.no-user-varying.vert b/reference/shaders-msl/vert/clip-distance-block.no-user-varying.vert new file mode 100644 index 00000000000..c78105e0ce6 --- /dev/null +++ b/reference/shaders-msl/vert/clip-distance-block.no-user-varying.vert @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = in.Position; + out.gl_ClipDistance[0] = in.Position.x; + out.gl_ClipDistance[1] = in.Position.y; + return out; +} + diff --git a/reference/shaders-msl/vert/clip-distance-block.vert b/reference/shaders-msl/vert/clip-distance-block.vert new file mode 100644 index 00000000000..af58f35ff5f --- 
/dev/null +++ b/reference/shaders-msl/vert/clip-distance-block.vert @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = in.Position; + out.gl_ClipDistance[0] = in.Position.x; + out.gl_ClipDistance[1] = in.Position.y; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/shaders-msl/vert/copy.flatten.vert b/reference/shaders-msl/vert/copy.flatten.vert index a762f7e792b..92757a6001e 100644 --- a/reference/shaders-msl/vert/copy.flatten.vert +++ b/reference/shaders-msl/vert/copy.flatten.vert @@ -47,7 +47,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] light.Radius = _21.lights[i].Radius; light.Color = _21.lights[i].Color; float3 L = in.aVertex.xyz - light.Position; - out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); + out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(L))); } return out; } diff --git a/reference/shaders-msl/vert/dynamic.flatten.vert b/reference/shaders-msl/vert/dynamic.flatten.vert index c285f3c8739..43b3e112ce4 100644 --- a/reference/shaders-msl/vert/dynamic.flatten.vert +++ b/reference/shaders-msl/vert/dynamic.flatten.vert @@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] for (int i = 0; i < 4; i++) { float3 L = in.aVertex.xyz - float3(_21.lights[i].Position); - out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * 
dot(in.aNormal, normalize(L))); + out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(L))); } return out; } diff --git a/reference/shaders-msl/vert/float-math.invariant-float-math.vert b/reference/shaders-msl/vert/float-math.invariant-float-math.vert new file mode 100644 index 00000000000..4b25e91b455 --- /dev/null +++ b/reference/shaders-msl/vert/float-math.invariant-float-math.vert @@ -0,0 +1,136 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +[[clang::optnone]] T spvFMul(T l, T r) +{ + return fma(l, r, T(0)); +} + +template +[[clang::optnone]] vec spvFMulVectorMatrix(vec v, matrix m) +{ + vec res = vec(0); + for (uint i = Rows; i > 0; --i) + { + vec tmp(0); + for (uint j = 0; j < Cols; ++j) + { + tmp[j] = m[j][i - 1]; + } + res = fma(tmp, vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] vec spvFMulMatrixVector(matrix m, vec v) +{ + vec res = vec(0); + for (uint i = Cols; i > 0; --i) + { + res = fma(m[i - 1], vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] matrix spvFMulMatrixMatrix(matrix l, 
matrix r) +{ + matrix res; + for (uint i = 0; i < RCols; i++) + { + vec tmp(0); + for (uint j = 0; j < LCols; j++) + { + tmp = fma(vec(r[i][j]), l[j], tmp); + } + res[i] = tmp; + } + return res; +} + +struct Matrices +{ + float4x4 vpMatrix; + float4x4 wMatrix; + float4x3 wMatrix4x3; + float3x4 wMatrix3x4; +}; + +struct main0_out +{ + float3 OutNormal [[user(locn0)]]; + float4 OutWorldPos_0 [[user(locn1)]]; + float4 OutWorldPos_1 [[user(locn2)]]; + float4 OutWorldPos_2 [[user(locn3)]]; + float4 OutWorldPos_3 [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float3 InPos [[attribute(0)]]; + float3 InNormal [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant Matrices& _22 [[buffer(0)]]) +{ + main0_out out = {}; + spvUnsafeArray OutWorldPos = {}; + out.gl_Position = spvFMulMatrixVector(spvFMulMatrixMatrix(_22.vpMatrix, _22.wMatrix), float4(in.InPos, 1.0)); + OutWorldPos[0] = spvFMulMatrixVector(_22.wMatrix, float4(in.InPos, 1.0)); + OutWorldPos[1] = spvFMulVectorMatrix(float4(in.InPos, 1.0), _22.wMatrix); + OutWorldPos[2] = spvFMulMatrixVector(_22.wMatrix3x4, in.InPos); + OutWorldPos[3] = spvFMulVectorMatrix(in.InPos, _22.wMatrix4x3); + out.OutNormal = spvFMulMatrixVector(_22.wMatrix, float4(in.InNormal, 0.0)).xyz; + out.OutWorldPos_0 = OutWorldPos[0]; + out.OutWorldPos_1 = OutWorldPos[1]; + out.OutWorldPos_2 = OutWorldPos[2]; + out.OutWorldPos_3 = OutWorldPos[3]; + return out; +} + diff --git a/reference/shaders-msl/vert/float-math.vert b/reference/shaders-msl/vert/float-math.vert new file mode 100644 index 00000000000..e96fdaedc22 --- /dev/null +++ b/reference/shaders-msl/vert/float-math.vert @@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Matrices +{ + float4x4 vpMatrix; + float4x4 wMatrix; + float4x3 wMatrix4x3; + float3x4 wMatrix3x4; +}; + +struct main0_out +{ + float3 OutNormal [[user(locn0)]]; + float4 OutWorldPos_0 [[user(locn1)]]; + float4 OutWorldPos_1 [[user(locn2)]]; + float4 OutWorldPos_2 [[user(locn3)]]; + float4 OutWorldPos_3 [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float3 InPos [[attribute(0)]]; + float3 InNormal [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant Matrices& _22 [[buffer(0)]]) +{ + main0_out out = {}; + spvUnsafeArray OutWorldPos = {}; + out.gl_Position = (_22.vpMatrix * _22.wMatrix) * float4(in.InPos, 1.0); + OutWorldPos[0] = _22.wMatrix * float4(in.InPos, 1.0); + OutWorldPos[1] = float4(in.InPos, 1.0) * _22.wMatrix; + OutWorldPos[2] = _22.wMatrix3x4 * in.InPos; + OutWorldPos[3] = in.InPos * _22.wMatrix4x3; + out.OutNormal = (_22.wMatrix * float4(in.InNormal, 0.0)).xyz; + out.OutWorldPos_0 = OutWorldPos[0]; + out.OutWorldPos_1 = OutWorldPos[1]; + out.OutWorldPos_2 = OutWorldPos[2]; + out.OutWorldPos_3 = OutWorldPos[3]; + return out; +} + diff --git a/reference/shaders-msl/vert/functions.vert b/reference/shaders-msl/vert/functions.vert index f710225261d..4300aa1350a 100644 --- a/reference/shaders-msl/vert/functions.vert +++ 
b/reference/shaders-msl/vert/functions.vert @@ -5,73 +5,52 @@ using namespace metal; -struct UBO -{ - float4x4 uMVP; - float3 rotDeg; - float3 rotRad; - int2 bits; -}; - -struct main0_out -{ - float3 vNormal [[user(locn0)]]; - float3 vRotDeg [[user(locn1)]]; - float3 vRotRad [[user(locn2)]]; - int2 vLSB [[user(locn3)]]; - int2 vMSB [[user(locn4)]]; - float4 gl_Position [[position]]; -}; - -struct main0_in -{ - float4 aVertex [[attribute(0)]]; - float3 aNormal [[attribute(1)]]; -}; - // Implementation of the GLSL radians() function template -T radians(T d) +inline T radians(T d) { return d * T(0.01745329251); } // Implementation of the GLSL degrees() function template -T degrees(T r) +inline T degrees(T r) { return r * T(57.2957795131); } // Implementation of the GLSL findLSB() function template -T findLSB(T x) +inline T spvFindLSB(T x) { return select(ctz(x), T(-1), x == T(0)); } // Implementation of the signed GLSL findMSB() function template -T findSMSB(T x) +inline T spvFindSMSB(T x) { T v = select(x, T(-1) - x, x < T(0)); return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); } // Returns the determinant of a 2x2 matrix. -inline float spvDet2x2(float a1, float a2, float b1, float b2) +static inline __attribute__((always_inline)) +float spvDet2x2(float a1, float a2, float b1, float b2) { return a1 * b2 - b1 * a2; } // Returns the determinant of a 3x3 matrix. -inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +static inline __attribute__((always_inline)) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) { return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. 
+static inline __attribute__((always_inline)) float4x4 spvInverse4x4(float4x4 m) { float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -105,6 +84,30 @@ float4x4 spvInverse4x4(float4x4 m) return (det != 0.0f) ? (adj * (1.0f / det)) : m; } +struct UBO +{ + float4x4 uMVP; + float3 rotDeg; + float3 rotRad; + int2 bits; +}; + +struct main0_out +{ + float3 vNormal [[user(locn0)]]; + float3 vRotDeg [[user(locn1)]]; + float3 vRotRad [[user(locn2)]]; + int2 vLSB [[user(locn3)]]; + int2 vMSB [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 aVertex [[attribute(0)]]; + float3 aNormal [[attribute(1)]]; +}; + vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]]) { main0_out out = {}; @@ -112,8 +115,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] out.vNormal = in.aNormal; out.vRotDeg = degrees(_18.rotRad); out.vRotRad = radians(_18.rotDeg); - out.vLSB = findLSB(_18.bits); - out.vMSB = findSMSB(_18.bits); + out.vLSB = spvFindLSB(_18.bits); + out.vMSB = spvFindSMSB(_18.bits); return out; } diff --git a/reference/shaders-msl/vert/implicit-position-1.vert b/reference/shaders-msl/vert/implicit-position-1.vert new file mode 100644 index 00000000000..5cea4ee2c20 --- /dev/null +++ b/reference/shaders-msl/vert/implicit-position-1.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 V [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.V = float4(1.0); + return out; +} + diff --git a/reference/shaders-msl/vert/implicit-position-2.vert b/reference/shaders-msl/vert/implicit-position-2.vert new file mode 100644 index 00000000000..9e024c2095b --- /dev/null +++ b/reference/shaders-msl/vert/implicit-position-2.vert @@ -0,0 +1,9 @@ +#include +#include + +using namespace metal; + +vertex void main0() +{ +} + diff --git 
a/reference/shaders-msl/vert/in_out_array_mat.vert b/reference/shaders-msl/vert/in_out_array_mat.vert index 95be574a51e..19bfa7311ea 100644 --- a/reference/shaders-msl/vert/in_out_array_mat.vert +++ b/reference/shaders-msl/vert/in_out_array_mat.vert @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct UBO { float4x4 projection; @@ -38,13 +77,15 @@ struct main0_in float4 inViewMat_3 [[attribute(8)]]; }; -void write_deeper_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread float4 (&colors)[3]) +static inline __attribute__((always_inline)) +void write_deeper_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread spvUnsafeArray& colors) { outTransModel[1].y = ubo.lodBias; color = colors[2]; } -void write_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread float4 (&colors)[3], thread float3& inNormal) +static inline __attribute__((always_inline)) +void write_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread spvUnsafeArray& colors, thread float3& inNormal) { outTransModel[2] = 
float4(inNormal, 1.0); write_deeper_in_function(outTransModel, ubo, color, colors); @@ -54,7 +95,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& ubo [[buffer(0)]] { main0_out out = {}; float4x4 outTransModel = {}; - float4 colors[3] = {}; + spvUnsafeArray colors = {}; float4x4 inViewMat = {}; colors[0] = in.colors_0; colors[1] = in.colors_1; diff --git a/reference/shaders-msl/vert/interface-block-block-composites.frag b/reference/shaders-msl/vert/interface-block-block-composites.frag index c42381d0046..cc2727682d9 100644 --- a/reference/shaders-msl/vert/interface-block-block-composites.frag +++ b/reference/shaders-msl/vert/interface-block-block-composites.frag @@ -1,13 +1,54 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Vert { float3x3 wMatrix; float4 wTmp; - float arr[4]; + spvUnsafeArray arr; }; struct main0_out @@ -20,14 +61,14 @@ struct main0_in float3 vMatrix_0 [[user(locn0)]]; float3 vMatrix_1 [[user(locn1)]]; float3 vMatrix_2 [[user(locn2)]]; - float3 Vert_wMatrix_0 [[user(locn4)]]; - float3 Vert_wMatrix_1 [[user(locn5)]]; - float3 Vert_wMatrix_2 [[user(locn6)]]; - float4 Vert_wTmp [[user(locn7)]]; - float 
Vert_arr_0 [[user(locn8)]]; - float Vert_arr_1 [[user(locn9)]]; - float Vert_arr_2 [[user(locn10)]]; - float Vert_arr_3 [[user(locn11)]]; + float3 m_17_wMatrix_0 [[user(locn4)]]; + float3 m_17_wMatrix_1 [[user(locn5)]]; + float3 m_17_wMatrix_2 [[user(locn6)]]; + float4 m_17_wTmp [[user(locn7)]]; + float m_17_arr_0 [[user(locn8)]]; + float m_17_arr_1 [[user(locn9)]]; + float m_17_arr_2 [[user(locn10)]]; + float m_17_arr_3 [[user(locn11)]]; }; fragment main0_out main0(main0_in in [[stage_in]]) @@ -35,14 +76,14 @@ fragment main0_out main0(main0_in in [[stage_in]]) main0_out out = {}; Vert _17 = {}; float3x3 vMatrix = {}; - _17.wMatrix[0] = in.Vert_wMatrix_0; - _17.wMatrix[1] = in.Vert_wMatrix_1; - _17.wMatrix[2] = in.Vert_wMatrix_2; - _17.wTmp = in.Vert_wTmp; - _17.arr[0] = in.Vert_arr_0; - _17.arr[1] = in.Vert_arr_1; - _17.arr[2] = in.Vert_arr_2; - _17.arr[3] = in.Vert_arr_3; + _17.wMatrix[0] = in.m_17_wMatrix_0; + _17.wMatrix[1] = in.m_17_wMatrix_1; + _17.wMatrix[2] = in.m_17_wMatrix_2; + _17.wTmp = in.m_17_wTmp; + _17.arr[0] = in.m_17_arr_0; + _17.arr[1] = in.m_17_arr_1; + _17.arr[2] = in.m_17_arr_2; + _17.arr[3] = in.m_17_arr_3; vMatrix[0] = in.vMatrix_0; vMatrix[1] = in.vMatrix_1; vMatrix[2] = in.vMatrix_2; diff --git a/reference/shaders-msl/vert/interface-block-block-composites.vert b/reference/shaders-msl/vert/interface-block-block-composites.vert index 3d97ae6dcff..a05c9331586 100644 --- a/reference/shaders-msl/vert/interface-block-block-composites.vert +++ b/reference/shaders-msl/vert/interface-block-block-composites.vert @@ -1,11 +1,52 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Vert { - float arr[3]; + spvUnsafeArray arr; float3x3 wMatrix; float4 wTmp; }; @@ -15,13 +56,13 @@ struct main0_out float3 vMatrix_0 [[user(locn0)]]; float3 vMatrix_1 [[user(locn1)]]; float3 vMatrix_2 [[user(locn2)]]; - float Vert_arr_0 [[user(locn4)]]; - float Vert_arr_1 [[user(locn5)]]; - float Vert_arr_2 [[user(locn6)]]; - float3 Vert_wMatrix_0 [[user(locn7)]]; - float3 Vert_wMatrix_1 [[user(locn8)]]; - float3 Vert_wMatrix_2 [[user(locn9)]]; - float4 Vert_wTmp [[user(locn10)]]; + float m_20_arr_0 [[user(locn4)]]; + float m_20_arr_1 [[user(locn5)]]; + float m_20_arr_2 [[user(locn6)]]; + float3 m_20_wMatrix_0 [[user(locn7)]]; + float3 m_20_wMatrix_1 [[user(locn8)]]; + float3 m_20_wMatrix_2 [[user(locn9)]]; + float4 m_20_wTmp [[user(locn10)]]; float4 gl_Position [[position]]; }; @@ -52,13 +93,13 @@ vertex main0_out main0(main0_in in [[stage_in]]) out.vMatrix_0 = vMatrix[0]; out.vMatrix_1 = vMatrix[1]; out.vMatrix_2 = vMatrix[2]; - out.Vert_arr_0 = _20.arr[0]; - out.Vert_arr_1 = _20.arr[1]; - out.Vert_arr_2 = _20.arr[2]; - out.Vert_wMatrix_0 = _20.wMatrix[0]; - out.Vert_wMatrix_1 = _20.wMatrix[1]; - out.Vert_wMatrix_2 = _20.wMatrix[2]; - out.Vert_wTmp = _20.wTmp; + out.m_20_arr_0 = _20.arr[0]; + out.m_20_arr_1 = _20.arr[1]; + out.m_20_arr_2 = _20.arr[2]; + out.m_20_wMatrix_0 = _20.wMatrix[0]; + 
out.m_20_wMatrix_1 = _20.wMatrix[1]; + out.m_20_wMatrix_2 = _20.wMatrix[2]; + out.m_20_wTmp = _20.wTmp; return out; } diff --git a/reference/shaders-msl/vert/interface-block-single-element-array.vert b/reference/shaders-msl/vert/interface-block-single-element-array.vert new file mode 100644 index 00000000000..6858db730e3 --- /dev/null +++ b/reference/shaders-msl/vert/interface-block-single-element-array.vert @@ -0,0 +1,79 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct TDPickVertex +{ + float4 c; + spvUnsafeArray uv; +}; + +struct main0_out +{ + float4 oTDVert_c [[user(locn0)]]; + float3 oTDVert_uv_0 [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float3 P [[attribute(0)]]; + float3 uv_0 [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + TDPickVertex oTDVert = {}; + spvUnsafeArray uv = {}; + uv[0] = in.uv_0; + out.gl_Position = float4(in.P, 1.0); + oTDVert.uv[0] = uv[0]; + oTDVert.c = float4(1.0); + out.oTDVert_c = oTDVert.c; + out.oTDVert_uv_0 = oTDVert.uv[0]; + return out; +} + diff --git a/reference/shaders-msl/vert/interpolation-qualifiers-block.vert 
b/reference/shaders-msl/vert/interpolation-qualifiers-block.vert index 4206623b4f6..1ae24c7e5b5 100644 --- a/reference/shaders-msl/vert/interpolation-qualifiers-block.vert +++ b/reference/shaders-msl/vert/interpolation-qualifiers-block.vert @@ -16,13 +16,13 @@ struct Output struct main0_out { - float2 Output_v0 [[user(locn0)]]; - float2 Output_v1 [[user(locn1)]]; - float3 Output_v2 [[user(locn2)]]; - float4 Output_v3 [[user(locn3)]]; - float Output_v4 [[user(locn4)]]; - float Output_v5 [[user(locn5)]]; - float Output_v6 [[user(locn6)]]; + float2 outp_v0 [[user(locn0)]]; + float2 outp_v1 [[user(locn1)]]; + float3 outp_v2 [[user(locn2)]]; + float4 outp_v3 [[user(locn3)]]; + float outp_v4 [[user(locn4)]]; + float outp_v5 [[user(locn5)]]; + float outp_v6 [[user(locn6)]]; float4 gl_Position [[position]]; }; @@ -43,13 +43,13 @@ vertex main0_out main0(main0_in in [[stage_in]]) outp.v5 = in.Position.y; outp.v6 = in.Position.x * in.Position.w; out.gl_Position = in.Position; - out.Output_v0 = outp.v0; - out.Output_v1 = outp.v1; - out.Output_v2 = outp.v2; - out.Output_v3 = outp.v3; - out.Output_v4 = outp.v4; - out.Output_v5 = outp.v5; - out.Output_v6 = outp.v6; + out.outp_v0 = outp.v0; + out.outp_v1 = outp.v1; + out.outp_v2 = outp.v2; + out.outp_v3 = outp.v3; + out.outp_v4 = outp.v4; + out.outp_v5 = outp.v5; + out.outp_v6 = outp.v6; return out; } diff --git a/reference/shaders-msl/vert/leaf-function.capture.vert b/reference/shaders-msl/vert/leaf-function.capture.vert index 5a8469d1ac3..6519e56b8ff 100644 --- a/reference/shaders-msl/vert/leaf-function.capture.vert +++ b/reference/shaders-msl/vert/leaf-function.capture.vert @@ -22,6 +22,7 @@ struct main0_in float3 aNormal [[attribute(1)]]; }; +static inline __attribute__((always_inline)) void set_output(device float4& gl_Position, constant UBO& v_18, thread float4& aVertex, device float3& vNormal, thread float3& aNormal) { gl_Position = v_18.uMVP * aVertex; diff --git a/reference/shaders-msl/vert/leaf-function.for-tess.vert 
b/reference/shaders-msl/vert/leaf-function.for-tess.vert new file mode 100644 index 00000000000..5a960e5ec84 --- /dev/null +++ b/reference/shaders-msl/vert/leaf-function.for-tess.vert @@ -0,0 +1,39 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct UBO +{ + float4x4 uMVP; +}; + +struct main0_out +{ + float3 vNormal; + float4 gl_Position; +}; + +struct main0_in +{ + float4 aVertex [[attribute(0)]]; + float3 aNormal [[attribute(1)]]; +}; + +static inline __attribute__((always_inline)) +void set_output(device float4& gl_Position, constant UBO& v_18, thread float4& aVertex, device float3& vNormal, thread float3& aNormal) +{ + gl_Position = v_18.uMVP * aVertex; + vNormal = aNormal; +} + +kernel void main0(main0_in in [[stage_in]], constant UBO& v_18 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + set_output(out.gl_Position, v_18, in.aVertex, out.vNormal, in.aNormal); +} + diff --git a/reference/shaders-msl/vert/no-contraction.vert b/reference/shaders-msl/vert/no-contraction.vert new file mode 100644 index 00000000000..26bef234e1f --- /dev/null +++ b/reference/shaders-msl/vert/no-contraction.vert @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +[[clang::optnone]] T spvFMul(T l, T r) +{ + return fma(l, r, T(0)); +} + +template +[[clang::optnone]] vec spvFMulVectorMatrix(vec v, matrix m) +{ + vec res = vec(0); + for (uint i = Rows; i > 0; --i) + { + vec tmp(0); + for (uint j = 0; j < Cols; ++j) + { + tmp[j] = m[j][i - 1]; + } + res = fma(tmp, vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] vec spvFMulMatrixVector(matrix 
m, vec v) +{ + vec res = vec(0); + for (uint i = Cols; i > 0; --i) + { + res = fma(m[i - 1], vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] matrix spvFMulMatrixMatrix(matrix l, matrix r) +{ + matrix res; + for (uint i = 0; i < RCols; i++) + { + vec tmp(0); + for (uint j = 0; j < LCols; j++) + { + tmp = fma(vec(r[i][j]), l[j], tmp); + } + res[i] = tmp; + } + return res; +} + +template +[[clang::optnone]] T spvFAdd(T l, T r) +{ + return fma(T(1), l, r); +} + +template +[[clang::optnone]] T spvFSub(T l, T r) +{ + return fma(T(-1), r, l); +} + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vA [[attribute(0)]]; + float4 vB [[attribute(1)]]; + float4 vC [[attribute(2)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 mul = spvFMul(in.vA, in.vB); + float4 add = spvFAdd(in.vA, in.vB); + float4 sub = spvFSub(in.vA, in.vB); + float4 mad = spvFAdd(spvFMul(in.vA, in.vB), in.vC); + float4 summed = spvFAdd(spvFAdd(spvFAdd(mul, add), sub), mad); + out.gl_Position = summed; + return out; +} + diff --git a/reference/shaders-msl/vert/no-disable-vertex-out.frag-output.vert b/reference/shaders-msl/vert/no-disable-vertex-out.frag-output.vert new file mode 100644 index 00000000000..14cc94937c0 --- /dev/null +++ b/reference/shaders-msl/vert/no-disable-vertex-out.frag-output.vert @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct buf +{ + float4x4 MVP; + float4 position[36]; + float4 attr[36]; +}; + +struct main0_out +{ + float4 texcoord [[user(locn0)]]; + float3 frag_pos [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant buf& ubuf [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.texcoord = ubuf.attr[int(gl_VertexIndex)]; + out.gl_Position = ubuf.MVP * ubuf.position[int(gl_VertexIndex)]; + out.frag_pos = out.gl_Position.xyz; + return out; +} + diff --git 
a/reference/shaders-msl/vert/no_stage_out.for-tess.vert b/reference/shaders-msl/vert/no_stage_out.for-tess.vert new file mode 100644 index 00000000000..984e83260aa --- /dev/null +++ b/reference/shaders-msl/vert/no_stage_out.for-tess.vert @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct _RESERVED_IDENTIFIER_FIXUP_10_12 +{ + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; +}; + +struct main0_in +{ + uint4 _RESERVED_IDENTIFIER_FIXUP_19 [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], device _RESERVED_IDENTIFIER_FIXUP_10_12& _RESERVED_IDENTIFIER_FIXUP_12 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], uint3 spvDispatchBase [[grid_origin]]) +{ + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + uint gl_VertexIndex = gl_GlobalInvocationID.x + spvDispatchBase.x; + _RESERVED_IDENTIFIER_FIXUP_12._RESERVED_IDENTIFIER_FIXUP_m0[int(gl_VertexIndex)] = in._RESERVED_IDENTIFIER_FIXUP_19; +} + diff --git a/reference/shaders-msl/vert/no_stage_out.vert b/reference/shaders-msl/vert/no_stage_out.vert index 28098ee88e6..e804da67535 100644 --- a/reference/shaders-msl/vert/no_stage_out.vert +++ b/reference/shaders-msl/vert/no_stage_out.vert @@ -3,18 +3,18 @@ using namespace metal; -struct _10 +struct _RESERVED_IDENTIFIER_FIXUP_10_12 { - uint4 _m0[1024]; + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; }; struct main0_in { - uint4 m_19 [[attribute(0)]]; + uint4 _RESERVED_IDENTIFIER_FIXUP_19 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], device _10& _12 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +vertex void main0(main0_in in [[stage_in]], device _RESERVED_IDENTIFIER_FIXUP_10_12& _RESERVED_IDENTIFIER_FIXUP_12 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) { - _12._m0[gl_VertexIndex] = in.m_19; + _RESERVED_IDENTIFIER_FIXUP_12._RESERVED_IDENTIFIER_FIXUP_m0[int(gl_VertexIndex)] = in._RESERVED_IDENTIFIER_FIXUP_19; } diff --git 
a/reference/shaders-msl/vert/no_stage_out.write_buff.vert b/reference/shaders-msl/vert/no_stage_out.write_buff.vert index 23fa0817c33..fb8060f0722 100644 --- a/reference/shaders-msl/vert/no_stage_out.write_buff.vert +++ b/reference/shaders-msl/vert/no_stage_out.write_buff.vert @@ -3,14 +3,14 @@ using namespace metal; -struct _35 +struct _RESERVED_IDENTIFIER_FIXUP_33_35 { - uint4 _m0[1024]; + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; }; -struct _40 +struct _RESERVED_IDENTIFIER_FIXUP_38_40 { - uint4 _m0[1024]; + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; }; struct main0_out @@ -20,16 +20,16 @@ struct main0_out struct main0_in { - float4 m_17 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_14 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], device _35& _37 [[buffer(0)]], constant _40& _42 [[buffer(1)]]) +vertex void main0(main0_in in [[stage_in]], device _RESERVED_IDENTIFIER_FIXUP_33_35& _RESERVED_IDENTIFIER_FIXUP_35 [[buffer(0)]], constant _RESERVED_IDENTIFIER_FIXUP_38_40& _RESERVED_IDENTIFIER_FIXUP_40 [[buffer(1)]]) { main0_out out = {}; - out.gl_Position = in.m_17; - for (int _22 = 0; _22 < 1024; _22++) + out.gl_Position = in._RESERVED_IDENTIFIER_FIXUP_14; + for (int _RESERVED_IDENTIFIER_FIXUP_19 = 0; _RESERVED_IDENTIFIER_FIXUP_19 < 1024; _RESERVED_IDENTIFIER_FIXUP_19++) { - _37._m0[_22] = _42._m0[_22]; + _RESERVED_IDENTIFIER_FIXUP_35._RESERVED_IDENTIFIER_FIXUP_m0[_RESERVED_IDENTIFIER_FIXUP_19] = _RESERVED_IDENTIFIER_FIXUP_40._RESERVED_IDENTIFIER_FIXUP_m0[_RESERVED_IDENTIFIER_FIXUP_19]; } } diff --git a/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert b/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert index 9fe99e29fe1..68c649ed6c0 100644 --- a/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert +++ b/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert @@ -6,9 +6,9 @@ using namespace metal; -struct _23 +struct _RESERVED_IDENTIFIER_FIXUP_19_21 { - uint _m0; + uint 
_RESERVED_IDENTIFIER_FIXUP_m0; }; struct main0_out @@ -18,14 +18,14 @@ struct main0_out struct main0_in { - float4 m_17 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_14 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], device _23& _25 [[buffer(0)]]) +vertex void main0(main0_in in [[stage_in]], volatile device _RESERVED_IDENTIFIER_FIXUP_19_21& _RESERVED_IDENTIFIER_FIXUP_21 [[buffer(0)]]) { main0_out out = {}; - out.gl_Position = in.m_17; - uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_25._m0, 1u, memory_order_relaxed); - uint _22 = _29; + out.gl_Position = in._RESERVED_IDENTIFIER_FIXUP_14; + uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_RESERVED_IDENTIFIER_FIXUP_21._RESERVED_IDENTIFIER_FIXUP_m0, 1u, memory_order_relaxed); + uint _RESERVED_IDENTIFIER_FIXUP_26 = _29; } diff --git a/reference/shaders-msl/vert/no_stage_out.write_tex.vert b/reference/shaders-msl/vert/no_stage_out.write_tex.vert index 76eb6f24fbf..dfe1c328d9e 100644 --- a/reference/shaders-msl/vert/no_stage_out.write_tex.vert +++ b/reference/shaders-msl/vert/no_stage_out.write_tex.vert @@ -10,16 +10,16 @@ struct main0_out struct main0_in { - float4 m_17 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_14 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], texture1d _34 [[texture(0)]], texture1d _37 [[texture(1)]]) +vertex void main0(main0_in in [[stage_in]], texture1d _RESERVED_IDENTIFIER_FIXUP_32 [[texture(0)]], texture1d _RESERVED_IDENTIFIER_FIXUP_35 [[texture(1)]]) { main0_out out = {}; - out.gl_Position = in.m_17; - for (int _22 = 0; _22 < 128; _22++) + out.gl_Position = in._RESERVED_IDENTIFIER_FIXUP_14; + for (int _RESERVED_IDENTIFIER_FIXUP_19 = 0; _RESERVED_IDENTIFIER_FIXUP_19 < 128; _RESERVED_IDENTIFIER_FIXUP_19++) { - _34.write(_37.read(uint(_22)), uint(_22)); + _RESERVED_IDENTIFIER_FIXUP_32.write(_RESERVED_IDENTIFIER_FIXUP_35.read(uint(_RESERVED_IDENTIFIER_FIXUP_19)), uint(_RESERVED_IDENTIFIER_FIXUP_19)); } } 
diff --git a/reference/shaders-msl/vert/out-block-with-nested-struct-array.vert b/reference/shaders-msl/vert/out-block-with-nested-struct-array.vert new file mode 100644 index 00000000000..cabcfcb521d --- /dev/null +++ b/reference/shaders-msl/vert/out-block-with-nested-struct-array.vert @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct t21 +{ + float4 m0; + float4 m1; +}; + +struct t24 +{ + spvUnsafeArray m0; +}; + +struct main0_out +{ + float4 v26_m0_0_m0 [[user(locn0)]]; + float4 v26_m0_0_m1 [[user(locn1)]]; + float4 v26_m0_1_m0 [[user(locn2)]]; + float4 v26_m0_1_m1 [[user(locn3)]]; + float4 v26_m0_2_m0 [[user(locn4)]]; + float4 v26_m0_2_m1 [[user(locn5)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 v17 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + t24 v26 = {}; + out.gl_Position = in.v17; + v26.m0[1].m1 = float4(-4.0, -9.0, 3.0, 7.0); + out.v26_m0_0_m0 = v26.m0[0].m0; + out.v26_m0_0_m1 = v26.m0[0].m1; + out.v26_m0_1_m0 = v26.m0[1].m0; + out.v26_m0_1_m1 = v26.m0[1].m1; + out.v26_m0_2_m0 = v26.m0[2].m0; + out.v26_m0_2_m1 = v26.m0[2].m1; + 
return out; +} + diff --git a/reference/shaders-msl/vert/out-block-with-struct-array.vert b/reference/shaders-msl/vert/out-block-with-struct-array.vert new file mode 100644 index 00000000000..61c7c18b54c --- /dev/null +++ b/reference/shaders-msl/vert/out-block-with-struct-array.vert @@ -0,0 +1,83 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct t21 +{ + float m0; + float4 m1; +}; + +struct main0_out +{ + float v25_0_m0 [[user(locn0)]]; + float4 v25_0_m1 [[user(locn1)]]; + float v25_1_m0 [[user(locn2)]]; + float4 v25_1_m1 [[user(locn3)]]; + float v25_2_m0 [[user(locn4)]]; + float4 v25_2_m1 [[user(locn5)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 v17 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray v25 = {}; + out.gl_Position = in.v17; + v25[2].m1 = float4(-4.0, -9.0, 3.0, 7.0); + out.v25_0_m0 = v25[0].m0; + out.v25_0_m1 = v25[0].m1; + out.v25_1_m0 = v25[1].m0; + out.v25_1_m1 = v25[1].m1; + out.v25_2_m0 = v25[2].m0; + out.v25_2_m1 = v25[2].m1; + return out; +} + diff --git a/reference/shaders-msl/vert/out_block.vert 
b/reference/shaders-msl/vert/out_block.vert index 45b897013b1..909a059bd2c 100644 --- a/reference/shaders-msl/vert/out_block.vert +++ b/reference/shaders-msl/vert/out_block.vert @@ -16,8 +16,8 @@ struct VertexOut struct main0_out { - float4 VertexOut_color [[user(locn2)]]; - float4 VertexOut_color2 [[user(locn3)]]; + float4 outputs_color [[user(locn2)]]; + float4 outputs_color2 [[user(locn3)]]; float4 gl_Position [[position]]; }; @@ -34,8 +34,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant Transform& block [[buf out.gl_Position = block.transform * float4(in.position, 1.0); outputs.color = in.color; outputs.color2 = in.color + float4(1.0); - out.VertexOut_color = outputs.color; - out.VertexOut_color2 = outputs.color2; + out.outputs_color = outputs.color; + out.outputs_color2 = outputs.color2; return out; } diff --git a/reference/shaders-msl/vert/packed-bool-to-uint.vert b/reference/shaders-msl/vert/packed-bool-to-uint.vert new file mode 100644 index 00000000000..6cc55204848 --- /dev/null +++ b/reference/shaders-msl/vert/packed-bool-to-uint.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _24 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _24.umatrix * float4(_24.uquad[int(gl_VertexIndex)].x, _24.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_24.flags.flags[0] != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/shaders-msl/vert/packed-bool2-to-packed_uint2.vert b/reference/shaders-msl/vert/packed-bool2-to-packed_uint2.vert new file mode 100644 index 00000000000..4c46aaeb4ea --- /dev/null +++ 
b/reference/shaders-msl/vert/packed-bool2-to-packed_uint2.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint2 flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _25 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _25.umatrix * float4(_25.uquad[int(gl_VertexIndex)].x, _25.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_25.flags.flags[0].x != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/shaders-msl/vert/packed_matrix.vert b/reference/shaders-msl/vert/packed_matrix.vert index db688115811..e18d5f225a7 100644 --- a/reference/shaders-msl/vert/packed_matrix.vert +++ b/reference/shaders-msl/vert/packed_matrix.vert @@ -3,55 +3,53 @@ using namespace metal; -typedef packed_float4 packed_rm_float4x3[3]; - -struct _15 +struct _RESERVED_IDENTIFIER_FIXUP_1365_18812 { - packed_rm_float4x3 _m0; - packed_rm_float4x3 _m1; + float3x4 _RESERVED_IDENTIFIER_FIXUP_m0; + float3x4 _RESERVED_IDENTIFIER_FIXUP_m1; }; -struct _42 +struct _RESERVED_IDENTIFIER_FIXUP_1126_22044 { - float4x4 _m0; - float4x4 _m1; - float _m2; + float4x4 _RESERVED_IDENTIFIER_FIXUP_m0; + float4x4 _RESERVED_IDENTIFIER_FIXUP_m1; + float _RESERVED_IDENTIFIER_FIXUP_m9; char _m3_pad[12]; - packed_float3 _m3; - float _m4; - packed_float3 _m5; - float _m6; - float _m7; - float _m8; - float2 _m9; + packed_float3 _RESERVED_IDENTIFIER_FIXUP_m10; + float _RESERVED_IDENTIFIER_FIXUP_m11; + packed_float3 _RESERVED_IDENTIFIER_FIXUP_m12; + float _RESERVED_IDENTIFIER_FIXUP_m17; + float _RESERVED_IDENTIFIER_FIXUP_m18; + float _RESERVED_IDENTIFIER_FIXUP_m19; + float2 _RESERVED_IDENTIFIER_FIXUP_m20; }; struct main0_out { - float3 m_72 
[[user(locn0)]]; + float3 _RESERVED_IDENTIFIER_FIXUP_3976 [[user(locn0)]]; float4 gl_Position [[position]]; }; struct main0_in { - float4 m_25 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_5275 [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant _15& _17 [[buffer(0)]], constant _42& _44 [[buffer(1)]]) +vertex main0_out main0(main0_in in [[stage_in]], constant _RESERVED_IDENTIFIER_FIXUP_1365_18812& _RESERVED_IDENTIFIER_FIXUP_18812 [[buffer(0)]], constant _RESERVED_IDENTIFIER_FIXUP_1126_22044& _RESERVED_IDENTIFIER_FIXUP_22044 [[buffer(1)]]) { main0_out out = {}; - float3 _91; - float3 _13; - do + float3 _RESERVED_IDENTIFIER_FIXUP_2; + float3 _RESERVED_IDENTIFIER_FIXUP_23783; + for (;;) { - _13 = normalize(float4(in.m_25.xyz, 0.0) * float3x4(float4(_17._m1[0]), float4(_17._m1[1]), float4(_17._m1[2]))); + _RESERVED_IDENTIFIER_FIXUP_23783 = fast::normalize(float4(in._RESERVED_IDENTIFIER_FIXUP_5275.xyz, 0.0) * _RESERVED_IDENTIFIER_FIXUP_18812._RESERVED_IDENTIFIER_FIXUP_m1); break; - } while (false); - float4 _39 = _44._m0 * float4(float3(_44._m3) + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0); - out.m_72 = _13; - float4 _74 = _39; - _74.y = -_39.y; - out.gl_Position = _74; + } + float4 _RESERVED_IDENTIFIER_FIXUP_14995 = _RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m0 * float4(float3(_RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m10) + (in._RESERVED_IDENTIFIER_FIXUP_5275.xyz * (_RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m17 + _RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m18)), 1.0); + out._RESERVED_IDENTIFIER_FIXUP_3976 = _RESERVED_IDENTIFIER_FIXUP_23783; + float4 _RESERVED_IDENTIFIER_FIXUP_6282 = _RESERVED_IDENTIFIER_FIXUP_14995; + _RESERVED_IDENTIFIER_FIXUP_6282.y = -_RESERVED_IDENTIFIER_FIXUP_14995.y; + out.gl_Position = _RESERVED_IDENTIFIER_FIXUP_6282; return out; } diff --git a/reference/shaders-msl/vert/read-from-row-major-array.vert 
b/reference/shaders-msl/vert/read-from-row-major-array.vert index 9a633c5fe63..d6ade7c38b9 100644 --- a/reference/shaders-msl/vert/read-from-row-major-array.vert +++ b/reference/shaders-msl/vert/read-from-row-major-array.vert @@ -7,7 +7,7 @@ using namespace metal; struct Block { - float2x3 var[3][4]; + float3x4 var[3][4]; }; struct main0_out @@ -21,17 +21,13 @@ struct main0_in float4 a_position [[attribute(0)]]; }; -// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization. -float2x3 spvConvertFromRowMajor2x3(float2x3 m) -{ - return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2])); -} - +static inline __attribute__((always_inline)) float compare_float(thread const float& a, thread const float& b) { return float(abs(a - b) < 0.0500000007450580596923828125); } +static inline __attribute__((always_inline)) float compare_vec3(thread const float3& a, thread const float3& b) { float param = a.x; @@ -43,6 +39,7 @@ float compare_vec3(thread const float3& a, thread const float3& b) return (compare_float(param, param_1) * compare_float(param_2, param_3)) * compare_float(param_4, param_5); } +static inline __attribute__((always_inline)) float compare_mat2x3(thread const float2x3& a, thread const float2x3& b) { float3 param = a[0]; @@ -57,7 +54,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant Block& _104 [[buffer(0 main0_out out = {}; out.gl_Position = in.a_position; float result = 1.0; - float2x3 param = spvConvertFromRowMajor2x3(_104.var[0][0]); + float2x3 param = transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy)); float2x3 param_1 = float2x3(float3(2.0, 6.0, -6.0), float3(0.0, 5.0, 5.0)); result *= compare_mat2x3(param, param_1); out.v_vtxResult = result; diff --git a/reference/shaders-msl/vert/resource-arrays-leaf.ios.vert b/reference/shaders-msl/vert/resource-arrays-leaf.ios.vert index 97e97e6fd29..fad06d6afec 100644 --- 
a/reference/shaders-msl/vert/resource-arrays-leaf.ios.vert +++ b/reference/shaders-msl/vert/resource-arrays-leaf.ios.vert @@ -22,7 +22,8 @@ struct constant_block #endif constant int arraySize = SPIRV_CROSS_CONSTANT_ID_0; -void doWork(device storage_block* (&storage)[2], constant constant_block* (&constants)[4], thread const array, 3> images) +static inline __attribute__((always_inline)) +void doWork(device storage_block* (&storage)[2], constant constant_block* (&constants)[4], thread const array, 3>& images) { storage[0]->baz = uint4(constants[3]->foo); storage[1]->quux = images[2].read(uint2(int2(constants[1]->bar))).xy; diff --git a/reference/shaders-msl/vert/return-array.force-native-array.vert b/reference/shaders-msl/vert/return-array.force-native-array.vert new file mode 100644 index 00000000000..4793b4aca9e --- /dev/null +++ b/reference/shaders-msl/vert/return-array.force-native-array.vert @@ -0,0 +1,154 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T 
(&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +constant float4 _20[2] = { float4(10.0), float4(20.0) }; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vInput0 [[attribute(0)]]; + float4 vInput1 [[attribute(1)]]; +}; + +static inline __attribute__((always_inline)) +void test(thread float4 (&spvReturnValue)[2]) +{ + spvArrayCopyFromConstantToStack1(spvReturnValue, _20); +} + +static inline __attribute__((always_inline)) +void test2(thread float4 (&spvReturnValue)[2], thread float4& vInput0, thread float4& vInput1) +{ + float4 foobar[2]; + foobar[0] = vInput0; + foobar[1] = vInput1; + spvArrayCopyFromStackToStack1(spvReturnValue, foobar); +} + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 _42[2]; + test(_42); + float4 _44[2]; + test2(_44, in.vInput0, in.vInput1); + out.gl_Position = _42[0] + _44[1]; + return out; +} + diff --git 
a/reference/shaders-msl/vert/return-array.vert b/reference/shaders-msl/vert/return-array.vert index cd06fddaa80..dacb0ba3053 100644 --- a/reference/shaders-msl/vert/return-array.vert +++ b/reference/shaders-msl/vert/return-array.vert @@ -1,11 +1,50 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -constant float4 _20[2] = { float4(10.0), float4(20.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _20 = spvUnsafeArray({ float4(10.0), float4(20.0) }); struct main0_out { @@ -18,40 +57,25 @@ struct main0_in float4 vInput1 [[attribute(1)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -void test(thread float4 (&SPIRV_Cross_return_value)[2]) +static inline __attribute__((always_inline)) +spvUnsafeArray test() { - spvArrayCopyFromConstant1(SPIRV_Cross_return_value, _20); + return _20; } -void test2(thread float4 (&SPIRV_Cross_return_value)[2], thread float4& vInput0, thread float4& vInput1) +static inline __attribute__((always_inline)) +spvUnsafeArray test2(thread float4& vInput0, thread float4& vInput1) { - float4 foobar[2]; + spvUnsafeArray foobar; foobar[0] = vInput0; foobar[1] = vInput1; - spvArrayCopyFromStack1(SPIRV_Cross_return_value, foobar); + return foobar; } vertex main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 _42[2]; - test(_42); - float4 _44[2]; - test2(_44, in.vInput0, in.vInput1); - out.gl_Position = _42[0] + _44[1]; + out.gl_Position = test()[0] + test2(in.vInput0, in.vInput1)[1]; return out; } diff --git a/reference/shaders-msl/vert/set_builtin_in_func.vert b/reference/shaders-msl/vert/set_builtin_in_func.vert index 2952748dc00..91057da2ba9 100644 --- a/reference/shaders-msl/vert/set_builtin_in_func.vert +++ b/reference/shaders-msl/vert/set_builtin_in_func.vert @@ -11,6 +11,7 @@ struct main0_out float gl_PointSize [[point_size]]; }; +static inline __attribute__((always_inline)) void write_outblock(thread float4& gl_Position, thread float& gl_PointSize) { gl_PointSize = 1.0; diff --git a/reference/shaders-msl/vert/sign-int-types.vert b/reference/shaders-msl/vert/sign-int-types.vert index 2f518b12911..f5f647d4589 100644 --- a/reference/shaders-msl/vert/sign-int-types.vert +++ b/reference/shaders-msl/vert/sign-int-types.vert @@ -5,6 +5,13 @@ using namespace metal; +// Implementation of the GLSL sign() function for integer types 
+template::value>::type> +inline T sign(T x) +{ + return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0)); +} + struct UBO { float4x4 uMVP; @@ -36,13 +43,6 @@ struct main0_in float4 aVertex [[attribute(0)]]; }; -// Implementation of the GLSL sign() function for integer types -template::value>::type> -T sign(T x) -{ - return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0)); -} - vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]) { main0_out out = {}; diff --git a/reference/shaders-msl/vert/signedness-mismatch.shader-inputs.vert b/reference/shaders-msl/vert/signedness-mismatch.shader-inputs.vert new file mode 100644 index 00000000000..56e00199cb1 --- /dev/null +++ b/reference/shaders-msl/vert/signedness-mismatch.shader-inputs.vert @@ -0,0 +1,74 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + ushort2 a [[attribute(0)]]; + uint3 b [[attribute(1)]]; + ushort c_0 [[attribute(2)]]; + ushort c_1 [[attribute(3)]]; + uint4 d_0 [[attribute(4)]]; + uint4 d_1 [[attribute(5)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray c = {}; + spvUnsafeArray d = {}; + c[0] = in.c_0; + c[1] = in.c_1; + d[0] = in.d_0; + d[1] = in.d_1; + out.gl_Position = float4(float(int(short(in.a.x))), float(int(in.b.x)), float(uint(c[1])), float(d[0].w)); + return out; +} + diff --git a/reference/shaders-msl/vert/texture_buffer.vert b/reference/shaders-msl/vert/texture_buffer.vert index ee3956fad84..9d8b5c49f02 100644 --- a/reference/shaders-msl/vert/texture_buffer.vert +++ b/reference/shaders-msl/vert/texture_buffer.vert @@ -5,17 +5,18 @@ using namespace metal; -struct main0_out -{ - float4 gl_Position [[position]]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct main0_out +{ + float4 gl_Position [[position]]; +}; + vertex main0_out main0(texture2d uSamp [[texture(0)]], texture2d uSampo [[texture(1)]]) { main0_out out = {}; diff --git 
a/reference/shaders-msl/vert/uniform-struct-out-of-order-offests.vert b/reference/shaders-msl/vert/uniform-struct-out-of-order-offests.vert new file mode 100644 index 00000000000..4f71b205570 --- /dev/null +++ b/reference/shaders-msl/vert/uniform-struct-out-of-order-offests.vert @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct data_u_t +{ + int4 m1[3]; + uint m3; + uint3 m2; + int4 m0[8]; +}; + +struct main0_out +{ + float foo [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vtx_posn [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant data_u_t& data_u [[buffer(0)]]) +{ + main0_out out = {}; + out.gl_Position = in.vtx_posn; + int4 a = data_u.m1[1]; + uint3 b = data_u.m2; + int c = data_u.m0[4].x; + out.foo = float((uint3(a.xyz) + b).y * uint(c)); + return out; +} + diff --git a/reference/shaders-msl/vert/uniform-struct-packing-nested.vert b/reference/shaders-msl/vert/uniform-struct-packing-nested.vert new file mode 100644 index 00000000000..bfcae2a56c7 --- /dev/null +++ b/reference/shaders-msl/vert/uniform-struct-packing-nested.vert @@ -0,0 +1,57 @@ +#include +#include + +using namespace metal; + +typedef packed_float4 packed_rm_float4x4[4]; + +struct s0 +{ + float3x4 m0; + packed_int4 m1; + packed_rm_float4x4 m2; + packed_uint2 m3; +}; + +struct s1 +{ + float4x4 m0; + int m1; + char _m2_pad[12]; + packed_uint3 m2; + s0 m3; +}; + +struct data_u_t +{ + float4 m1[5]; + float2x4 m3; + int4 m4; + s1 m2; + float3x4 m0; +}; + +struct main0_out +{ + float foo [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vtx_posn [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant data_u_t& data_u [[buffer(0)]]) +{ + main0_out out = {}; + out.gl_Position = in.vtx_posn; + float2 a = data_u.m1[3].xy; + int4 b = data_u.m4; + float2x3 c = transpose(float3x2(data_u.m0[0].xy, data_u.m0[1].xy, data_u.m0[2].xy)); + float3x4 d = 
transpose(float4x3(data_u.m2.m0[0].xyz, data_u.m2.m0[1].xyz, data_u.m2.m0[2].xyz, data_u.m2.m0[3].xyz)); + float4x4 e = transpose(float4x4(float4(data_u.m2.m3.m2[0]), float4(data_u.m2.m3.m2[1]), float4(data_u.m2.m3.m2[2]), float4(data_u.m2.m3.m2[3]))); + out.foo = (((a.y + float(b.z)) * c[1].z) * d[2].w) * e[3].w; + return out; +} + diff --git a/reference/shaders-msl/vert/unused-position.vert b/reference/shaders-msl/vert/unused-position.vert new file mode 100644 index 00000000000..7dc4672139c --- /dev/null +++ b/reference/shaders-msl/vert/unused-position.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.gl_PointSize = 1.0; + return out; +} + diff --git a/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp b/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp deleted file mode 100644 index 4ebab8c7346..00000000000 --- a/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp +++ /dev/null @@ -1,146 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct SSBO -{ - float FragColor; -}; - -inline uint4 spvSubgroupBallot(bool value) -{ - simd_vote vote = simd_ballot(value); - // simd_ballot() returns a 64-bit integer-like object, but - // SPIR-V callers expect a uint4. We must convert. - // FIXME: This won't include higher bits if Apple ever supports - // 128 lanes in an SIMD-group. 
- return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0); -} - -inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) -{ - return !!extract_bits(ballot[bit / 32], bit % 32, 1); -} - -inline uint spvSubgroupBallotFindLSB(uint4 ballot) -{ - return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); -} - -inline uint spvSubgroupBallotFindMSB(uint4 ballot) -{ - return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); -} - -inline uint spvSubgroupBallotBitCount(uint4 ballot) -{ - return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); -} - -inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -template -inline bool spvSubgroupAllEqual(T value) -{ - return simd_all(value == simd_broadcast_first(value)); -} - -template<> -inline bool spvSubgroupAllEqual(bool value) -{ - return simd_all(value) || !simd_any(value); -} - -kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize 
[[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) -{ - uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); - uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); - uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); - uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - _9.FragColor = float(gl_NumSubgroups); - _9.FragColor = float(gl_SubgroupID); - _9.FragColor = float(gl_SubgroupSize); - _9.FragColor = float(gl_SubgroupInvocationID); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device); - simdgroup_barrier(mem_flags::mem_threadgroup); - simdgroup_barrier(mem_flags::mem_texture); - bool elected = simd_is_first(); - _9.FragColor = float4(gl_SubgroupEqMask).x; - _9.FragColor = float4(gl_SubgroupGeMask).x; - _9.FragColor = float4(gl_SubgroupGtMask).x; 
- _9.FragColor = float4(gl_SubgroupLeMask).x; - _9.FragColor = float4(gl_SubgroupLtMask).x; - float4 broadcasted = simd_broadcast(float4(10.0), 8u); - float3 first = simd_broadcast_first(float3(20.0)); - uint4 ballot_value = spvSubgroupBallot(true); - bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); - bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); - uint bit_count = spvSubgroupBallotBitCount(ballot_value); - uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); - uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); - uint lsb = spvSubgroupBallotFindLSB(ballot_value); - uint msb = spvSubgroupBallotFindMSB(ballot_value); - uint shuffled = simd_shuffle(10u, 8u); - uint shuffled_xor = simd_shuffle_xor(30u, 8u); - uint shuffled_up = simd_shuffle_up(20u, 4u); - uint shuffled_down = simd_shuffle_down(20u, 4u); - bool has_all = simd_all(true); - bool has_any = simd_any(true); - bool has_equal = spvSubgroupAllEqual(0); - has_equal = spvSubgroupAllEqual(true); - float4 added = simd_sum(float4(20.0)); - int4 iadded = simd_sum(int4(20)); - float4 multiplied = simd_product(float4(20.0)); - int4 imultiplied = simd_product(int4(20)); - float4 lo = simd_min(float4(20.0)); - float4 hi = simd_max(float4(20.0)); - int4 slo = simd_min(int4(20)); - int4 shi = simd_max(int4(20)); - uint4 ulo = simd_min(uint4(20u)); - uint4 uhi = simd_max(uint4(20u)); - uint4 anded = simd_and(ballot_value); - uint4 ored = simd_or(ballot_value); - uint4 xored = simd_xor(ballot_value); - added = simd_prefix_inclusive_sum(added); - iadded = simd_prefix_inclusive_sum(iadded); - multiplied = simd_prefix_inclusive_product(multiplied); - imultiplied = simd_prefix_inclusive_product(imultiplied); - added = simd_prefix_exclusive_sum(multiplied); - multiplied = simd_prefix_exclusive_product(multiplied); - iadded = simd_prefix_exclusive_sum(imultiplied); - 
imultiplied = simd_prefix_exclusive_product(imultiplied); - added = quad_sum(added); - multiplied = quad_product(multiplied); - iadded = quad_sum(iadded); - imultiplied = quad_product(imultiplied); - lo = quad_min(lo); - hi = quad_max(hi); - ulo = quad_min(ulo); - uhi = quad_max(uhi); - slo = quad_min(slo); - shi = quad_max(shi); - anded = quad_and(anded); - ored = quad_or(ored); - xored = quad_xor(xored); - float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u); - float4 swap_vertical = quad_shuffle_xor(float4(20.0), 2u); - float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u); - float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u); -} - diff --git a/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp b/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp deleted file mode 100644 index 84fcb9c3a92..00000000000 --- a/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include - -using namespace metal; - -struct SSBO -{ - float FragColor; -}; - -kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]]) -{ - _9.FragColor = float(gl_NumSubgroups); - _9.FragColor = float(gl_SubgroupID); - _9.FragColor = float(gl_SubgroupSize); - _9.FragColor = float(gl_SubgroupInvocationID); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device); - simdgroup_barrier(mem_flags::mem_threadgroup); - simdgroup_barrier(mem_flags::mem_texture); - uint shuffled = quad_shuffle(10u, 8u); - uint shuffled_xor = quad_shuffle_xor(30u, 8u); - uint shuffled_up = quad_shuffle_up(20u, 
4u); - uint shuffled_down = quad_shuffle_down(20u, 4u); - float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u); - float4 swap_vertical = quad_shuffle_xor(float4(20.0), 2u); - float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u); - float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u); -} - diff --git a/reference/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag b/reference/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag new file mode 100644 index 00000000000..f0935f6dcf4 --- /dev/null +++ b/reference/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; + float2 vTex_0 [[user(locn1)]]; + float2 vTex_1 [[user(locn2)]]; + float2 vTex_2 [[user(locn3)]]; + float2 vTex_3 [[user(locn4)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]]) +{ + main0_out out = {}; + spvUnsafeArray vTex = {}; + vTex[0] = 
in.vTex_0; + vTex[1] = in.vTex_1; + vTex[2] = in.vTex_2; + vTex[3] = in.vTex_3; + const uint gl_ViewIndex = spvViewMask[0]; + out.FragColor = in.vColor * uTex.sample(uTexSmplr, vTex[int(gl_ViewIndex)]); + return out; +} + diff --git a/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag b/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag index 23c554940b8..67895e3e92c 100644 --- a/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag +++ b/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 FragColor [[color(0)]]; @@ -20,7 +61,7 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]]) { main0_out out = {}; - float2 vTex[4] = {}; + spvUnsafeArray vTex = {}; vTex[0] = in.vTex_0; vTex[1] = in.vTex_1; vTex[2] = in.vTex_2; diff --git a/reference/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag 
b/reference/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag new file mode 100644 index 00000000000..274cea2de15 --- /dev/null +++ b/reference/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + bool _15 = gl_HelperInvocation; + gl_HelperInvocation = true, discard_fragment(); + if (!_15) + { + out.FragColor = float4(1.0, 0.0, 0.0, 1.0); + } + return out; +} + diff --git a/reference/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag b/reference/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag new file mode 100644 index 00000000000..7af77d91f96 --- /dev/null +++ b/reference/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag @@ -0,0 +1,28 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +static inline __attribute__((always_inline)) +void foo(thread bool& gl_HelperInvocation) +{ + gl_HelperInvocation = true, discard_fragment(); +} + +static inline __attribute__((always_inline)) +void bar(thread bool& gl_HelperInvocation) +{ + bool _13 = gl_HelperInvocation; + bool helper = _13; +} + +fragment void main0() +{ + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + foo(gl_HelperInvocation); + bar(gl_HelperInvocation); +} + diff --git a/reference/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag b/reference/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag new file mode 100644 index 00000000000..ad3734bdbe8 --- /dev/null +++ b/reference/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag @@ -0,0 +1,14 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + 
bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + gl_HelperInvocation = true, discard_fragment(); + bool _9 = gl_HelperInvocation; + bool helper = _9; +} + diff --git a/reference/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag b/reference/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag deleted file mode 100644 index ec25d067872..00000000000 --- a/reference/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag +++ /dev/null @@ -1,36 +0,0 @@ -#include -#include - -using namespace metal; - -struct UBO -{ - float a[1]; - float2 b[2]; -}; - -struct UBOEnhancedLayout -{ - float c[1]; - float2 d[2]; - char _m2_pad[9976]; - float e; -}; - -struct main0_out -{ - float FragColor [[color(0)]]; -}; - -struct main0_in -{ - int vIndex [[user(locn0)]]; -}; - -fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _17 [[buffer(0)]], constant UBOEnhancedLayout& _30 [[buffer(1)]]) -{ - main0_out out = {}; - out.FragColor = (_17.a[in.vIndex] + _30.c[in.vIndex]) + _30.e; - return out; -} - diff --git a/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag b/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag deleted file mode 100644 index affaf86d544..00000000000 --- a/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag +++ /dev/null @@ -1,143 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct main0_out -{ - float FragColor [[color(0)]]; -}; - -inline uint4 spvSubgroupBallot(bool value) -{ - simd_vote vote = simd_ballot(value); - // simd_ballot() returns a 64-bit integer-like object, but - // SPIR-V callers expect a uint4. We must convert. - // FIXME: This won't include higher bits if Apple ever supports - // 128 lanes in an SIMD-group. 
- return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0); -} - -inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) -{ - return !!extract_bits(ballot[bit / 32], bit % 32, 1); -} - -inline uint spvSubgroupBallotFindLSB(uint4 ballot) -{ - return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); -} - -inline uint spvSubgroupBallotFindMSB(uint4 ballot) -{ - return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); -} - -inline uint spvSubgroupBallotBitCount(uint4 ballot) -{ - return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); -} - -inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -template -inline bool spvSubgroupAllEqual(T value) -{ - return simd_all(value == simd_broadcast_first(value)); -} - -template<> -inline bool spvSubgroupAllEqual(bool value) -{ - return simd_all(value) || !simd_any(value); -} - -fragment main0_out main0() -{ - main0_out out = {}; - uint gl_SubgroupSize = simd_sum(1); - uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1); - uint4 gl_SubgroupEqMask = 
gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); - uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); - uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); - uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - out.FragColor = float(gl_SubgroupSize); - out.FragColor = float(gl_SubgroupInvocationID); - bool elected = simd_is_first(); - out.FragColor = float4(gl_SubgroupEqMask).x; - out.FragColor = float4(gl_SubgroupGeMask).x; - out.FragColor = float4(gl_SubgroupGtMask).x; - out.FragColor = float4(gl_SubgroupLeMask).x; - out.FragColor = float4(gl_SubgroupLtMask).x; - float4 broadcasted = simd_broadcast(float4(10.0), 8u); - float3 first = simd_broadcast_first(float3(20.0)); - uint4 ballot_value = spvSubgroupBallot(true); - bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); - bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); - uint bit_count = spvSubgroupBallotBitCount(ballot_value); - uint inclusive_bit_count = 
spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); - uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); - uint lsb = spvSubgroupBallotFindLSB(ballot_value); - uint msb = spvSubgroupBallotFindMSB(ballot_value); - uint shuffled = simd_shuffle(10u, 8u); - uint shuffled_xor = simd_shuffle_xor(30u, 8u); - uint shuffled_up = simd_shuffle_up(20u, 4u); - uint shuffled_down = simd_shuffle_down(20u, 4u); - bool has_all = simd_all(true); - bool has_any = simd_any(true); - bool has_equal = spvSubgroupAllEqual(0); - has_equal = spvSubgroupAllEqual(true); - float4 added = simd_sum(float4(20.0)); - int4 iadded = simd_sum(int4(20)); - float4 multiplied = simd_product(float4(20.0)); - int4 imultiplied = simd_product(int4(20)); - float4 lo = simd_min(float4(20.0)); - float4 hi = simd_max(float4(20.0)); - int4 slo = simd_min(int4(20)); - int4 shi = simd_max(int4(20)); - uint4 ulo = simd_min(uint4(20u)); - uint4 uhi = simd_max(uint4(20u)); - uint4 anded = simd_and(ballot_value); - uint4 ored = simd_or(ballot_value); - uint4 xored = simd_xor(ballot_value); - added = simd_prefix_inclusive_sum(added); - iadded = simd_prefix_inclusive_sum(iadded); - multiplied = simd_prefix_inclusive_product(multiplied); - imultiplied = simd_prefix_inclusive_product(imultiplied); - added = simd_prefix_exclusive_sum(multiplied); - multiplied = simd_prefix_exclusive_product(multiplied); - iadded = simd_prefix_exclusive_sum(imultiplied); - imultiplied = simd_prefix_exclusive_product(imultiplied); - added = quad_sum(added); - multiplied = quad_product(multiplied); - iadded = quad_sum(iadded); - imultiplied = quad_product(imultiplied); - lo = quad_min(lo); - hi = quad_max(hi); - ulo = quad_min(ulo); - uhi = quad_max(uhi); - slo = quad_min(slo); - shi = quad_max(shi); - anded = quad_and(anded); - ored = quad_or(ored); - xored = quad_xor(xored); - float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u); - float4 swap_vertical = 
quad_shuffle_xor(float4(20.0), 2u); - float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u); - float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u); - return out; -} - diff --git a/reference/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert new file mode 100644 index 00000000000..e36576b86f5 --- /dev/null +++ b/reference/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + const int gl_DeviceIndex = 0; + const uint gl_ViewIndex = 0; + out.gl_Position = float4(float(gl_DeviceIndex), float(int(gl_ViewIndex)), 0.0, 1.0); + return out; +} + diff --git a/reference/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert new file mode 100644 index 00000000000..cc4bcc42027 --- /dev/null +++ b/reference/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + const int gl_DeviceIndex = 0; + out.gl_Position = float4(float(gl_DeviceIndex)); + return out; +} + diff --git a/reference/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert new file mode 100644 index 00000000000..8959afe821e --- /dev/null +++ b/reference/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct MVPs +{ + float4x4 MVP[2]; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex 
main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]]) +{ + main0_out out = {}; + const uint gl_ViewIndex = spvViewMask[0]; + out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position; + return out; +} + diff --git a/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert index c42e67211e7..20eff0a124f 100644 --- a/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert +++ b/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert @@ -19,11 +19,11 @@ struct main0_in float4 Position [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) +vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_BaseInstance [[base_instance]]) { main0_out out = {}; - uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1]; - gl_InstanceIndex /= spvViewMask[1]; + uint gl_ViewIndex = spvViewMask[0] + (gl_InstanceIndex - gl_BaseInstance) % spvViewMask[1]; + gl_InstanceIndex = (gl_InstanceIndex - gl_BaseInstance) / spvViewMask[1] + gl_BaseInstance; out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position; out.gl_Layer = gl_ViewIndex - spvViewMask[0]; return out; diff --git a/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert index f87d2a11adc..5152b6222ee 100644 --- a/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert +++ b/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert @@ -19,7 +19,7 @@ struct main0_in float4 Position [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex 
[[instance_id]]) +vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_BaseInstance [[base_instance]]) { main0_out out = {}; const uint gl_ViewIndex = 0; diff --git a/reference/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert b/reference/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert index 53e26e4a8eb..86a0cea5bb0 100644 --- a/reference/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert +++ b/reference/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert @@ -11,7 +11,7 @@ struct main0_out vertex main0_out main0(uint gl_VertexIndex [[vertex_id]], uint gl_InstanceIndex [[instance_id]]) { main0_out out = {}; - out.gl_Position = float4(1.0, 2.0, 3.0, 4.0) * float(gl_VertexIndex + gl_InstanceIndex); + out.gl_Position = float4(1.0, 2.0, 3.0, 4.0) * float(int(gl_VertexIndex) + int(gl_InstanceIndex)); return out; } diff --git a/reference/shaders-no-opt/asm/comp/access-tracking-function-call-result.asm.comp b/reference/shaders-no-opt/asm/comp/access-tracking-function-call-result.asm.comp new file mode 100644 index 00000000000..e4dfdb87a82 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/access-tracking-function-call-result.asm.comp @@ -0,0 +1,24 @@ +#version 460 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer Output +{ + int myout; +} _5; + +int foo() +{ + return 12; +} + +void main() +{ + int _17 = foo(); + while (true) + { + _5.myout = _17; + return; + } + _5.myout = _17; +} + diff --git a/reference/shaders-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/reference/shaders-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 00000000000..d36f5431088 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,25 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct T +{ + float c; +}; + +layout(binding = 0, std430) 
buffer SSBO1 +{ + T foo[]; +} _7; + +layout(binding = 1, std140) buffer SSBO2 +{ + T bar[]; +} _10; + +void main() +{ + T v = T(40.0); + _7.foo[10].c = v.c; + _10.bar[30].c = v.c; +} + diff --git a/reference/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp.vk b/reference/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp.vk index c2fb39907b0..5f480728e4e 100644 --- a/reference/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp.vk +++ b/reference/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp.vk @@ -33,8 +33,12 @@ void main() _4.u16 = uint16_t(_30); _4.f32 = float(_31); _4.f32 = float(int16_t(_32)); + _4.f32 = float(_29); + _4.f32 = float(int(_30)); _4.f32 = float(uint16_t(_31)); _4.f32 = float(_32); + _4.f32 = float(uint(_29)); + _4.f32 = float(_30); _4.s16 = int16_t(_33); _4.u16 = uint16_t(int16_t(_33)); _4.u16 = uint16_t(_33); diff --git a/reference/shaders-no-opt/asm/comp/atomic-load-store.asm.comp b/reference/shaders-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 00000000000..10a54fc8cf0 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + uint a; + uint b; +} _5; + +void main() +{ + uint _20 = atomicAdd(_5.b, 0u); + uint c = _20; + atomicExchange(_5.a, c); +} + diff --git a/reference/shaders-no-opt/asm/comp/basic.spv16.asm.comp b/reference/shaders-no-opt/asm/comp/basic.spv16.asm.comp new file mode 100644 index 00000000000..7c237d8abd5 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/basic.spv16.asm.comp @@ -0,0 +1,13 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float values[]; +} _3; + +void main() +{ + _3.values[gl_GlobalInvocationID.x] += 2.0; +} + diff --git 
a/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp b/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp new file mode 100644 index 00000000000..66a70f18486 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp @@ -0,0 +1,24 @@ +#version 450 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for FP16. +#endif +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + f16vec2 a; + float b; + float c; + f16vec2 d; +} _4; + +void main() +{ + _4.b = uintBitsToFloat(packFloat2x16(_4.a)); + _4.d = unpackFloat2x16(floatBitsToUint(_4.c)); +} + diff --git a/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp.vk b/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp.vk new file mode 100644 index 00000000000..09eccf4b31e --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp.vk @@ -0,0 +1,25 @@ +#version 450 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. 
+#endif +#extension GL_EXT_shader_16bit_storage : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + f16vec2 a; + float b; + float c; + f16vec2 d; +} _4; + +void main() +{ + _4.b = uintBitsToFloat(packFloat2x16(_4.a)); + _4.d = unpackFloat2x16(floatBitsToUint(_4.c)); +} + diff --git a/reference/shaders-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/reference/shaders-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 00000000000..f535ba7f49b --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,27 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + ivec4 ints; + uvec4 uints; +} _3; + +void main() +{ + ivec4 _19 = _3.ints; + uvec4 _20 = _3.uints; + _3.ints = bitCount(_19); + _3.uints = uvec4(bitCount(_19)); + _3.ints = bitCount(_20); + _3.uints = uvec4(bitCount(_20)); + _3.ints = bitfieldReverse(_19); + _3.uints = bitfieldReverse(_20); + _3.ints = bitfieldExtract(_19, 1, int(11u)); + _3.uints = uvec4(bitfieldExtract(ivec4(_20), int(11u), 1)); + _3.ints = ivec4(bitfieldExtract(uvec4(_19), 1, int(11u))); + _3.uints = bitfieldExtract(_20, int(11u), 1); + _3.ints = bitfieldInsert(_19, _19.wzyx, 1, int(11u)); + _3.uints = bitfieldInsert(_20, _20.wzyx, int(11u), 1); +} + diff --git a/reference/shaders-no-opt/asm/comp/bitscan.asm.comp b/reference/shaders-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 00000000000..31a6234abb5 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,27 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + uvec4 u; + ivec4 i; +} _4; + +void main() +{ + uvec4 _19 = _4.u; + ivec4 _20 = _4.i; + _4.u = uvec4(findLSB(_19)); + _4.i = findLSB(_19); + _4.u = uvec4(findLSB(_20)); + _4.i = findLSB(_20); 
+ _4.u = uvec4(findMSB(_19)); + _4.i = findMSB(_19); + _4.u = uvec4(findMSB(uvec4(_20))); + _4.i = findMSB(uvec4(_20)); + _4.u = uvec4(findMSB(ivec4(_19))); + _4.i = findMSB(ivec4(_19)); + _4.u = uvec4(findMSB(_20)); + _4.i = findMSB(_20); +} + diff --git a/reference/shaders-no-opt/asm/comp/buffer-atomic-nonuniform.vk.nocompat.asm.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-atomic-nonuniform.vk.nocompat.asm.comp.vk new file mode 100644 index 00000000000..d700d613534 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/buffer-atomic-nonuniform.vk.nocompat.asm.comp.vk @@ -0,0 +1,15 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + uint v; +} ssbos[]; + +void main() +{ + uint _24 = gl_GlobalInvocationID.z; + uint _25 = atomicAdd(ssbos[nonuniformEXT(_24)].v, 1u); +} + diff --git a/reference/shaders-no-opt/asm/comp/buffer-device-address-ptr-casting.vk.nocompat.asm.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-device-address-ptr-casting.vk.nocompat.asm.comp.vk new file mode 100644 index 00000000000..f082267f931 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/buffer-device-address-ptr-casting.vk.nocompat.asm.comp.vk @@ -0,0 +1,37 @@ +#version 450 +#if defined(GL_ARB_gpu_shader_int64) +#extension GL_ARB_gpu_shader_int64 : require +#else +#error No extension available for 64-bit integers. 
+#endif +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer SomeBuffer; +layout(buffer_reference, buffer_reference_align = 16, std430) buffer SomeBuffer +{ + vec4 v; + uint64_t a; + uvec2 b; +}; + +layout(push_constant, std430) uniform Registers +{ + uint64_t address; + uvec2 address2; +} registers; + +void main() +{ + SomeBuffer _44 = SomeBuffer(registers.address); + SomeBuffer _45 = SomeBuffer(registers.address); + SomeBuffer _46 = SomeBuffer(registers.address2); + _44.v = vec4(1.0, 2.0, 3.0, 4.0); + _45.v = vec4(1.0, 2.0, 3.0, 4.0); + _46.v = vec4(1.0, 2.0, 3.0, 4.0); + _44.a = uint64_t(_44); + _45.a = uint64_t(_45); + _46.b = uvec2(_46); +} + diff --git a/reference/shaders-no-opt/asm/comp/buffer-reference-aliased-block-name.nocompat.vk.asm.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-reference-aliased-block-name.nocompat.vk.asm.comp.vk new file mode 100644 index 00000000000..12581cc3f8a --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/buffer-reference-aliased-block-name.nocompat.vk.asm.comp.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer Alias; +layout(buffer_reference) buffer _6; +layout(buffer_reference) buffer _7; +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer Alias +{ + vec4 v[]; +}; + +layout(buffer_reference, buffer_reference_align = 16, std430) restrict buffer _6 +{ + vec4 v[]; +}; + +layout(buffer_reference, buffer_reference_align = 16, std430) coherent writeonly buffer _7 +{ + vec4 v[]; +}; + +layout(push_constant, std430) uniform Registers +{ + Alias ro; + _6 rw; + _7 wo; +} registers; + +void main() +{ + registers.rw.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; + registers.wo.v[gl_GlobalInvocationID.x] = 
registers.ro.v[gl_GlobalInvocationID.x]; +} + diff --git a/reference/shaders-no-opt/asm/comp/buffer-reference-pointer-to-pod-in-buffer.asm.nocompat.vk.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-reference-pointer-to-pod-in-buffer.asm.nocompat.vk.comp.vk new file mode 100644 index 00000000000..06e620d2c9b --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/buffer-reference-pointer-to-pod-in-buffer.asm.nocompat.vk.comp.vk @@ -0,0 +1,19 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference, buffer_reference_align = 8) buffer uvec4Pointer +{ + uvec4 value; +}; + +layout(push_constant, std430) uniform Push +{ + uvec4Pointer ptr; +} _4; + +void main() +{ + _4.ptr.value = uvec4(1u, 2u, 3u, 4u); +} + diff --git a/reference/shaders-no-opt/asm/comp/buffer-reference-pointer-to-unused-pod-in-buffer.asm.nocompat.vk.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-reference-pointer-to-unused-pod-in-buffer.asm.nocompat.vk.comp.vk new file mode 100644 index 00000000000..44427de81e6 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/buffer-reference-pointer-to-unused-pod-in-buffer.asm.nocompat.vk.comp.vk @@ -0,0 +1,13 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer uvec4Pointer +{ + uvec4 value; +}; + +void main() +{ +} + diff --git a/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer-2.asm.nocompat.vk.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer-2.asm.nocompat.vk.comp.vk index 0288931915c..f77142a7434 100644 --- a/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer-2.asm.nocompat.vk.comp.vk +++ b/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer-2.asm.nocompat.vk.comp.vk @@ -1,9 +1,13 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension 
GL_ARB_gpu_shader_int64 : require +#else +#error No extension available for 64-bit integers. +#endif #extension GL_EXT_buffer_reference : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; -layout(buffer_reference) buffer uintPointer +layout(buffer_reference, buffer_reference_align = 4) buffer uintPointer { uint value; }; diff --git a/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer.asm.nocompat.vk.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer.asm.nocompat.vk.comp.vk index 9553199b462..6ba488be6b1 100644 --- a/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer.asm.nocompat.vk.comp.vk +++ b/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer.asm.nocompat.vk.comp.vk @@ -1,9 +1,13 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#else +#error No extension available for 64-bit integers. +#endif #extension GL_EXT_buffer_reference : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; -layout(buffer_reference) buffer uint0_Pointer +layout(buffer_reference, buffer_reference_align = 4) buffer uint0_Pointer { uint value[]; }; diff --git a/reference/shaders-no-opt/asm/comp/constant-composite-undef.asm.comp b/reference/shaders-no-opt/asm/comp/constant-composite-undef.asm.comp index 279dede112e..d8f1f19b12e 100644 --- a/reference/shaders-no-opt/asm/comp/constant-composite-undef.asm.comp +++ b/reference/shaders-no-opt/asm/comp/constant-composite-undef.asm.comp @@ -1,13 +1,13 @@ #version 450 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; +float _15; + layout(binding = 0, std430) buffer Block { vec4 f; } block; -float _15; - void main() { block.f = vec4(0.100000001490116119384765625, 0.20000000298023223876953125, 0.300000011920928955078125, 0.0); diff --git a/reference/shaders-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp 
b/reference/shaders-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 00000000000..1f43951a155 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 4, local_size_y = 4, local_size_z = 1) in; + +const int indexable[4] = int[](0, 1, 2, 3); +const int indexable_1[4] = int[](4, 5, 6, 7); + +layout(binding = 0, std430) buffer SSBO +{ + int values[]; +} _6; + +void main() +{ + _6.values[gl_GlobalInvocationID.x] = indexable[gl_LocalInvocationID.x] + indexable_1[gl_LocalInvocationID.y]; +} + diff --git a/reference/shaders-no-opt/asm/comp/copy-logical.spv14.asm.comp b/reference/shaders-no-opt/asm/comp/copy-logical.spv14.asm.comp new file mode 100644 index 00000000000..28b2d1d0e9a --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/copy-logical.spv14.asm.comp @@ -0,0 +1,45 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct B2 +{ + vec4 elem2; +}; + +struct C +{ + vec4 c; + B2 b2; + B2 b2_array[4]; +}; + +struct B1 +{ + vec4 elem1; +}; + +struct A +{ + vec4 a; + B1 b1; + B1 b1_array[4]; +}; + +layout(binding = 0, std430) buffer _8_3 +{ + A a_block; + C c_block; +} _3; + +void main() +{ + A _27; + _27.a = _3.c_block.c; + _27.b1.elem1 = _3.c_block.b2.elem2; + _27.b1_array[0].elem1 = _3.c_block.b2_array[0].elem2; + _27.b1_array[1].elem1 = _3.c_block.b2_array[1].elem2; + _27.b1_array[2].elem1 = _3.c_block.b2_array[2].elem2; + _27.b1_array[3].elem1 = _3.c_block.b2_array[3].elem2; + _3.a_block = _27; +} + diff --git a/reference/shaders-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp b/reference/shaders-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 00000000000..77ea03495f2 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,7 @@ +#version 450 
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +void main() +{ +} + diff --git a/reference/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp b/reference/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp new file mode 100644 index 00000000000..83a9b83fa12 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp @@ -0,0 +1,34 @@ +#version 430 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _4_5 +{ + uint _m0[16]; +} _5; + +layout(binding = 1, std430) buffer _4_6 +{ + uint _m0[16]; +} _6; + +layout(binding = 2, std430) buffer _4_7 +{ + uint _m0[16]; +} _7; + +vec4 _88(vec4 _89) +{ + for (int _91 = 0; _91 < 16; _91++) + { + uint _163 = _6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + _5._m0[_91])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))); + uint _225 = _6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + 
(_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + _163))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))); + _7._m0[_91] = _6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + _225)))))))))))))); + } + return _89; +} + +void main() +{ + vec4 _87 = _88(vec4(uvec4(gl_GlobalInvocationID, 0u))); +} + diff --git a/reference/opt/shaders/asm/extended-debug-extinst.invalid.asm.comp b/reference/shaders-no-opt/asm/comp/extended-debug-extinst.invalid.asm.comp similarity index 100% rename from reference/opt/shaders/asm/extended-debug-extinst.invalid.asm.comp rename to reference/shaders-no-opt/asm/comp/extended-debug-extinst.invalid.asm.comp diff --git a/reference/shaders-no-opt/asm/comp/fuzz-collapse-degenerate-loop.asm.comp b/reference/shaders-no-opt/asm/comp/fuzz-collapse-degenerate-loop.asm.comp new file mode 100644 index 00000000000..5a5f212faae --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/fuzz-collapse-degenerate-loop.asm.comp @@ -0,0 +1,51 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 1, std430) buffer _2_9 +{ + uint _m0[2]; +} _9; + +layout(binding = 0, std430) buffer _4_8 +{ + uint 
_m0[3]; +} _8; + +layout(binding = 2, std430) buffer _6_10 +{ + uint _m0[11]; +} _10; + +void main() +{ + uint _34 = 0u; + uint _35 = 0u; + uint _36 = 0u; + _10._m0[_34] = 8u; + _34++; + for (;;) + { + _10._m0[_34] = 9u; + _34++; + uint _44 = _35; + _35 = _44 + 1u; + if (_8._m0[_44] == 1u) + { + _10._m0[_34] = 12u; + _34++; + _36++; + _10._m0[_34] = 13u; + _34++; + _10._m0[_34] = 11u; + _34++; + continue; + } + else + { + break; + } + } + _10._m0[_34] = 10u; + _34++; +} + diff --git a/reference/shaders-no-opt/asm/comp/fuzz-loop-variable-dominator-with-unreachable-continue-block.asm.comp b/reference/shaders-no-opt/asm/comp/fuzz-loop-variable-dominator-with-unreachable-continue-block.asm.comp new file mode 100644 index 00000000000..73c7d367283 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/fuzz-loop-variable-dominator-with-unreachable-continue-block.asm.comp @@ -0,0 +1,411 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _4_12 +{ + uint _m0[1]; +} _12; + +layout(binding = 1, std430) buffer _4_13 +{ + uint _m0[1]; +} _13; + +layout(binding = 7, std430) buffer _4_19 +{ + uint _m0[1]; +} _19; + +layout(binding = 2, std430) buffer _6_14 +{ + uint _m0[2]; +} _14; + +layout(binding = 3, std430) buffer _6_15 +{ + uint _m0[2]; +} _15; + +layout(binding = 4, std430) buffer _6_16 +{ + uint _m0[2]; +} _16; + +layout(binding = 5, std430) buffer _6_17 +{ + uint _m0[2]; +} _17; + +layout(binding = 6, std430) buffer _6_18 +{ + uint _m0[2]; +} _18; + +layout(binding = 8, std430) buffer _8_20 +{ + uint _m0[3]; +} _20; + +layout(binding = 9, std430) buffer _10_21 +{ + uint _m0[37]; +} _21; + +void main() +{ + uint _70 = 0u; + uint _71 = 0u; + uint _72 = 0u; + uint _74 = 0u; + uint _75 = 0u; + uint _76 = 0u; + uint _77 = 0u; + uint _78 = 0u; + uint _79 = 0u; + uint _90 = ((gl_WorkGroupID.y * 1u) + (gl_WorkGroupID.z * 1u)) + gl_WorkGroupID.x; + uint _111 = (_90 * 2u) + (gl_LocalInvocationIndex * 2u); + _71 = 
(_90 * 1u) + (gl_LocalInvocationIndex * 1u); + _72 = (_90 * 1u) + (gl_LocalInvocationIndex * 1u); + _74 = (_90 * 2u) + (gl_LocalInvocationIndex * 2u); + _75 = (_90 * 2u) + (gl_LocalInvocationIndex * 2u); + _76 = (_90 * 2u) + (gl_LocalInvocationIndex * 2u); + _77 = (_90 * 2u) + (gl_LocalInvocationIndex * 2u); + _78 = (_90 * 1u) + (gl_LocalInvocationIndex * 1u); + _79 = (_90 * 3u) + (gl_LocalInvocationIndex * 3u); + _70 = (_90 * 37u) + (gl_LocalInvocationIndex * 37u); + _21._m0[_70] = 8u; + uint _123 = _70 + 1u; + _71++; + uint _136; + uint _233; + uint _234; + uint _241; + uint _242; + uint _73 = _111; + uint _129 = _123; + for (;;) + { + _21._m0[_129] = 9u; + _136 = _129 + 1u; + uint _141; + for (;;) + { + _21._m0[_136] = 12u; + _141 = _136 + 1u; + break; + } + uint _148; + uint _149; + uint _162; + uint _163; + for (;;) + { + _21._m0[_141] = 13u; + _148 = _141 + 1u; + _149 = _75; + _21._m0[_148] = 17u; + _75 = _149 + 1u; + uint _158; + if (_16._m0[_149] == 1u) + { + _158 = _148 + 1u; + _21._m0[_158] = 19u; + _162 = _158 + 1u; + _163 = _74; + break; + } + if (true) + { + _141 = 666u; + continue; + } + else + { + _162 = 666u; + _163 = 666u; + break; + } + } + _21._m0[_162] = 15u; + uint _165 = _162 + 1u; + _74 = _163 + 1u; + uint _174; + uint _178; + uint _179; + if (_15._m0[_163] == 1u) + { + _178 = _165; + _179 = _76; + _21._m0[_178] = 21u; + uint _181 = _178 + 1u; + uint _184 = _179 + 1u; + _76 = _184; + uint _186; + _186 = _181; + uint _191; + for (;;) + { + _21._m0[_186] = 23u; + uint _189 = _186 + 1u; + _191 = _189; + break; + } + uint _199; + uint _200; + uint _216; + uint _217; + uint _224; + uint _225; + for (;;) + { + _21._m0[_191] = 24u; + uint _195 = _191 + 1u; + uint _196 = _79; + _199 = _195; + _200 = _196; + _21._m0[_199] = 28u; + uint _202 = _199 + 1u; + uint _204 = _20._m0[_200]; + uint _205 = _200 + 1u; + _79 = _205; + uint _208; + uint _212; + bool _198_ladder_break = false; + switch (_204) + { + default: + { + _208 = _202; + _21._m0[_208] = 30u; 
+ uint _210 = _208 + 1u; + uint _211 = _77; + _224 = _210; + _225 = _211; + _198_ladder_break = true; + break; + } + case 1u: + { + _212 = _202; + break; + } + } + if (_198_ladder_break) + { + break; + } + _21._m0[_212] = 29u; + uint _214 = _212 + 1u; + uint _215 = _78; + _216 = _214; + _217 = _215; + _21._m0[_216] = 27u; + uint _192 = _216 + 1u; + uint _220 = _19._m0[_217]; + uint _222 = _217 + 1u; + _78 = _222; + uint _223 = _77; + if (_220 == 1u) + { + _191 = _192; + continue; + } + else + { + _224 = _192; + _225 = _223; + break; + } + } + _21._m0[_224] = 26u; + uint _227 = _224 + 1u; + uint _229 = _18._m0[_225]; + bool _230 = _229 == 1u; + uint _231 = _225 + 1u; + _77 = _231; + uint _232 = _73; + if (_230) + { + _233 = _227; + _234 = _232; + _21._m0[_233] = 11u; + uint _130 = _233 + 1u; + uint _237 = _14._m0[_234]; + uint _239 = _234 + 1u; + _73 = _239; + if (_237 == 1u) + { + _129 = _130; + continue; + } + else + { + _241 = _130; + _242 = _72; + break; + } + } + else + { + } + } + else + { + _174 = _165; + _21._m0[_174] = 22u; + _178 = _174 + 1u; + _179 = _76; + _21._m0[_178] = 21u; + uint _181 = _178 + 1u; + uint _184 = _179 + 1u; + _76 = _184; + uint _186; + _186 = _181; + uint _191; + for (;;) + { + _21._m0[_186] = 23u; + uint _189 = _186 + 1u; + _191 = _189; + break; + } + uint _199; + uint _200; + uint _216; + uint _217; + uint _224; + uint _225; + for (;;) + { + _21._m0[_191] = 24u; + uint _195 = _191 + 1u; + uint _196 = _79; + _199 = _195; + _200 = _196; + _21._m0[_199] = 28u; + uint _202 = _199 + 1u; + uint _204 = _20._m0[_200]; + uint _205 = _200 + 1u; + _79 = _205; + uint _208; + uint _212; + bool _198_ladder_break = false; + switch (_204) + { + default: + { + _208 = _202; + _21._m0[_208] = 30u; + uint _210 = _208 + 1u; + uint _211 = _77; + _224 = _210; + _225 = _211; + _198_ladder_break = true; + break; + } + case 1u: + { + _212 = _202; + break; + } + } + if (_198_ladder_break) + { + break; + } + _21._m0[_212] = 29u; + uint _214 = _212 + 1u; + uint 
_215 = _78; + _216 = _214; + _217 = _215; + _21._m0[_216] = 27u; + uint _192 = _216 + 1u; + uint _220 = _19._m0[_217]; + uint _222 = _217 + 1u; + _78 = _222; + uint _223 = _77; + if (_220 == 1u) + { + _191 = _192; + continue; + } + else + { + _224 = _192; + _225 = _223; + break; + } + } + _21._m0[_224] = 26u; + uint _227 = _224 + 1u; + uint _229 = _18._m0[_225]; + bool _230 = _229 == 1u; + uint _231 = _225 + 1u; + _77 = _231; + uint _232 = _73; + if (_230) + { + _233 = _227; + _234 = _232; + _21._m0[_233] = 11u; + uint _130 = _233 + 1u; + uint _237 = _14._m0[_234]; + uint _239 = _234 + 1u; + _73 = _239; + if (_237 == 1u) + { + _129 = _130; + continue; + } + else + { + _241 = _130; + _242 = _72; + break; + } + } + else + { + } + } + _233 = 666u; + _234 = 666u; + _21._m0[_233] = 11u; + uint _130 = _233 + 1u; + uint _237 = _14._m0[_234]; + uint _239 = _234 + 1u; + _73 = _239; + if (_237 == 1u) + { + _129 = _130; + continue; + } + else + { + _241 = _130; + _242 = _72; + break; + } + } + _21._m0[_241] = 10u; + _72 = _242 + 1u; + uint _251; + uint _254; + switch (_13._m0[_242]) + { + case 1u: + { + _254 = 666u; + break; + } + default: + { + _251 = _241 + 1u; + _21._m0[_251] = 32u; + _254 = _251 + 1u; + break; + } + } + _21._m0[_254] = 31u; +} + diff --git a/reference/shaders-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp b/reference/shaders-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp new file mode 100644 index 00000000000..dc0956c3453 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp @@ -0,0 +1,33 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct _8 +{ + float _m0; + float _m1; +}; + +struct _15 +{ + float _m0; + int _m1; +}; + +layout(binding = 0, std430) buffer _3_4 +{ + float _m0; + int _m1; +} _4; + +void main() +{ + _8 _23; + _23._m0 = modf(20.0, _23._m1); + _15 _24; + _24._m0 = frexp(40.0, _24._m1); + _4._m0 = _23._m0; + _4._m0 = _23._m1; + _4._m0 = _24._m0; + 
_4._m1 = _24._m1; +} + diff --git a/reference/shaders-no-opt/asm/comp/image-atomic-nonuniform.vk.nocompat.asm.comp.vk b/reference/shaders-no-opt/asm/comp/image-atomic-nonuniform.vk.nocompat.asm.comp.vk new file mode 100644 index 00000000000..c6c1ea3e266 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/image-atomic-nonuniform.vk.nocompat.asm.comp.vk @@ -0,0 +1,12 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, r32ui) uniform uimage2D uImage[]; + +void main() +{ + uint _26 = gl_GlobalInvocationID.z; + uint _31 = imageAtomicAdd(uImage[nonuniformEXT(_26)], ivec2(gl_GlobalInvocationID.xy), 1u); +} + diff --git a/reference/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp b/reference/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp new file mode 100644 index 00000000000..57587ebfa28 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp @@ -0,0 +1,29 @@ +#version 450 + +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 11u +#endif +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 12u +#endif +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 13u +#endif +const uint _4 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 14u +#endif +const uint _5 = SPIRV_CROSS_CONSTANT_ID_4; + +layout(local_size_x = 3, local_size_y = SPIRV_CROSS_CONSTANT_ID_1, local_size_z = SPIRV_CROSS_CONSTANT_ID_2) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 values[]; +} _8; + +void main() +{ + _8.values[gl_GlobalInvocationID.x] += vec4(2.0); +} + diff --git a/reference/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp.vk b/reference/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp.vk new file mode 100644 index 00000000000..0073fbee0cf --- /dev/null +++ 
b/reference/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp.vk @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 3, local_size_y_id = 1, local_size_z_id = 2) in; + +layout(constant_id = 3) const uint _4 = 13u; +layout(constant_id = 4) const uint _5 = 14u; + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + vec4 values[]; +} _8; + +void main() +{ + _8.values[gl_GlobalInvocationID.x] += vec4(2.0); +} + diff --git a/reference/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp b/reference/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp new file mode 100644 index 00000000000..5c2a09d3d4c --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp @@ -0,0 +1,31 @@ +#version 450 + +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 11 +#endif +const int _10 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 12 +#endif +const int _11 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 13 +#endif +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 14 +#endif +const uint _29 = (uint(int(gl_WorkGroupSize.x)) + 3u); +const uvec3 _30 = uvec3(_29, int(gl_WorkGroupSize.y), 2u); + +layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_3, local_size_y = SPIRV_CROSS_CONSTANT_ID_4, local_size_z = 2) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 values[]; +} _8; + +void main() +{ + _8.values[gl_GlobalInvocationID.x] = ((((_8.values[gl_GlobalInvocationID.x] + vec4(2.0)) + vec3(_30).xyzz) * float(int(gl_WorkGroupSize.x))) * float(int(gl_WorkGroupSize.y))) * float(int(2u)); +} + diff --git a/reference/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp.vk b/reference/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp.vk new file mode 100644 index 00000000000..b6a78bdf1cd --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp.vk @@ -0,0 +1,22 @@ +#version 450 +layout(local_size_x_id = 3, 
local_size_y_id = 4, local_size_z = 2) in; + +layout(constant_id = 1) const int _10 = 11; +layout(constant_id = 2) const int _11 = 12; +const uint _29 = (uint(int(gl_WorkGroupSize.x)) + 3u); +const uvec3 _30 = uvec3(_29, int(gl_WorkGroupSize.y), 2u); + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + vec4 values[]; +} _8; + +void main() +{ + vec3 _38 = vec3(_30); + float _41 = float(int(gl_WorkGroupSize.x)); + float _42 = float(int(gl_WorkGroupSize.y)); + float _43 = float(int(2u)); + _8.values[gl_GlobalInvocationID.x] = ((((_8.values[gl_GlobalInvocationID.x] + vec4(2.0)) + _38.xyzz) * _41) * _42) * _43; +} + diff --git a/reference/shaders-no-opt/asm/comp/loop-variable-early-read-with-initializer.asm.comp b/reference/shaders-no-opt/asm/comp/loop-variable-early-read-with-initializer.asm.comp new file mode 100644 index 00000000000..c34852f79c3 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/loop-variable-early-read-with-initializer.asm.comp @@ -0,0 +1,107 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 4, std430) buffer _2_12 +{ + uint _m0[1]; +} _12; + +layout(binding = 1, std430) buffer _2_9 +{ + uint _m0[1]; +} _9; + +layout(binding = 2, std430) buffer _2_10 +{ + uint _m0[1]; +} _10; + +layout(binding = 3, std430) buffer _2_11 +{ + uint _m0[1]; +} _11; + +layout(binding = 0, std430) buffer _4_8 +{ + uint _m0[2]; +} _8; + +layout(binding = 5, std430) buffer _6_13 +{ + uint _m0[11]; +} _13; + +void main() +{ + uint _43 = 0u; + uint _44 = 0u; + uint _45 = 0u; + uint _46 = 0u; + uint _47 = 0u; + _13._m0[0u] = 8u; + uint _50 = 0u + 1u; + uint _42 = _50; + for (;;) + { + _13._m0[_42] = 9u; + _42++; + uint _55 = _43; + _43 = _55 + 1u; + if (_8._m0[_55] == 1u) + { + _13._m0[_42] = 12u; + _42++; + return; + } + else + { + _13._m0[_42] = 13u; + _42++; + uint _70 = _44; + _44 = _70 + 1u; + if (_9._m0[_70] == 1u) + { + _13._m0[_42] = 11u; + _42++; + _13._m0[_42] = 14u; + _42++; + _45++; + do + { + _13._m0[_42] 
= 16u; + _42++; + break; + } while(false); + _13._m0[_42] = 15u; + _42++; + uint _94 = _46; + _46 = _94 + 1u; + if (_11._m0[_94] == 1u) + { + } + else + { + _13._m0[_42] = 19u; + _42++; + } + _13._m0[_42] = 17u; + _42++; + uint _108 = _47; + _47 = _108 + 1u; + if (_12._m0[_108] == 1u) + { + continue; + } + else + { + break; + } + } + else + { + break; + } + } + } +} + diff --git a/reference/shaders-no-opt/asm/comp/loop-variable-early-read-with-undef.asm.comp b/reference/shaders-no-opt/asm/comp/loop-variable-early-read-with-undef.asm.comp new file mode 100644 index 00000000000..08f3b44e0b7 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/loop-variable-early-read-with-undef.asm.comp @@ -0,0 +1,108 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 4, std430) buffer _2_12 +{ + uint _m0[1]; +} _12; + +layout(binding = 1, std430) buffer _2_9 +{ + uint _m0[1]; +} _9; + +layout(binding = 2, std430) buffer _2_10 +{ + uint _m0[1]; +} _10; + +layout(binding = 3, std430) buffer _2_11 +{ + uint _m0[1]; +} _11; + +layout(binding = 0, std430) buffer _4_8 +{ + uint _m0[2]; +} _8; + +layout(binding = 5, std430) buffer _6_13 +{ + uint _m0[11]; +} _13; + +void main() +{ + uint _43 = 0u; + uint _44 = 0u; + uint _45 = 0u; + uint _46 = 0u; + uint _47 = 0u; + uint _115; + _13._m0[_115] = 8u; + uint _50 = _115 + 1u; + uint _42 = _50; + for (;;) + { + _13._m0[_42] = 9u; + _42++; + uint _55 = _43; + _43 = _55 + 1u; + if (_8._m0[_55] == 1u) + { + _13._m0[_42] = 12u; + _42++; + return; + } + else + { + _13._m0[_42] = 13u; + _42++; + uint _70 = _44; + _44 = _70 + 1u; + if (_9._m0[_70] == 1u) + { + _13._m0[_42] = 11u; + _42++; + _13._m0[_42] = 14u; + _42++; + _45++; + do + { + _13._m0[_42] = 16u; + _42++; + break; + } while(false); + _13._m0[_42] = 15u; + _42++; + uint _94 = _46; + _46 = _94 + 1u; + if (_11._m0[_94] == 1u) + { + } + else + { + _13._m0[_42] = 19u; + _42++; + } + _13._m0[_42] = 17u; + _42++; + uint _108 = _47; + _47 = _108 
+ 1u; + if (_12._m0[_108] == 1u) + { + continue; + } + else + { + break; + } + } + else + { + break; + } + } + } +} + diff --git a/reference/shaders-no-opt/asm/comp/loop-variable-with-initializer.asm.comp b/reference/shaders-no-opt/asm/comp/loop-variable-with-initializer.asm.comp new file mode 100644 index 00000000000..ca8e58cfc75 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/loop-variable-with-initializer.asm.comp @@ -0,0 +1,12 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +void main() +{ + uint i = 0u; + for (;;) + { + break; + } +} + diff --git a/reference/shaders-no-opt/asm/comp/multi-break-switch-out-of-loop.asm.comp b/reference/shaders-no-opt/asm/comp/multi-break-switch-out-of-loop.asm.comp new file mode 100644 index 00000000000..7de95ae6b4e --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/multi-break-switch-out-of-loop.asm.comp @@ -0,0 +1,52 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std140) uniform UBO +{ + int v; +} _6; + +void main() +{ + uint count = 0u; + for (int i = 0; i < 4; i++) + { + bool _31_ladder_break = false; + do + { + bool _33_ladder_break = false; + do + { + bool _35_ladder_break = false; + do + { + if (_6.v == 20) + { + _35_ladder_break = true; + _33_ladder_break = true; + _31_ladder_break = true; + break; + } + break; + } while(false); + if (_35_ladder_break) + { + break; + } + break; + } while(false); + if (_33_ladder_break) + { + break; + } + count++; + break; + } while(false); + if (_31_ladder_break) + { + break; + } + count++; + } +} + diff --git a/reference/shaders-no-opt/asm/comp/nonuniform-bracket-handling.vk.nocompat.asm.comp.vk b/reference/shaders-no-opt/asm/comp/nonuniform-bracket-handling.vk.nocompat.asm.comp.vk new file mode 100644 index 00000000000..9b7de0622f8 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/nonuniform-bracket-handling.vk.nocompat.asm.comp.vk @@ -0,0 +1,55 @@ +#version 450 +#extension 
GL_EXT_buffer_reference : require +#extension GL_EXT_nonuniform_qualifier : require +#extension GL_KHR_shader_subgroup_ballot : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, std430) restrict readonly buffer SSBO_Offsets +{ + uvec2 _m0[]; +} _7; + +layout(push_constant, std430) uniform RootConstants +{ + uint _m0; + uint _m1; + uint _m2; + uint _m3; + uint _m4; + uint _m5; + uint _m6; + uint _m7; +} registers; + +layout(set = 1, binding = 0) uniform samplerBuffer _8[]; +layout(set = 4, binding = 0, r32f) uniform imageBuffer _9[]; +layout(set = 4, binding = 0, r32ui) uniform uimageBuffer _10[]; + +void main() +{ + uint _61 = registers._m4 + 2u; + uint _64 = subgroupBroadcastFirst(_61); + uint _71 = subgroupBroadcastFirst(registers._m4); + uint _76 = registers._m1 + 1u; + uint _79 = subgroupBroadcastFirst(_76); + uint _88 = gl_GlobalInvocationID.x + 4u; + uint _99 = gl_GlobalInvocationID.x + 1024u; + imageStore(_9[registers._m4], int((_99 < _7._m0[_71].y) ? (_99 + _7._m0[_71].x) : 4294967295u), vec4(imageLoad(_9[registers._m4], int((_88 < _7._m0[_71].y) ? (_88 + _7._m0[_71].x) : 4294967295u)))); + uint _106 = gl_GlobalInvocationID.x + 2u; + uint _117 = gl_GlobalInvocationID.x + 2048u; + imageStore(_9[registers._m4], int((_117 < _7._m0[_71].y) ? (_117 + _7._m0[_71].x) : 4294967295u), vec4(texelFetch(_8[_76], int((_106 < _7._m0[_79].y) ? (_106 + _7._m0[_79].x) : 4294967295u)))); + uint _130 = imageAtomicAdd(_10[_61], int((gl_GlobalInvocationID.x < _7._m0[_64].y) ? (gl_GlobalInvocationID.x + _7._m0[_64].x) : 4294967295u), 40u); + uint _137 = imageAtomicCompSwap(_10[_61], int((gl_GlobalInvocationID.y < _7._m0[_64].y) ? (gl_GlobalInvocationID.y + _7._m0[_64].x) : 4294967295u), 40u, 50u); + imageStore(_9[registers._m4], int((0u < _7._m0[_71].y) ? (0u + _7._m0[_71].x) : 4294967295u), vec4(float(_7._m0[_71].y))); + imageStore(_9[registers._m4], int((1u < _7._m0[_71].y) ? 
(1u + _7._m0[_71].x) : 4294967295u), vec4(float(_7._m0[_79].y))); + uint _11 = registers._m4 + (gl_GlobalInvocationID.z + 0u); + imageStore(_9[nonuniformEXT(_11)], int((_99 < _7._m0[_11].y) ? (_99 + _7._m0[_11].x) : 4294967295u), vec4(imageLoad(_9[nonuniformEXT(_11)], int((_88 < _7._m0[_11].y) ? (_88 + _7._m0[_11].x) : 4294967295u)))); + uint _13 = registers._m1 + (gl_GlobalInvocationID.z + 0u); + imageStore(_9[nonuniformEXT(_11)], int((_117 < _7._m0[_11].y) ? (_117 + _7._m0[_11].x) : 4294967295u), vec4(texelFetch(_8[nonuniformEXT(_13)], int((_88 < _7._m0[_13].y) ? (_88 + _7._m0[_13].x) : 4294967295u)))); + uint _15 = registers._m4 + (gl_GlobalInvocationID.z + 0u); + uint _209 = imageAtomicAdd(_10[nonuniformEXT(_15)], int((gl_GlobalInvocationID.y < _7._m0[_15].y) ? (gl_GlobalInvocationID.y + _7._m0[_15].x) : 4294967295u), 40u); + uint _215 = imageAtomicCompSwap(_10[nonuniformEXT(_15)], int((gl_GlobalInvocationID.y < _7._m0[_15].y) ? (gl_GlobalInvocationID.y + _7._m0[_15].x) : 4294967295u), 40u, 70u); + imageStore(_9[registers._m4], int((2u < _7._m0[_71].y) ? (2u + _7._m0[_71].x) : 4294967295u), vec4(float(_7._m0[_11].y))); + imageStore(_9[registers._m4], int((3u < _7._m0[_71].y) ? 
(3u + _7._m0[_71].x) : 4294967295u), vec4(float(_7._m0[_13].y))); +} + diff --git a/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/reference/shaders-no-opt/asm/comp/phi-temporary-copy-loop-variable.asm.invalid.comp similarity index 86% rename from reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp rename to reference/shaders-no-opt/asm/comp/phi-temporary-copy-loop-variable.asm.invalid.comp index 9ae8d6fd7f3..f8650b5e5c2 100644 --- a/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp +++ b/reference/shaders-no-opt/asm/comp/phi-temporary-copy-loop-variable.asm.invalid.comp @@ -5,20 +5,15 @@ layout(binding = 1, rgba32f) uniform writeonly image2D outImageTexture; void main() { + int _27_copy; int _30; _30 = 7; - int _27_copy; for (int _27 = 7; _27 >= 0; _27_copy = _27, _27--, _30 = _27_copy) { if (5.0 > float(_27)) { break; } - else - { - continue; - } - continue; } imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0)); } diff --git a/reference/shaders-no-opt/asm/comp/ray-query-force-temporary-rtas.spv14.asm.vk.nocompat.comp.vk b/reference/shaders-no-opt/asm/comp/ray-query-force-temporary-rtas.spv14.asm.vk.nocompat.comp.vk new file mode 100644 index 00000000000..fa46c715bac --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/ray-query-force-temporary-rtas.spv14.asm.vk.nocompat.comp.vk @@ -0,0 +1,28 @@ +#version 460 +#extension GL_EXT_ray_query : require +#extension GL_EXT_ray_tracing : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, std430) readonly buffer Buf +{ + uvec2 vas[1024]; +} _3; + +layout(push_constant, std430) uniform Registers +{ + uint index; +} _4; + +rayQueryEXT rq; + +void main() +{ + uvec2 _41; + do + { + uvec2 va = _3.vas[_4.index]; + _41 = _3.vas[_4.index]; + } while (false); + rayQueryInitializeEXT(rq, accelerationStructureEXT(_41), 0u, 0u, vec3(0.0), 0.0, vec3(0.0), 0.0); +} + 
diff --git a/reference/shaders-no-opt/asm/comp/ray-query-function-object.spv14.asm.vk.nocompat.comp.vk b/reference/shaders-no-opt/asm/comp/ray-query-function-object.spv14.asm.vk.nocompat.comp.vk new file mode 100644 index 00000000000..ccbbc02ff44 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/ray-query-function-object.spv14.asm.vk.nocompat.comp.vk @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_ray_query : require +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +float _16; +vec3 _17; + +layout(set = 0, binding = 0) uniform accelerationStructureEXT RTAS; + +void main() +{ + rayQueryEXT _19; + rayQueryInitializeEXT(_19, RTAS, 2u, 255u, _17, _16, _17, _16); +} + diff --git a/reference/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp b/reference/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp new file mode 100644 index 00000000000..e7b9dbf3377 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp @@ -0,0 +1,44 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0 +#endif +const int A = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1 +#endif +const int A_1 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 2 +#endif +const int A_2 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 3 +#endif +const int A_3 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 4 +#endif +const int A_4 = SPIRV_CROSS_CONSTANT_ID_4; +#ifndef SPIRV_CROSS_CONSTANT_ID_5 +#define SPIRV_CROSS_CONSTANT_ID_5 5 +#endif +const int A_5 = SPIRV_CROSS_CONSTANT_ID_5; +const int A_6 = (A - A_1); +const int A_7 = (A_6 - A_2); +const int A_8 = (A_7 - A_3); +const int A_9 = (A_8 - A_4); +const int A_10 = (A_9 - A_5); 
+const int A_11 = (A_10 + A_5); + +layout(binding = 0, std430) buffer SSBO +{ + int values[]; +} _5; + +void main() +{ + _5.values[gl_GlobalInvocationID.x] = A_11; +} + diff --git a/reference/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp.vk b/reference/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp.vk new file mode 100644 index 00000000000..c31d0787d80 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp.vk @@ -0,0 +1,26 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(constant_id = 0) const int A = 0; +layout(constant_id = 1) const int A_1 = 1; +layout(constant_id = 2) const int A_2 = 2; +layout(constant_id = 3) const int A_3 = 3; +layout(constant_id = 4) const int A_4 = 4; +layout(constant_id = 5) const int A_5 = 5; +const int A_6 = (A - A_1); +const int A_7 = (A_6 - A_2); +const int A_8 = (A_7 - A_3); +const int A_9 = (A_8 - A_4); +const int A_10 = (A_9 - A_5); +const int A_11 = (A_10 + A_5); + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + int values[]; +} _5; + +void main() +{ + _5.values[gl_GlobalInvocationID.x] = A_11; +} + diff --git a/reference/shaders-no-opt/asm/comp/spec-constant-op-convert-sign.asm.comp b/reference/shaders-no-opt/asm/comp/spec-constant-op-convert-sign.asm.comp index c6aa711f650..50ca0fbdbc4 100644 --- a/reference/shaders-no-opt/asm/comp/spec-constant-op-convert-sign.asm.comp +++ b/reference/shaders-no-opt/asm/comp/spec-constant-op-convert-sign.asm.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. 
+#endif layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; #ifndef SPIRV_CROSS_CONSTANT_ID_0 diff --git a/reference/opt/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp b/reference/shaders-no-opt/asm/comp/storage-buffer-basic.asm.comp similarity index 100% rename from reference/opt/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp rename to reference/shaders-no-opt/asm/comp/storage-buffer-basic.asm.comp diff --git a/reference/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp.vk b/reference/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp.vk new file mode 100644 index 00000000000..22834fa8e72 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp.vk @@ -0,0 +1,36 @@ +#version 450 +#extension GL_KHR_shader_subgroup_ballot : require +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform usamplerBuffer _4; +layout(set = 0, binding = 0, r32ui) uniform writeonly uimageBuffer _5; + +uvec4 WaveMatch(uint _45) +{ + uvec4 _52; + for (;;) + { + bool _51 = _45 == subgroupBroadcastFirst(_45); + _52 = subgroupBallot(_51); + if (_51) + { + break; + } + else + { + continue; + } + } + return _52; +} + +void main() +{ + uvec4 _32 = WaveMatch(texelFetch(_4, int(gl_GlobalInvocationID.x)).x); + uint _37 = gl_GlobalInvocationID.x * 4u; + imageStore(_5, int(_37), uvec4(_32.x)); + imageStore(_5, int(_37 + 1u), uvec4(_32.y)); + imageStore(_5, int(_37 + 2u), uvec4(_32.z)); + imageStore(_5, int(_37 + 3u), uvec4(_32.w)); +} + diff --git a/reference/shaders-no-opt/asm/degenerate-selection-constructs.asm.frag b/reference/shaders-no-opt/asm/degenerate-selection-constructs.asm.frag new file mode 100644 index 00000000000..eb1cf0ca940 --- /dev/null +++ b/reference/shaders-no-opt/asm/degenerate-selection-constructs.asm.frag @@ -0,0 +1,118 @@ +#version 320 es +precision mediump float; +precision highp int; + +layout(binding = 1, 
std140) uniform buf1 +{ + highp vec2 resolution; +} _9; + +layout(binding = 0, std140) uniform buf0 +{ + highp vec2 injectionSwitch; +} _13; + +layout(location = 0) out highp vec4 _GLF_color; + +bool checkSwap(highp float a, highp float b) +{ + bool _153 = gl_FragCoord.y < (_9.resolution.y / 2.0); + highp float _160; + if (_153) + { + _160 = a; + } + else + { + highp float _159 = 0.0; + _160 = _159; + } + bool _147; + do + { + highp float _168; + if (_153) + { + _168 = b; + } + else + { + highp float _167 = 0.0; + _168 = _167; + } + if (_153) + { + _147 = _160 > _168; + } + if (true) + { + break; + } + else + { + break; + } + } while(false); + highp float _180; + if (_153) + { + highp float _179 = 0.0; + _180 = _179; + } + else + { + _180 = a; + } + highp float _186; + if (_153) + { + highp float _185 = 0.0; + _186 = _185; + } + else + { + _186 = b; + } + if (!_153) + { + _147 = _180 < _186; + } + return _147; +} + +void main() +{ + highp float data[10]; + for (int i = 0; i < 10; i++) + { + data[i] = float(10 - i) * _13.injectionSwitch.y; + } + for (int i_1 = 0; i_1 < 9; i_1++) + { + for (int j = 0; j < 10; j++) + { + if (j < (i_1 + 1)) + { + continue; + } + highp float param = data[i_1]; + highp float param_1 = data[j]; + bool doSwap = checkSwap(param, param_1); + if (doSwap) + { + highp float temp = data[i_1]; + data[i_1] = data[j]; + data[j] = temp; + } + } + } + if (gl_FragCoord.x < (_9.resolution.x / 2.0)) + { + _GLF_color = vec4(data[0] / 10.0, data[5] / 10.0, data[9] / 10.0, 1.0); + } + else + { + _GLF_color = vec4(data[5] / 10.0, data[9] / 10.0, data[0] / 10.0, 1.0); + } +} + diff --git a/reference/shaders-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag b/reference/shaders-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag new file mode 100644 index 00000000000..b2d8919aa96 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag @@ -0,0 +1,63 @@ +#version 450 + +struct anon_aa +{ + int foo; +}; + +struct anon_ab 
+{ + int foo; +}; + +struct anon_a +{ + anon_aa _aa; + anon_ab ab; +}; + +struct anon_ba +{ + int foo; +}; + +struct anon_bb +{ + int foo; +}; + +struct anon_b +{ + anon_ba _ba; + anon_bb bb; +}; + +struct anon_ca +{ + int foo; +}; + +struct anon_c +{ + anon_ca _ca; +}; + +struct anon_da +{ + int foo; +}; + +struct anon_d +{ + anon_da da; +}; + +struct anon_e +{ + int a; +}; + +void main() +{ +} + diff --git a/reference/shaders-no-opt/asm/frag/array-builtin-bitcast-load-store.asm.frag b/reference/shaders-no-opt/asm/frag/array-builtin-bitcast-load-store.asm.frag new file mode 100644 index 00000000000..40f6ee714b1 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/array-builtin-bitcast-load-store.asm.frag @@ -0,0 +1,28 @@ +#version 450 + +layout(binding = 0, std140) uniform uBuffer +{ + vec4 color; +} x_12; + +layout(location = 0) out vec4 fragColor; +const vec4 _2_init = vec4(0.0); + +void main() +{ + fragColor = _2_init; + gl_SampleMask[0] = 0; + fragColor = x_12.color; + gl_SampleMask[0] = int(uint(6)); + gl_SampleMask[0] = int(uint(gl_SampleMask[0])); + uint _30_unrolled[1]; + for (int i = 0; i < int(1); i++) + { + _30_unrolled[i] = int(gl_SampleMask[i]); + } + for (int i = 0; i < int(1); i++) + { + gl_SampleMask[i] = int(_30_unrolled[i]); + } +} + diff --git a/reference/shaders-no-opt/asm/frag/collapsed-switch-phi-flush.asm.frag b/reference/shaders-no-opt/asm/frag/collapsed-switch-phi-flush.asm.frag new file mode 100644 index 00000000000..a6f3e694418 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/collapsed-switch-phi-flush.asm.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + vec4 _17; + _17 = vec4(1.0); + FragColor = _17; +} + diff --git a/reference/shaders-no-opt/asm/frag/combined-image-sampler-dxc-min16float.asm.invalid.frag b/reference/shaders-no-opt/asm/frag/combined-image-sampler-dxc-min16float.asm.invalid.frag new file mode 100644 index 00000000000..5fa822b39f2 --- /dev/null +++ 
b/reference/shaders-no-opt/asm/frag/combined-image-sampler-dxc-min16float.asm.invalid.frag @@ -0,0 +1,28 @@ +#version 310 es +precision mediump float; +precision highp int; + +struct PSInput +{ + highp vec4 color; + highp vec2 uv; +}; + +uniform mediump sampler2D SPIRV_Cross_CombinedtexSamp; + +layout(location = 0) in highp vec4 in_var_COLOR; +layout(location = 1) in highp vec2 in_var_TEXCOORD0; +layout(location = 0) out highp vec4 out_var_SV_TARGET; + +highp vec4 src_PSMain(PSInput _input) +{ + vec4 a = _input.color * texture(SPIRV_Cross_CombinedtexSamp, _input.uv); + return a; +} + +void main() +{ + PSInput param_var_input = PSInput(in_var_COLOR, in_var_TEXCOORD0); + out_var_SV_TARGET = src_PSMain(param_var_input); +} + diff --git a/reference/shaders-no-opt/asm/frag/complex-opaque-handle-reuse-in-loop.asm.frag b/reference/shaders-no-opt/asm/frag/complex-opaque-handle-reuse-in-loop.asm.frag new file mode 100644 index 00000000000..140a336debe --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/complex-opaque-handle-reuse-in-loop.asm.frag @@ -0,0 +1,71 @@ +#version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH [[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif + +struct MyConsts +{ + uint opt; +}; + +uvec4 _37; + +layout(binding = 3, std140) uniform type_scene +{ + MyConsts myConsts; +} scene; + +uniform sampler2D SPIRV_Cross_CombinedtexTablemySampler[1]; + +layout(location = 1) out uint out_var_SV_TARGET1; + +void main() +{ + uint _42; + bool _47; + float _55; + do + { + _42 = _37.y & 16777215u; + _47 = scene.myConsts.opt != 0u; + SPIRV_CROSS_BRANCH + if (_47) + { + _55 = 1.0; + break; + } + else + { + _55 = textureLod(SPIRV_Cross_CombinedtexTablemySampler[_42], 
vec2(0.0), 0.0).x; + break; + } + break; // unreachable workaround + } while(false); + float _66; + do + { + SPIRV_CROSS_BRANCH + if (_47) + { + _66 = 1.0; + break; + } + else + { + _66 = textureLod(SPIRV_Cross_CombinedtexTablemySampler[_42], vec2(0.0), 0.0).x; + break; + } + break; // unreachable workaround + } while(false); + out_var_SV_TARGET1 = uint(cross(vec3(-1.0, -1.0, _55), vec3(1.0, 1.0, _66)).x); +} + diff --git a/reference/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-1.asm.frag b/reference/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-1.asm.frag new file mode 100644 index 00000000000..f01a3282f10 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-1.asm.frag @@ -0,0 +1,27 @@ +#version 450 + +layout(binding = 0, std430) readonly buffer SSBO +{ + float values0[]; +} _5; + +layout(binding = 1, std430) readonly buffer SSBO1 +{ + float values1[]; +} _7; + +layout(location = 0) out vec2 FragColor; + +void main() +{ + vec2 _27; + _27 = vec2(0.0); + vec2 _39; + vec2 _40; + vec2 _41; + for (int _30 = 0; _30 < 16; _39 = _27 * _27, _40 = _39, _40.x = _5.values0[_30], _41 = _40, _41.y = _7.values1[_30], _27 += _41, _30++) + { + } + FragColor = _27; +} + diff --git a/reference/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-2.asm.frag b/reference/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-2.asm.frag new file mode 100644 index 00000000000..37b66f8c3d0 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-2.asm.frag @@ -0,0 +1,29 @@ +#version 450 + +layout(binding = 0, std430) readonly buffer SSBO +{ + float values0[]; +} _5; + +layout(binding = 1, std430) readonly buffer SSBO1 +{ + float values1[]; +} _7; + +layout(location = 0) out vec2 FragColor; + +void main() +{ + vec2 _27; + _27 = vec2(0.0); + vec2 _42; + for (int _30 = 0; _30 < 16; _27 += _42, _30++) + { + vec2 _40 = _27 * _27; + _40.x = _5.values0[_30]; + _42 = _40; + _42.y = 
_7.values1[_30]; + } + FragColor = _27; +} + diff --git a/reference/shaders-no-opt/asm/frag/composite-insert-inheritance.asm.frag b/reference/shaders-no-opt/asm/frag/composite-insert-inheritance.asm.frag new file mode 100644 index 00000000000..11c1f4ca3c4 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/composite-insert-inheritance.asm.frag @@ -0,0 +1,73 @@ +#version 310 es +precision mediump float; +precision highp int; + +vec4 _32; + +layout(location = 0) in vec4 vInput; +layout(location = 0) out vec4 FragColor; + +void main() +{ + vec4 _37 = vInput; + highp vec4 _38 = _37; + _38.x = 1.0; + _38.y = 2.0; + _38.z = 3.0; + _38.w = 4.0; + FragColor = _38; + vec4 _6 = _37; + _6.x = 1.0; + _6.y = 2.0; + _6.z = 3.0; + _6.w = 4.0; + FragColor = _6; + highp vec4 _42 = _37; + _42.x = 1.0; + vec4 _10 = _42; + _10.y = 2.0; + highp vec4 _43 = _10; + _43.z = 3.0; + vec4 _11 = _43; + _11.w = 4.0; + FragColor = _11; + highp vec4 _44 = _37; + _44.x = 1.0; + highp vec4 _45 = _44; + _45.y = 2.0; + vec4 mp_copy_45 = _45; + highp vec4 _46 = _45; + _46.z = 3.0; + highp vec4 _47 = _46; + _47.w = 4.0; + vec4 mp_copy_47 = _47; + FragColor = _47 + _44; + FragColor = mp_copy_47 + mp_copy_45; + highp vec4 _49; + _49.x = 1.0; + _49.y = 2.0; + _49.z = 3.0; + _49.w = 4.0; + FragColor = _49; + highp vec4 _53 = vec4(0.0); + _53.x = 1.0; + FragColor = _53; + highp vec4 _54[2] = vec4[](vec4(0.0), vec4(0.0)); + _54[1].z = 1.0; + _54[0].w = 2.0; + FragColor = _54[0]; + FragColor = _54[1]; + highp mat4 _58 = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); + _58[1].z = 1.0; + _58[2].w = 2.0; + FragColor = _58[0]; + FragColor = _58[1]; + FragColor = _58[2]; + FragColor = _58[3]; + highp vec4 PHI; + PHI = _46; + highp vec4 _65 = PHI; + _65.w = 4.0; + FragColor = _65; +} + diff --git a/reference/shaders-no-opt/asm/frag/demote-impure-function-call.vk.nocompat.asm.frag.vk b/reference/shaders-no-opt/asm/frag/demote-impure-function-call.vk.nocompat.asm.frag.vk new file mode 100644 index 
00000000000..adde5fcbf46 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/demote-impure-function-call.vk.nocompat.asm.frag.vk @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) flat in int vA; +layout(location = 0) out vec4 FragColor; + +vec4 foobar(int a) +{ + if (a < 0) + { + demote; + } + return vec4(10.0); +} + +void main() +{ + int param = vA; + vec4 _25 = foobar(param); + FragColor = vec4(10.0); +} + diff --git a/reference/shaders-no-opt/asm/frag/discard-impure-function-call.asm.frag b/reference/shaders-no-opt/asm/frag/discard-impure-function-call.asm.frag new file mode 100644 index 00000000000..0fe71f64b44 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/discard-impure-function-call.asm.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) flat in int vA; +layout(location = 0) out vec4 FragColor; + +vec4 foobar(int a) +{ + if (a < 0) + { + discard; + } + return vec4(10.0); +} + +void main() +{ + int param = vA; + vec4 _25 = foobar(param); + FragColor = vec4(10.0); +} + diff --git a/reference/shaders-no-opt/asm/frag/do-while-continue-phi.asm.invalid.frag b/reference/shaders-no-opt/asm/frag/do-while-continue-phi.asm.invalid.frag new file mode 100644 index 00000000000..2024c302efd --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/do-while-continue-phi.asm.invalid.frag @@ -0,0 +1,37 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) out highp vec4 _GLF_color; + +void main() +{ + for (;;) + { + bool _32; + for (;;) + { + if (gl_FragCoord.x != gl_FragCoord.x) + { + _32 = true; + break; + } + if (false) + { + continue; + } + else + { + _32 = false; + break; + } + } + if (_32) + { + break; + } + _GLF_color = vec4(1.0, 0.0, 0.0, 1.0); + break; + } +} + diff --git a/reference/shaders-no-opt/asm/frag/early-conditional-return-switch.asm.frag b/reference/shaders-no-opt/asm/frag/early-conditional-return-switch.asm.frag new file mode 100644 index 
00000000000..b03d5a4d7b1 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/early-conditional-return-switch.asm.frag @@ -0,0 +1,67 @@ +#version 450 + +vec4 _32; + +layout(binding = 0, std140) uniform type_gCBuffarrayIndex +{ + uint gArrayIndex; +} gCBuffarrayIndex; + +uniform sampler2D SPIRV_Cross_Combinedg_textureArray0SPIRV_Cross_DummySampler; +uniform sampler2D SPIRV_Cross_Combinedg_textureArray1SPIRV_Cross_DummySampler; +uniform sampler2D SPIRV_Cross_Combinedg_textureArray2SPIRV_Cross_DummySampler; +uniform sampler2D SPIRV_Cross_Combinedg_textureArray3SPIRV_Cross_DummySampler; + +layout(location = 0) out vec4 out_var_SV_TARGET; + +void main() +{ + vec4 _80; + do + { + vec4 _77; + bool _78; + switch (gCBuffarrayIndex.gArrayIndex) + { + case 0u: + { + _77 = texelFetch(SPIRV_Cross_Combinedg_textureArray0SPIRV_Cross_DummySampler, ivec3(int(gl_FragCoord.x), int(gl_FragCoord.y), 0).xy, 0); + _78 = true; + break; + } + case 1u: + { + _77 = texelFetch(SPIRV_Cross_Combinedg_textureArray1SPIRV_Cross_DummySampler, ivec3(int(gl_FragCoord.x), int(gl_FragCoord.y), 0).xy, 0); + _78 = true; + break; + } + case 2u: + { + _77 = texelFetch(SPIRV_Cross_Combinedg_textureArray2SPIRV_Cross_DummySampler, ivec3(int(gl_FragCoord.x), int(gl_FragCoord.y), 0).xy, 0); + _78 = true; + break; + } + case 3u: + { + _77 = texelFetch(SPIRV_Cross_Combinedg_textureArray3SPIRV_Cross_DummySampler, ivec3(int(gl_FragCoord.x), int(gl_FragCoord.y), 0).xy, 0); + _78 = true; + break; + } + default: + { + _77 = _32; + _78 = false; + break; + } + } + if (_78) + { + _80 = _77; + break; + } + _80 = vec4(0.0, 1.0, 0.0, 1.0); + break; + } while(false); + out_var_SV_TARGET = _80; +} + diff --git a/reference/shaders-no-opt/asm/frag/empty-struct-in-struct.asm.frag b/reference/shaders-no-opt/asm/frag/empty-struct-in-struct.asm.frag new file mode 100644 index 00000000000..0d3958b5b08 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/empty-struct-in-struct.asm.frag @@ -0,0 +1,31 @@ +#version 450 + +struct 
EmptyStructTest +{ + int empty_struct_member; +}; + +struct EmptyStruct2Test +{ + EmptyStructTest _m0; +}; + +float GetValue(EmptyStruct2Test self) +{ + return 0.0; +} + +float GetValue_1(EmptyStruct2Test self) +{ + return 0.0; +} + +void main() +{ + EmptyStructTest _25 = EmptyStructTest(0); + EmptyStruct2Test emptyStruct; + float value = GetValue(emptyStruct); + value = GetValue_1(EmptyStruct2Test(_25)); + value = GetValue_1(EmptyStruct2Test(EmptyStructTest(0))); +} + diff --git a/reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.frag b/reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.invalid.frag similarity index 100% rename from reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.frag rename to reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.invalid.frag diff --git a/reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.frag b/reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.invalid.frag similarity index 100% rename from reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.frag rename to reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.invalid.frag diff --git a/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag index 01797173f1b..874bc6de137 100644 --- a/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag +++ b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag @@ -61,6 +61,15 @@ struct Params vec4 LqmatFarTilingFactor; }; +VertexOutput _121; +SurfaceInput _122; +vec2 _123; +vec4 _124; +Surface _125; +vec4 _192; +vec4 _219; +vec4 _297; + layout(binding = 0, std140) uniform CB0 { Globals CB0; @@ -86,51 +95,31 @@ layout(location = 7) in vec4 IN_PosLightSpace_Reflectance; layout(location = 8) in 
float IN_studIndex; layout(location = 0) out vec4 _entryPointOutput; -VertexOutput _121; -SurfaceInput _122; -vec2 _123; -vec4 _124; -Surface _125; -vec4 _192; -vec4 _219; -vec4 _297; - void main() { - VertexOutput _128 = _121; + VertexOutput _128; _128.HPosition = gl_FragCoord; - VertexOutput _130 = _128; - _130.Uv_EdgeDistance1 = IN_Uv_EdgeDistance1; - VertexOutput _132 = _130; - _132.UvStuds_EdgeDistance2 = IN_UvStuds_EdgeDistance2; - VertexOutput _134 = _132; - _134.Color = IN_Color; - VertexOutput _136 = _134; - _136.LightPosition_Fog = IN_LightPosition_Fog; - VertexOutput _138 = _136; - _138.View_Depth = IN_View_Depth; - VertexOutput _140 = _138; - _140.Normal_SpecPower = IN_Normal_SpecPower; - VertexOutput _142 = _140; - _142.Tangent = IN_Tangent; - VertexOutput _144 = _142; - _144.PosLightSpace_Reflectance = IN_PosLightSpace_Reflectance; - VertexOutput _146 = _144; - _146.studIndex = IN_studIndex; - SurfaceInput _147 = _122; + _128.Uv_EdgeDistance1 = IN_Uv_EdgeDistance1; + _128.UvStuds_EdgeDistance2 = IN_UvStuds_EdgeDistance2; + _128.Color = IN_Color; + _128.LightPosition_Fog = IN_LightPosition_Fog; + _128.View_Depth = IN_View_Depth; + _128.Normal_SpecPower = IN_Normal_SpecPower; + _128.Tangent = IN_Tangent; + _128.PosLightSpace_Reflectance = IN_PosLightSpace_Reflectance; + _128.studIndex = IN_studIndex; + SurfaceInput _147; _147.Color = IN_Color; - SurfaceInput _149 = _147; - _149.Uv = IN_Uv_EdgeDistance1.xy; - SurfaceInput _151 = _149; - _151.UvStuds = IN_UvStuds_EdgeDistance2.xy; - SurfaceInput _156 = _151; - _156.UvStuds.y = (fract(_151.UvStuds.y) + IN_studIndex) * 0.25; - float _163 = _146.View_Depth.w * _19.CB0.RefractionBias_FadeDistance_GlowFactor.y; + _147.Uv = IN_Uv_EdgeDistance1.xy; + _147.UvStuds = IN_UvStuds_EdgeDistance2.xy; + _147.UvStuds.y = (fract(_147.UvStuds.y) + IN_studIndex) * 0.25; + float _160 = clamp(1.0 - (_128.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0); + float _163 = _128.View_Depth.w * 
_19.CB0.RefractionBias_FadeDistance_GlowFactor.y; float _165 = clamp(1.0 - _163, 0.0, 1.0); vec2 _166 = IN_Uv_EdgeDistance1.xy * 1.0; bool _173; vec4 _193; - do + for (;;) { _173 = 0.0 == 0.0; if (_173) @@ -141,15 +130,14 @@ void main() else { float _180 = 1.0 / (1.0 - 0.0); - _193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0) * _180) - (0.0 * _180), 0.0, 1.0))); + _193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((_160 * _180) - (0.0 * _180), 0.0, 1.0))); break; } _193 = _192; break; - } while (false); - vec4 _194 = _193 * 1.0; + } vec4 _220; - do + for (;;) { if (_173) { @@ -164,18 +152,20 @@ void main() } _220 = _219; break; - } while (false); + } vec2 _223 = vec2(1.0); vec2 _224 = (_220.wy * 2.0) - _223; vec3 _232 = vec3(_224, sqrt(clamp(1.0 + dot(-_224, _224), 0.0, 1.0))); - vec2 _240 = (texture(SPIRV_Cross_CombinedNormalDetailMapTextureNormalDetailMapSampler, _166 * 0.0).wy * 2.0) - _223; + vec4 _237 = texture(SPIRV_Cross_CombinedNormalDetailMapTextureNormalDetailMapSampler, _166 * 0.0); + vec2 _240 = (_237.wy * 2.0) - _223; vec2 _252 = _232.xy + (vec3(_240, sqrt(clamp(1.0 + dot(-_240, _240), 0.0, 1.0))).xy * 0.0); vec3 _253 = vec3(_252.x, _252.y, _232.z); vec2 _255 = _253.xy * _165; vec3 _256 = vec3(_255.x, _255.y, _253.z); - vec3 _271 = ((IN_Color.xyz * _194.xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (texture(SPIRV_Cross_CombinedStudsMapTextureStudsMapSampler, _156.UvStuds).x * 2.0); + vec4 _268 = texture(SPIRV_Cross_CombinedStudsMapTextureStudsMapSampler, _147.UvStuds); + vec3 _271 = ((IN_Color.xyz * (_193 * 1.0).xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (_268.x * 2.0); vec4 _298; - do + for (;;) { if (0.75 == 
0.0) { @@ -190,23 +180,19 @@ void main() } _298 = _297; break; - } while (false); + } vec2 _303 = mix(vec2(0.800000011920928955078125, 120.0), (_298.xy * vec2(2.0, 256.0)) + vec2(0.0, 0.00999999977648258209228515625), vec2(_165)); - Surface _304 = _125; + Surface _304; _304.albedo = _271; - Surface _305 = _304; - _305.normal = _256; + _304.normal = _256; float _306 = _303.x; - Surface _307 = _305; - _307.specular = _306; + _304.specular = _306; float _308 = _303.y; - Surface _309 = _307; - _309.gloss = _308; + _304.gloss = _308; float _312 = (_298.xy.y * _165) * 0.0; - Surface _313 = _309; - _313.reflectance = _312; - vec4 _318 = vec4(_271, _146.Color.w); - vec3 _329 = normalize(((IN_Tangent * _313.normal.x) + (cross(IN_Normal_SpecPower.xyz, IN_Tangent) * _313.normal.y)) + (IN_Normal_SpecPower.xyz * _313.normal.z)); + _304.reflectance = _312; + vec4 _318 = vec4(_271, _128.Color.w); + vec3 _329 = normalize(((IN_Tangent * _304.normal.x) + (cross(IN_Normal_SpecPower.xyz, IN_Tangent) * _304.normal.y)) + (IN_Normal_SpecPower.xyz * _304.normal.z)); vec3 _332 = -_19.CB0.Lamp0Dir; float _333 = dot(_329, _332); float _357 = clamp(dot(step(_19.CB0.LightConfig3.xyz, abs(IN_LightPosition_Fog.xyz - _19.CB0.LightConfig2.xyz)), vec3(1.0)), 0.0, 1.0); @@ -214,15 +200,14 @@ void main() vec2 _376 = texture(SPIRV_Cross_CombinedShadowMapTextureShadowMapSampler, IN_PosLightSpace_Reflectance.xyz.xy).xy; float _392 = (1.0 - (((step(_376.x, IN_PosLightSpace_Reflectance.xyz.z) * clamp(9.0 - (20.0 * abs(IN_PosLightSpace_Reflectance.xyz.z - 0.5)), 0.0, 1.0)) * _376.y) * _19.CB0.OutlineBrightness_ShadowInfo.w)) * _368.w; vec3 _403 = mix(_318.xyz, texture(SPIRV_Cross_CombinedEnvironmentMapTextureEnvironmentMapSampler, reflect(-IN_View_Depth.xyz, _329)).xyz, vec3(_312)); - vec4 _404 = vec4(_403.x, _403.y, _403.z, _318.w); - vec3 _422 = (((_19.CB0.AmbientColor + (((_19.CB0.Lamp0Color * clamp(_333, 0.0, 1.0)) + (_19.CB0.Lamp1Color * max(-_333, 0.0))) * _392)) + _368.xyz) * _404.xyz) + 
(_19.CB0.Lamp0Color * (((step(0.0, _333) * _306) * _392) * pow(clamp(dot(_329, normalize(_332 + normalize(IN_View_Depth.xyz))), 0.0, 1.0), _308))); - vec4 _425 = vec4(_422.x, _422.y, _422.z, _124.w); - _425.w = _404.w; + vec3 _422 = (((_19.CB0.AmbientColor + (((_19.CB0.Lamp0Color * clamp(_333, 0.0, 1.0)) + (_19.CB0.Lamp1Color * max(-_333, 0.0))) * _392)) + _368.xyz) * vec4(_403.x, _403.y, _403.z, _318.w).xyz) + (_19.CB0.Lamp0Color * (((step(0.0, _333) * _306) * _392) * pow(clamp(dot(_329, normalize(_332 + normalize(IN_View_Depth.xyz))), 0.0, 1.0), _308))); + vec4 _423 = vec4(_422.x, _422.y, _422.z, _124.w); + _423.w = vec4(_403.x, _403.y, _403.z, _318.w).w; vec2 _435 = min(IN_Uv_EdgeDistance1.wz, IN_UvStuds_EdgeDistance2.wz); float _439 = min(_435.x, _435.y) / _163; - vec3 _445 = _425.xyz * clamp((clamp((_163 * _19.CB0.OutlineBrightness_ShadowInfo.x) + _19.CB0.OutlineBrightness_ShadowInfo.y, 0.0, 1.0) * (1.5 - _439)) + _439, 0.0, 1.0); - vec4 _446 = vec4(_445.x, _445.y, _445.z, _425.w); - vec3 _453 = mix(_19.CB0.FogColor, _446.xyz, vec3(clamp(_146.LightPosition_Fog.w, 0.0, 1.0))); + vec3 _445 = _423.xyz * clamp((clamp((_163 * _19.CB0.OutlineBrightness_ShadowInfo.x) + _19.CB0.OutlineBrightness_ShadowInfo.y, 0.0, 1.0) * (1.5 - _439)) + _439, 0.0, 1.0); + vec4 _446 = vec4(_445.x, _445.y, _445.z, _423.w); + vec3 _453 = mix(_19.CB0.FogColor, _446.xyz, vec3(clamp(_128.LightPosition_Fog.w, 0.0, 1.0))); _entryPointOutput = vec4(_453.x, _453.y, _453.z, _446.w); } diff --git a/reference/shaders-no-opt/asm/frag/late-expression-invalidation-2.asm.frag b/reference/shaders-no-opt/asm/frag/late-expression-invalidation-2.asm.frag new file mode 100644 index 00000000000..ed853d0125c --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/late-expression-invalidation-2.asm.frag @@ -0,0 +1,263 @@ +#version 320 es +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH 
[[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif +precision mediump float; +precision highp int; + +layout(binding = 0, std140) uniform buf0 +{ + highp vec2 resolution; +} _7; + +layout(location = 0) out highp vec4 _GLF_color; +int map[256]; +highp mat2x4 _60 = mat2x4(vec4(0.0), vec4(0.0)); + +void main() +{ + int _65 = 256 - 14; + int _68 = -_65; + highp vec2 pos = gl_FragCoord.xy / _7.resolution; + ivec2 ipos = ivec2(int(pos.x * 16.0), int(pos.y * 16.0)); + int i = 0; + for (; i < 256; i++) + { + map[i] = 0; + } + ivec2 p = ivec2(0); + int v = 0; + bool canwalk = true; + do + { + v++; + int directions = 0; + bool _98 = p.x > 0; + bool _111; + if (_98) + { + _111 = map[(p.x - 2) + (p.y * 16)] == 0; + } + else + { + _111 = _98; + } + if (_111) + { + directions++; + } + bool _118 = p.y > 0; + bool _131; + if (_118) + { + _131 = map[p.x + ((p.y - 2) * 16)] == 0; + } + else + { + _131 = _118; + } + if (_131) + { + directions++; + } + bool _138 = p.x < 14; + bool _151; + if (_138) + { + _151 = map[(p.x + 2) + (p.y * 16)] == 0; + } + else + { + _151 = _138; + } + if (_151) + { + directions++; + } + int _156 = 256 - _68; + bool _159 = p.y < 14; + bool _172; + if (_159) + { + _172 = map[p.x + ((p.y + 2) * 16)] == 0; + } + else + { + _172 = _159; + } + if (_172) + { + directions++; + } + if (directions == 0) + { + canwalk = false; + i = 0; + for (;;) + { + int _186 = i; + if (_186 < 8) + { + int j = 0; + _60 = mat2x4(vec4(0.0), vec4(0.0)); + if (false) + { + int _216 = i; + i = _216 + 1; + continue; + } + else + { + SPIRV_CROSS_UNROLL + for (; j < 8; j++) + { + if (map[(j * 2) + ((i * 2) * 16)] == 0) + { + p.x = j * 2; + p.y = i * 2; + canwalk = true; + } + } + int _216 = i; + i = _216 + 1; + continue; + } + } + else + { + break; + } + } + map[p.x + (p.y * 16)] = 1; + } + else + { + int d = v % 
directions; + v += directions; + bool _232 = d >= 0; + bool _238; + if (_232) + { + _238 = p.x > 0; + } + else + { + _238 = _232; + } + bool _251; + if (_238) + { + _251 = map[(p.x - 2) + (p.y * 16)] == 0; + } + else + { + _251 = _238; + } + if (_251) + { + d--; + map[p.x + (p.y * 16)] = 1; + map[(p.x - 1) + (p.y * 16)] = 1; + map[(p.x - 2) + (p.y * 16)] = 1; + p.x -= 2; + } + bool _284 = d >= 0; + bool _290; + if (_284) + { + _290 = p.y > 0; + } + else + { + _290 = _284; + } + bool _303; + if (_290) + { + _303 = map[p.x + ((p.y - 2) * 16)] == 0; + } + else + { + _303 = _290; + } + if (_303) + { + d--; + map[p.x + (p.y * 16)] = 1; + map[p.x + ((p.y - 1) * 16)] = 1; + map[p.x + ((p.y - 2) * 16)] = 1; + p.y -= 2; + } + bool _336 = d >= 0; + bool _342; + if (_336) + { + _342 = p.x < 14; + } + else + { + _342 = _336; + } + bool _355; + if (_342) + { + _355 = map[(p.x + 2) + (p.y * 16)] == 0; + } + else + { + _355 = _342; + } + if (_355) + { + d--; + map[p.x + (p.y * 16)] = 1; + map[(p.x + 1) + (p.y * 16)] = 1; + map[(p.x + 2) + (p.y * 16)] = 1; + p.x += 2; + } + bool _388 = d >= 0; + bool _394; + if (_388) + { + _394 = p.y < 14; + } + else + { + _394 = _388; + } + bool _407; + if (_394) + { + _407 = map[p.x + ((p.y + 2) * 16)] == 0; + } + else + { + _407 = _394; + } + if (_407) + { + d--; + map[p.x + (p.y * 16)] = 1; + map[p.x + ((p.y + 1) * 16)] = 1; + map[p.x + ((p.y + 2) * 16)] = 1; + p.y += 2; + } + } + if (map[(ipos.y * 16) + ipos.x] == 1) + { + _GLF_color = vec4(1.0); + return; + } + } while (canwalk); + _GLF_color = vec4(0.0, 0.0, 0.0, 1.0); +} + diff --git a/reference/shaders-no-opt/asm/frag/late-expression-invalidation.asm.frag b/reference/shaders-no-opt/asm/frag/late-expression-invalidation.asm.frag new file mode 100644 index 00000000000..6522c651c8f --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/late-expression-invalidation.asm.frag @@ -0,0 +1,54 @@ +#version 310 es +precision mediump float; +precision highp int; + +const mat4 _34[4] = 
mat4[](mat4(vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0)), mat4(vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0)), mat4(vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0)), mat4(vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0))); + +layout(location = 0) out highp vec4 _GLF_color; + +void main() +{ + for (;;) + { + if (gl_FragCoord.x < 10.0) + { + _GLF_color = vec4(1.0, 0.0, 0.0, 1.0); + break; + } + for (int _46 = 0; _46 < 4; _46++) + { + int _53; + _53 = 0; + bool _56; + for (;;) + { + _56 = _53 < 4; + if (_56) + { + if (distance(vec2(1.0), vec2(1.0) / vec2(_34[int(_56)][_46].w)) < 1.0) + { + _GLF_color = vec4(1.0); + int _54 = _53 + 1; + _53 = _54; + continue; + } + else + { + int _54 = _53 + 1; + _53 = _54; + continue; + } + int _54 = _53 + 1; + _53 = _54; + continue; + } + else + { + break; + } + } + } + break; + } +} + diff --git a/reference/shaders-no-opt/asm/frag/ldexp-uint-exponent.asm.frag b/reference/shaders-no-opt/asm/frag/ldexp-uint-exponent.asm.frag new file mode 100644 index 00000000000..4ce9b253578 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/ldexp-uint-exponent.asm.frag @@ -0,0 +1,13 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) out highp vec4 _GLF_color; + +void main() +{ + mediump uvec4 _4 = uvec4(bitCount(uvec4(1u))); + uvec4 hp_copy_4 = _4; + _GLF_color = ldexp(vec4(1.0), ivec4(hp_copy_4)); +} + diff --git a/reference/shaders/asm/frag/loop-merge-to-continue.asm.frag b/reference/shaders-no-opt/asm/frag/loop-merge-to-continue.asm.invalid.frag similarity index 100% rename from reference/shaders/asm/frag/loop-merge-to-continue.asm.frag rename to reference/shaders-no-opt/asm/frag/loop-merge-to-continue.asm.invalid.frag diff --git a/reference/shaders-no-opt/asm/frag/nonuniform-bracket-handling-2.vk.nocompat.asm.frag.vk b/reference/shaders-no-opt/asm/frag/nonuniform-bracket-handling-2.vk.nocompat.asm.frag.vk new file mode 100644 index 00000000000..d2f964674f0 --- /dev/null +++ 
b/reference/shaders-no-opt/asm/frag/nonuniform-bracket-handling-2.vk.nocompat.asm.frag.vk @@ -0,0 +1,20 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(set = 0, binding = 0, std430) readonly buffer SSBO +{ + uint indices[]; +} _8; + +layout(set = 0, binding = 0) uniform sampler2D uSamplers[]; +layout(set = 1, binding = 0) uniform sampler2D uSampler; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; + +void main() +{ + FragColor = textureLod(uSamplers[nonuniformEXT(_8.indices[10])], vUV, 0.0); + FragColor += textureLod(uSampler, vUV, float(_8.indices[int(gl_FragCoord.y)])); +} + diff --git a/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk b/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk index 5f7ddeee17a..289f576f6dd 100644 --- a/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk +++ b/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk @@ -24,7 +24,7 @@ void main() int i = vIndex; int _59 = i + 10; int _64 = i + 40; - FragColor = texture(sampler2D(uSamplers[nonuniformEXT(_59)], uSamps[nonuniformEXT(_64)]), vUV); + FragColor = texture(nonuniformEXT(sampler2D(uSamplers[_59], uSamps[_64])), vUV); int _71 = i + 10; FragColor = texture(uCombinedSamplers[nonuniformEXT(_71)], vUV); int _77 = i + 20; diff --git a/reference/shaders-no-opt/asm/frag/nonuniform-ssbo.nocompat.vk.asm.frag.vk b/reference/shaders-no-opt/asm/frag/nonuniform-ssbo.nocompat.vk.asm.frag.vk new file mode 100644 index 00000000000..2d98ec5fdf2 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/nonuniform-ssbo.nocompat.vk.asm.frag.vk @@ -0,0 +1,24 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(set = 0, binding = 3, std430) buffer SSBO +{ + uint counter; + vec4 v[]; +} ssbos[]; + +layout(location = 0) flat in int vIndex; +layout(location = 0) out vec4 
FragColor; + +void main() +{ + int i = vIndex; + int _42 = i + 60; + int _45 = i + 70; + ssbos[nonuniformEXT(_42)].v[_45] = vec4(20.0); + int _48 = i + 100; + uint _49 = atomicAdd(ssbos[nonuniformEXT(_48)].counter, 100u); + int _51 = i; + FragColor.z += float(int(uint(ssbos[nonuniformEXT(_51)].v.length()))); +} + diff --git a/reference/shaders-no-opt/asm/frag/only-initializer-frag-depth.asm.frag b/reference/shaders-no-opt/asm/frag/only-initializer-frag-depth.asm.frag new file mode 100644 index 00000000000..1041f711f8a --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/only-initializer-frag-depth.asm.frag @@ -0,0 +1,8 @@ +#version 450 + +const float _3_init = 0.5; +void main() +{ + gl_FragDepth = _3_init; +} + diff --git a/reference/shaders-no-opt/asm/frag/phi.zero-initialize.asm.frag b/reference/shaders-no-opt/asm/frag/phi.zero-initialize.asm.frag new file mode 100644 index 00000000000..59bac994549 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/phi.zero-initialize.asm.frag @@ -0,0 +1,30 @@ +#version 450 + +int uninit_int = 0; +ivec4 uninit_vector = ivec4(0); +mat4 uninit_matrix = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); + +struct Foo +{ + int a; +}; + +Foo uninit_foo = Foo(0); + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; + +void main() +{ + int _39 = 0; + if (vColor.x > 10.0) + { + _39 = 10; + } + else + { + _39 = 20; + } + FragColor = vColor; +} + diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag new file mode 100644 index 00000000000..7ba3fd5581e --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag @@ -0,0 +1,51 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() 
+#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! +#endif + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _7; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _9; + +void callee2() +{ + int _31 = int(gl_FragCoord.x); + _7.values1[_31]++; +} + +void callee() +{ + int _39 = int(gl_FragCoord.x); + _9.values0[_39]++; + callee2(); +} + +void spvMainInterlockedBody() +{ + callee(); +} + +void main() +{ + // Interlocks were used in a way not compatible with GLSL, this is very slow. + SPIRV_Cross_beginInvocationInterlock(); + spvMainInterlockedBody(); + SPIRV_Cross_endInvocationInterlock(); +} diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag new file mode 100644 index 00000000000..3575e02c8b0 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag @@ -0,0 +1,65 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment 
Shader Interlock/Ordering extension missing! +#endif + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _7; + +layout(binding = 2, std430) buffer _12_13 +{ + uint _m0[]; +} _13; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _9; + +void callee2() +{ + int _44 = int(gl_FragCoord.x); + _7.values1[_44]++; +} + +void callee() +{ + int _52 = int(gl_FragCoord.x); + _9.values0[_52]++; + callee2(); + if (true) + { + } +} + +void _35() +{ + _13._m0[int(gl_FragCoord.x)] = 4u; +} + +void spvMainInterlockedBody() +{ + callee(); + _35(); +} + +void main() +{ + // Interlocks were used in a way not compatible with GLSL, this is very slow. + SPIRV_Cross_beginInvocationInterlock(); + spvMainInterlockedBody(); + SPIRV_Cross_endInvocationInterlock(); +} diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag new file mode 100644 index 00000000000..806eedf9fbe --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag @@ -0,0 +1,61 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _7; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _9; + +void callee2() +{ + int _37 = int(gl_FragCoord.x); + _7.values1[_37]++; +} + +void callee() +{ + int _45 = int(gl_FragCoord.x); + _9.values0[_45]++; + callee2(); +} + +void _29() +{ +} + +void _31() +{ +} + +void spvMainInterlockedBody() +{ + callee(); + _29(); + _31(); +} + +void main() +{ + // Interlocks were used in a way not compatible with GLSL, this is very slow. + SPIRV_Cross_beginInvocationInterlock(); + spvMainInterlockedBody(); + SPIRV_Cross_endInvocationInterlock(); +} diff --git a/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporaries.asm.frag b/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporaries.asm.frag new file mode 100644 index 00000000000..6078efae88d --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporaries.asm.frag @@ -0,0 +1,23 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; + +void main() +{ + float a = vColor.x; + highp float b = vColor.y; + int i = 0; + float _14; + highp float hp_copy_14; + float _15; + highp float hp_copy_15; + for (; i < 4; i++, _14 = a, hp_copy_14 = _14, _15 = a * _14, hp_copy_15 = _15, b += (hp_copy_15 * hp_copy_14)) + { + FragColor += vec4(1.0); + } + FragColor += vec4(b); +} + diff --git a/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporary.asm.frag b/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporary.asm.frag new file mode 100644 index 00000000000..58de92a45b7 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporary.asm.frag @@ -0,0 +1,19 @@ +#version 310 es +precision mediump float; +precision highp int; + 
+layout(location = 0) in float vColor; +layout(location = 0) out float FragColor; + +void main() +{ + float b; + highp float hp_copy_b; + do + { + b = vColor * vColor; + hp_copy_b = b; + } while (false); + FragColor = hp_copy_b * hp_copy_b; +} + diff --git a/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules.asm.frag b/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules.asm.frag new file mode 100644 index 00000000000..b0b3a8dbe91 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules.asm.frag @@ -0,0 +1,53 @@ +#version 320 es +precision mediump float; +precision highp int; + +layout(binding = 0, std140) uniform UBO +{ + float mediump_float; + highp float highp_float; +} ubo; + +layout(location = 0) out vec4 FragColor0; +layout(location = 1) out vec4 FragColor1; +layout(location = 2) out vec4 FragColor2; +layout(location = 3) out vec4 FragColor3; +layout(location = 0) in vec4 V4; + +void main() +{ + vec4 V4_value0 = V4; + highp vec4 hp_copy_V4_value0 = V4_value0; + float V1_value0 = V4.x; + highp float hp_copy_V1_value0 = V1_value0; + float V1_value2 = V4_value0.z; + highp float hp_copy_V1_value2 = V1_value2; + float ubo_mp0 = ubo.mediump_float; + highp float hp_copy_ubo_mp0 = ubo_mp0; + highp float ubo_hp0 = ubo.highp_float; + float mp_copy_ubo_hp0 = ubo_hp0; + highp vec4 _48 = hp_copy_V4_value0 - vec4(3.0); + vec4 mp_copy_48 = _48; + FragColor0 = V4_value0 + vec4(3.0); + FragColor1 = _48; + FragColor2 = mp_copy_48 * vec4(3.0); + float _21 = V1_value0 + 3.0; + float float_0_weird = 3.0 - mp_copy_ubo_hp0; + highp float hp_copy_float_0_weird = float_0_weird; + highp float _49 = hp_copy_V1_value0 - hp_copy_float_0_weird; + float mp_copy_49 = _49; + FragColor3 = vec4(_21, _49, mp_copy_49 * mp_copy_ubo_hp0, 3.0); + highp float _51 = hp_copy_V1_value2 - hp_copy_ubo_mp0; + float mp_copy_51 = _51; + FragColor3 = vec4(V4_value0.z + ubo_mp0, _51, mp_copy_51 * mp_copy_ubo_hp0, 3.0); + FragColor0 = 
sin(hp_copy_V4_value0); + FragColor1 = sin(V4_value0); + float phi_mp; + highp float phi_hp; + phi_mp = _21; + phi_hp = _49; + highp float hp_copy_phi_mp = phi_mp; + float mp_copy_phi_hp = phi_hp; + FragColor2 = vec4(phi_mp + phi_mp, hp_copy_phi_mp + hp_copy_phi_mp, mp_copy_phi_hp + mp_copy_phi_hp, phi_hp + phi_hp); +} + diff --git a/reference/shaders-no-opt/asm/frag/reserved-function-identifier.asm.frag b/reference/shaders-no-opt/asm/frag/reserved-function-identifier.asm.frag new file mode 100644 index 00000000000..52f0c616617 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/reserved-function-identifier.asm.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out float FragColor; + +float _mat3(float a) +{ + return a + 1.0; +} + +float _RESERVED_IDENTIFIER_FIXUP_gl_Foo(int a) +{ + return float(a) + 1.0; +} + +void main() +{ + float param = 2.0; + int param_1 = 4; + FragColor = _mat3(param) + _RESERVED_IDENTIFIER_FIXUP_gl_Foo(param_1); +} + diff --git a/reference/shaders-no-opt/asm/frag/reserved-identifiers.asm.frag b/reference/shaders-no-opt/asm/frag/reserved-identifiers.asm.frag new file mode 100644 index 00000000000..5d75a44a334 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/reserved-identifiers.asm.frag @@ -0,0 +1,17 @@ +#version 450 + +layout(location = 0) out vec4 _RESERVED_IDENTIFIER_FIXUP_spvFoo; +layout(location = 1) out vec4 SPIRV_Cross_blah; +layout(location = 2) out vec4 _40Bar; +layout(location = 3) out vec4 _m40; +layout(location = 4) out vec4 _underscore_foo_bar_meep_; + +void main() +{ + _RESERVED_IDENTIFIER_FIXUP_spvFoo = vec4(0.0); + SPIRV_Cross_blah = vec4(1.0); + _40Bar = vec4(2.0); + _m40 = vec4(3.0); + _underscore_foo_bar_meep_ = vec4(4.0); +} + diff --git a/reference/shaders-no-opt/asm/frag/scalar-select.spv14.asm.frag b/reference/shaders-no-opt/asm/frag/scalar-select.spv14.asm.frag new file mode 100644 index 00000000000..d74286a1536 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/scalar-select.spv14.asm.frag @@ -0,0 
+1,19 @@ +#version 450 + +struct _15 +{ + float _m0; +}; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = false ? vec4(1.0, 1.0, 0.0, 1.0) : vec4(0.0, 0.0, 0.0, 1.0); + FragColor = vec4(false); + FragColor = mix(vec4(0.0, 0.0, 0.0, 1.0), vec4(1.0, 1.0, 0.0, 1.0), bvec4(false, true, false, true)); + FragColor = vec4(bvec4(false, true, false, true)); + _15 _32 = false ? _15(0.0) : _15(1.0); + float _33[2] = true ? float[](0.0, 1.0) : float[](1.0, 0.0); +} + diff --git a/reference/shaders/asm/frag/selection-merge-to-continue.asm.frag b/reference/shaders-no-opt/asm/frag/selection-merge-to-continue.asm.invalid.frag similarity index 88% rename from reference/shaders/asm/frag/selection-merge-to-continue.asm.frag rename to reference/shaders-no-opt/asm/frag/selection-merge-to-continue.asm.invalid.frag index 82b5973f8af..edbce0ccafb 100644 --- a/reference/shaders/asm/frag/selection-merge-to-continue.asm.frag +++ b/reference/shaders-no-opt/asm/frag/selection-merge-to-continue.asm.invalid.frag @@ -11,12 +11,10 @@ void main() if (v0.x == 20.0) { FragColor += vec4(v0[i & 3]); - continue; } else { FragColor += vec4(v0[i & 1]); - continue; } } } diff --git a/reference/shaders-no-opt/asm/frag/sparse-texture-feedback-uint-code.asm.desktop.frag b/reference/shaders-no-opt/asm/frag/sparse-texture-feedback-uint-code.asm.desktop.frag new file mode 100644 index 00000000000..540978c4340 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/sparse-texture-feedback-uint-code.asm.desktop.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_sparse_texture2 : require + +struct ResType +{ + uint _m0; + vec4 _m1; +}; + +layout(binding = 0) uniform sampler2D uSamp; + +layout(location = 0) in vec2 vUV; + +void main() +{ + uint _30; + vec4 _31; + _30 = sparseTextureARB(uSamp, vUV, _31); + ResType _26 = ResType(_30, _31); + vec4 texel = _26._m1; + bool ret = sparseTexelsResidentARB(int(_26._m0)); +} + diff --git 
a/reference/shaders-no-opt/asm/frag/subgroup-arithmetic-cast.nocompat.vk.asm.frag.vk b/reference/shaders-no-opt/asm/frag/subgroup-arithmetic-cast.nocompat.vk.asm.frag.vk new file mode 100644 index 00000000000..130cab7d1ad --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/subgroup-arithmetic-cast.nocompat.vk.asm.frag.vk @@ -0,0 +1,24 @@ +#version 450 +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_clustered : require + +layout(location = 0) flat in int index; +layout(location = 0) out uint FragColor; + +void main() +{ + uint _17 = uint(index); + FragColor = uint(subgroupMin(index)); + FragColor = uint(subgroupMax(int(_17))); + FragColor = subgroupMin(uint(index)); + FragColor = subgroupMax(_17); + FragColor = uint(subgroupInclusiveMax(index)); + FragColor = uint(subgroupInclusiveMin(int(_17))); + FragColor = subgroupExclusiveMax(uint(index)); + FragColor = subgroupExclusiveMin(_17); + FragColor = uint(subgroupClusteredMin(index, 4u)); + FragColor = uint(subgroupClusteredMax(int(_17), 4u)); + FragColor = subgroupClusteredMin(uint(index), 4u); + FragColor = subgroupClusteredMax(_17, 4u); +} + diff --git a/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag b/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag new file mode 100644 index 00000000000..8a918c035c6 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag @@ -0,0 +1,32 @@ +#version 450 + +#if defined(GL_KHR_shader_subgroup_ballot) +#extension GL_KHR_shader_subgroup_ballot : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#else +#error No extensions available to emulate requested subgroup feature. 
+#endif + +layout(location = 0) flat in uint INDEX; +layout(location = 0) out uvec4 SV_Target; + +#if defined(GL_KHR_shader_subgroup_ballot) +#elif defined(GL_NV_shader_thread_group) +uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); } +#elif defined(GL_ARB_shader_ballot) +uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); } +#endif + +void main() +{ + uvec4 _21 = subgroupBallot(INDEX < 100u); + SV_Target.x = _21.x; + SV_Target.y = _21.y; + SV_Target.z = _21.z; + SV_Target.w = _21.w; +} + diff --git a/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag.vk b/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag.vk new file mode 100644 index 00000000000..ed5933f3128 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag.vk @@ -0,0 +1,15 @@ +#version 450 +#extension GL_KHR_shader_subgroup_ballot : require + +layout(location = 0) flat in uint INDEX; +layout(location = 0) out uvec4 SV_Target; + +void main() +{ + uvec4 _21 = subgroupBallot(INDEX < 100u); + SV_Target.x = _21.x; + SV_Target.y = _21.y; + SV_Target.z = _21.z; + SV_Target.w = _21.w; +} + diff --git a/reference/shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/reference/shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag similarity index 100% rename from reference/shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag rename to reference/shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag diff --git a/reference/shaders/asm/frag/switch-merge-to-continue.asm.frag b/reference/shaders-no-opt/asm/frag/switch-merge-to-continue.asm.invalid.frag similarity index 100% rename from reference/shaders/asm/frag/switch-merge-to-continue.asm.frag rename to reference/shaders-no-opt/asm/frag/switch-merge-to-continue.asm.invalid.frag diff --git a/reference/shaders-no-opt/asm/frag/switch-non-default-fallthrough-no-phi.asm.frag 
b/reference/shaders-no-opt/asm/frag/switch-non-default-fallthrough-no-phi.asm.frag new file mode 100644 index 00000000000..3315180965f --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/switch-non-default-fallthrough-no-phi.asm.frag @@ -0,0 +1,88 @@ +#version 450 + +struct _4 +{ + uint _m0; + int _m1; +}; + +struct _5 +{ + int _m0; + int _m1; +}; + +_4 _16; +int _21; + +layout(location = 0) flat in int _2; +layout(location = 0) out int _3; + +void main() +{ + bool _25 = false; + do + { + _5 _26; + _26._m0 = 0; + _26._m1 = 10; + _4 _35; + _35 = _16; + int _39; + _4 _36; + bool _59; + int _38 = 0; + for (;;) + { + if (_26._m0 < _26._m1) + { + int _27 = _26._m0; + int _28 = _26._m0 + int(1u); + _26._m0 = _28; + _36 = _4(1u, _27); + } + else + { + _4 _48 = _35; + _48._m0 = 0u; + _36 = _48; + } + bool _45_ladder_break = false; + switch (int(_36._m0)) + { + case 0: + { + _3 = _38; + _25 = true; + _59 = true; + _45_ladder_break = true; + break; + } + default: + { + _59 = false; + _45_ladder_break = true; + break; + } + case 1: + { + break; + } + } + if (_45_ladder_break) + { + break; + } + _39 = _38 + _2; + _35 = _36; + _38 = _39; + continue; + } + if (_59) + { + break; + } + break; + } while(false); +} + diff --git a/reference/shaders-no-opt/asm/frag/switch-single-case-multiple-exit-cfg.asm.frag b/reference/shaders-no-opt/asm/frag/switch-single-case-multiple-exit-cfg.asm.frag new file mode 100644 index 00000000000..c9ddbe6899b --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/switch-single-case-multiple-exit-cfg.asm.frag @@ -0,0 +1,26 @@ +#version 310 es +precision mediump float; +precision highp int; + +vec2 _19; + +layout(location = 0) out highp vec4 _GLF_color; + +void main() +{ + highp vec2 _30; + do + { + if (gl_FragCoord.x != gl_FragCoord.x) + { + _30 = _19; + break; + } + highp vec2 _29; + _29.y = _19.y; + _30 = _29; + break; + } while(false); + _GLF_color = vec4(_30, 1.0, 1.0); +} + diff --git 
a/reference/shaders-no-opt/asm/frag/terminate-impure-function-call.spv16.asm.frag b/reference/shaders-no-opt/asm/frag/terminate-impure-function-call.spv16.asm.frag new file mode 100644 index 00000000000..0fe71f64b44 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/terminate-impure-function-call.spv16.asm.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) flat in int vA; +layout(location = 0) out vec4 FragColor; + +vec4 foobar(int a) +{ + if (a < 0) + { + discard; + } + return vec4(10.0); +} + +void main() +{ + int param = vA; + vec4 _25 = foobar(param); + FragColor = vec4(10.0); +} + diff --git a/reference/shaders-no-opt/asm/frag/texel-fetch-ms-uint-sample.asm.frag b/reference/shaders-no-opt/asm/frag/texel-fetch-ms-uint-sample.asm.frag new file mode 100644 index 00000000000..d62ccb83866 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/texel-fetch-ms-uint-sample.asm.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(binding = 0) uniform sampler2DMS uSamp; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + ivec2 _28 = ivec2(gl_FragCoord.xy); + FragColor.x = texelFetch(uSamp, _28, int(0u)).x; + FragColor.y = texelFetch(uSamp, _28, int(1u)).x; + FragColor.z = texelFetch(uSamp, _28, int(2u)).x; + FragColor.w = texelFetch(uSamp, _28, int(3u)).x; +} + diff --git a/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag new file mode 100644 index 00000000000..61122bbd3ab --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag @@ -0,0 +1,34 @@ +#version 450 + +layout(location = 0) in vec4 A; +layout(location = 1) in vec4 B; +layout(location = 0) out vec4 FragColor; + +vec4 test_vector() +{ + bvec4 le = not(greaterThanEqual(A, B)); + bvec4 leq = not(greaterThan(A, B)); + bvec4 ge = not(lessThanEqual(A, B)); + bvec4 geq = not(lessThan(A, B)); + bvec4 eq = not(notEqual(A, B)); + bvec4 neq = notEqual(A, B); + neq = notEqual(A, B); + return ((((vec4(le) + 
vec4(leq)) + vec4(ge)) + vec4(geq)) + vec4(eq)) + vec4(neq); +} + +float test_scalar() +{ + bool le = !(A.x >= B.x); + bool leq = !(A.x > B.x); + bool ge = !(A.x <= B.x); + bool geq = !(A.x < B.x); + bool eq = !(A.x != B.x); + bool neq = A.x != B.x; + return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); +} + +void main() +{ + FragColor = test_vector() + vec4(test_scalar()); +} + diff --git a/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag new file mode 100644 index 00000000000..24db7c9f881 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag @@ -0,0 +1,34 @@ +#version 450 + +layout(location = 0) in vec4 A; +layout(location = 1) in vec4 B; +layout(location = 0) out vec4 FragColor; + +vec4 test_vector() +{ + bvec4 le = lessThan(A, B); + bvec4 leq = lessThanEqual(A, B); + bvec4 ge = greaterThan(A, B); + bvec4 geq = greaterThanEqual(A, B); + bvec4 eq = equal(A, B); + bvec4 neq = notEqual(A, B); + neq = notEqual(A, B); + return ((((vec4(le) + vec4(leq)) + vec4(ge)) + vec4(geq)) + vec4(eq)) + vec4(neq); +} + +float test_scalar() +{ + bool le = A.x < B.x; + bool leq = A.x <= B.x; + bool ge = A.x > B.x; + bool geq = A.x >= B.x; + bool eq = A.x == B.x; + bool neq = A.x != B.x; + return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); +} + +void main() +{ + FragColor = test_vector() + vec4(test_scalar()); +} + diff --git a/reference/shaders-no-opt/asm/frag/vector-extract-dynamic-spec-constant.asm.frag b/reference/shaders-no-opt/asm/frag/vector-extract-dynamic-spec-constant.asm.frag new file mode 100644 index 00000000000..d4f3acae097 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/vector-extract-dynamic-spec-constant.asm.frag @@ -0,0 +1,27 @@ +#version 450 + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0 +#endif +const int omap_r = 
SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1 +#endif +const int omap_g = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 2 +#endif +const int omap_b = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 3 +#endif +const int omap_a = SPIRV_CROSS_CONSTANT_ID_3; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = vec4(vColor[omap_r], vColor[omap_g], vColor[omap_b], vColor[omap_a]); +} + diff --git a/reference/shaders-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag b/reference/shaders-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag index b32d1874856..b6d3bc84900 100644 --- a/reference/shaders-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag +++ b/reference/shaders-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag @@ -1,10 +1,10 @@ #version 450 +vec4 undef; + layout(location = 0) out vec4 FragColor; layout(location = 0) in vec4 vFloat; -vec4 undef; - void main() { FragColor = vec4(undef.x, vFloat.y, 0.0, vFloat.w) + vec4(vFloat.z, vFloat.y, 0.0, vFloat.w); diff --git a/reference/opt/shaders/asm/geom/store-uint-layer.invalid.asm.geom b/reference/shaders-no-opt/asm/geom/store-uint-layer.invalid.asm.geom similarity index 100% rename from reference/opt/shaders/asm/geom/store-uint-layer.invalid.asm.geom rename to reference/shaders-no-opt/asm/geom/store-uint-layer.invalid.asm.geom diff --git a/reference/shaders-no-opt/asm/loop-header-self-continue-break.asm.comp b/reference/shaders-no-opt/asm/loop-header-self-continue-break.asm.comp new file mode 100644 index 00000000000..bd2a3c2736e --- /dev/null +++ b/reference/shaders-no-opt/asm/loop-header-self-continue-break.asm.comp @@ -0,0 +1,89 @@ +#version 450 + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1u +#endif +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1u 
+#endif +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 1u +#endif + +layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_0, local_size_y = SPIRV_CROSS_CONSTANT_ID_1, local_size_z = SPIRV_CROSS_CONSTANT_ID_2) in; + +layout(binding = 0, std430) buffer _4_6 +{ + float _m0[]; +} _6; + +layout(binding = 1, std430) buffer _4_7 +{ + float _m0[]; +} _7; + +uvec3 _28 = gl_WorkGroupSize; + +void main() +{ + float _44_copy; + float _46; + uint _47; + float _63; + uint _65; + float _36 = _6._m0[0u]; + uint _39 = 0u; + float _44; + for (;;) + { + _44 = _36; + _46 = _6._m0[35u]; + _47 = 0u; + for (;;) + { + uint _48 = _47 + 1u; + float _45 = _6._m0[_48]; + _6._m0[_47] = ((_46 + _44) + _45) / 3.0; + if (!(_47 < 34u)) + { + break; + } + else + { + _44_copy = _44; + _44 = _45; + _46 = _44_copy; + _47 = _48; + } + } + _6._m0[35u] = (_36 + (_44 + _6._m0[35u])) / 3.0; + if (!(_39 < 5u)) + { + _63 = _6._m0[0u]; + _65 = 1u; + break; + } + else + { + _36 = _6._m0[0u]; + _39++; + continue; + } + } + float _64; + for (;;) + { + _64 = (_63 < _6._m0[_65]) ? 
_6._m0[_65] : _63; + if (!(_65 < 35u)) + { + break; + } + else + { + _63 = _64; + _65++; + } + } + _7._m0[gl_GlobalInvocationID.x] = _64; +} + diff --git a/reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk b/reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..5040aa46964 --- /dev/null +++ b/reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk @@ -0,0 +1,44 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 8, triangles) out; + +struct _12 +{ + float _m0; +}; + +layout(location = 1) out vec4 B[24]; +layout(location = 3) perprimitiveEXT out vec4 C[8]; +shared float _9[64]; +taskPayloadSharedEXT _12 _11; + +void main() +{ + _9[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex); + barrier(); + SetMeshOutputsEXT(24u, 8u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.x = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.y = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.z = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.w = _9[gl_LocalInvocationIndex]; + float _63 = _11._m0 + _9[gl_LocalInvocationIndex ^ 1u]; + B[gl_LocalInvocationIndex].x = _63; + B[gl_LocalInvocationIndex].y = _63; + B[gl_LocalInvocationIndex].z = _63; + B[gl_LocalInvocationIndex].w = _63; + if (gl_LocalInvocationIndex < 8u) + { + uint _71 = gl_LocalInvocationIndex * 3u; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(_71, _71 + 1u, _71 + 2u); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_LocalInvocationIndex & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_LocalInvocationIndex); + 
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_LocalInvocationIndex); + uint _81 = gl_LocalInvocationIndex ^ 2u; + C[gl_LocalInvocationIndex].x = _9[_81]; + C[gl_LocalInvocationIndex].y = _9[_81]; + C[gl_LocalInvocationIndex].z = _9[_81]; + C[gl_LocalInvocationIndex].w = _9[_81]; + } +} + diff --git a/reference/shaders-no-opt/asm/rgen/acceleration-nonuniform.spv14.vk.nocompat.asm.rgen.vk b/reference/shaders-no-opt/asm/rgen/acceleration-nonuniform.spv14.vk.nocompat.asm.rgen.vk new file mode 100644 index 00000000000..a72a7cf48aa --- /dev/null +++ b/reference/shaders-no-opt/asm/rgen/acceleration-nonuniform.spv14.vk.nocompat.asm.rgen.vk @@ -0,0 +1,19 @@ +#version 460 +#extension GL_EXT_ray_tracing : require +#extension GL_EXT_nonuniform_qualifier : require + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as[]; +layout(location = 0) rayPayloadEXT float payload; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +void main() +{ + vec4 col = vec4(0.0, 0.0, 0.0, 1.0); + vec3 origin = vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + uint _62 = gl_LaunchIDEXT.x; + traceRayEXT(as[nonuniformEXT(_62)], 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + col.y = payload; + imageStore(image, ivec2(gl_LaunchIDEXT.xy), col); +} + diff --git a/reference/shaders-no-opt/asm/task/task-shader.vk.nocompat.spv14.asm.task.vk b/reference/shaders-no-opt/asm/task/task-shader.vk.nocompat.spv14.asm.task.vk new file mode 100644 index 00000000000..1d491e7014b --- /dev/null +++ b/reference/shaders-no-opt/asm/task/task-shader.vk.nocompat.spv14.asm.task.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; + +shared float vs[24]; +taskPayloadSharedEXT Payload p; + +void main() +{ + 
vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u]; + } + barrier(); + if (gl_LocalInvocationIndex < 6u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u]; + } + barrier(); + if (gl_LocalInvocationIndex < 3u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u]; + } + barrier(); + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8]))); +} + diff --git a/reference/shaders-no-opt/asm/temporary.zero-initialize.asm.frag b/reference/shaders-no-opt/asm/temporary.zero-initialize.asm.frag new file mode 100644 index 00000000000..1b8e8cd3295 --- /dev/null +++ b/reference/shaders-no-opt/asm/temporary.zero-initialize.asm.frag @@ -0,0 +1,28 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) flat in mediump int vA; +layout(location = 1) flat in mediump int vB; + +void main() +{ + FragColor = vec4(0.0); + mediump int _10 = 0; + mediump int _15 = 0; + for (mediump int _16 = 0, _17 = 0; _16 < vA; _17 = _15, _16 += _10) + { + if ((vA + _16) == 20) + { + _15 = 50; + } + else + { + _15 = ((vB + _16) == 40) ? 
60 : _17; + } + _10 = _15 + 10; + FragColor += vec4(1.0); + } +} + diff --git a/reference/shaders-no-opt/asm/tesc/array-of-block-output-initializer.asm.tesc b/reference/shaders-no-opt/asm/tesc/array-of-block-output-initializer.asm.tesc new file mode 100644 index 00000000000..13e1d3294b1 --- /dev/null +++ b/reference/shaders-no-opt/asm/tesc/array-of-block-output-initializer.asm.tesc @@ -0,0 +1,82 @@ +#version 450 +layout(vertices = 4) out; + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[1]; +} gl_out[4]; + +layout(location = 0) patch out vert +{ + float v0; + float v1; +} _5; + +layout(location = 2) patch out vert_patch +{ + float v2; + float v3; +} patches[2]; + +layout(location = 6) patch out float v2; +layout(location = 7) out float v3[4]; +layout(location = 8) out vert2 +{ + float v4; + float v5; +} verts[4]; + +const vec4 _3_0_init[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); +const float _3_1_init[4] = float[](0.0, 0.0, 0.0, 0.0); +const float _3_2_init[4][1] = float[][](float[](0.0), float[](0.0), float[](0.0), float[](0.0)); +const float _3_3_init[4][1] = float[][](float[](0.0), float[](0.0), float[](0.0), float[](0.0)); +const float _6_0_init[2] = float[](0.0, 0.0); +const float _6_1_init[2] = float[](0.0, 0.0); +const float _7_init = 0.0; +const float _8_init[4] = float[](0.0, 0.0, 0.0, 0.0); +const float _9_0_init[4] = float[](0.0, 0.0, 0.0, 0.0); +const float _9_1_init[4] = float[](0.0, 0.0, 0.0, 0.0); + +void main() +{ + gl_out[gl_InvocationID].gl_Position = _3_0_init[gl_InvocationID]; + gl_out[gl_InvocationID].gl_PointSize = _3_1_init[gl_InvocationID]; + gl_out[gl_InvocationID].gl_ClipDistance = _3_2_init[gl_InvocationID]; + gl_out[gl_InvocationID].gl_CullDistance = _3_3_init[gl_InvocationID]; + if (gl_InvocationID == 0) + { + _5.v0 = 0.0; + } + if (gl_InvocationID == 0) + { + _5.v1 = 0.0; + } + if (gl_InvocationID == 0) + { + patches[0].v2 = _6_0_init[0]; + } + if 
(gl_InvocationID == 0) + { + patches[1].v2 = _6_0_init[1]; + } + if (gl_InvocationID == 0) + { + patches[0].v3 = _6_1_init[0]; + } + if (gl_InvocationID == 0) + { + patches[1].v3 = _6_1_init[1]; + } + if (gl_InvocationID == 0) + { + v2 = _7_init; + } + v3[gl_InvocationID] = _8_init[gl_InvocationID]; + verts[gl_InvocationID].v4 = _9_0_init[gl_InvocationID]; + verts[gl_InvocationID].v5 = _9_1_init[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = vec4(1.0); +} + diff --git a/reference/shaders-no-opt/asm/tesc/copy-memory-control-point.asm.tesc b/reference/shaders-no-opt/asm/tesc/copy-memory-control-point.asm.tesc new file mode 100644 index 00000000000..3412f1cf5c5 --- /dev/null +++ b/reference/shaders-no-opt/asm/tesc/copy-memory-control-point.asm.tesc @@ -0,0 +1,73 @@ +#version 450 +layout(vertices = 3) out; + +layout(binding = 0, std140) uniform cb1_struct +{ + vec4 _m0[1]; +} cb0_0; + +layout(location = 0) in vec4 v0[]; +layout(location = 1) in vec4 v1[]; +layout(location = 2) in vec3 vicp0[]; +layout(location = 3) out vec3 vocp0[3]; +layout(location = 4) in vec4 vicp1[]; +layout(location = 5) out vec4 vocp1[3]; +vec4 opc[4]; +vec4 vicp[2][3]; +vec4 _48; +vec4 _49; +vec4 _50; +vec4 _56; + +void fork0_epilogue(vec4 _87, vec4 _88, vec4 _89) +{ + gl_TessLevelOuter[0u] = _87.x; + gl_TessLevelOuter[1u] = _88.x; + gl_TessLevelOuter[2u] = _89.x; +} + +void fork0(uint vForkInstanceId) +{ + vec4 r0; + r0.x = uintBitsToFloat(vForkInstanceId); + opc[floatBitsToInt(r0.x)].x = cb0_0._m0[0u].x; + _48 = opc[0u]; + _49 = opc[1u]; + _50 = opc[2u]; + fork0_epilogue(_48, _49, _50); +} + +void fork1_epilogue(vec4 _109) +{ + gl_TessLevelInner[0u] = _109.x; +} + +void fork1() +{ + opc[3u].x = cb0_0._m0[0u].x; + _56 = opc[3u]; + fork1_epilogue(_56); +} + +void main() +{ + vec4 _126_unrolled[3]; + for (int i = 0; i < int(3); i++) + { + _126_unrolled[i] = v0[i]; + } + vicp[0u] = _126_unrolled; + vec4 _127_unrolled[3]; + for (int i = 0; i < int(3); i++) + { + _127_unrolled[i] = 
v1[i]; + } + vicp[1u] = _127_unrolled; + vocp0[gl_InvocationID] = vicp0[gl_InvocationID]; + vocp1[gl_InvocationID] = vicp1[gl_InvocationID]; + fork0(0u); + fork0(1u); + fork0(2u); + fork1(); +} + diff --git a/reference/opt/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/shaders-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc similarity index 100% rename from reference/opt/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc rename to reference/shaders-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc diff --git a/reference/shaders-no-opt/asm/tesc/tess-level-initializer.asm.tesc b/reference/shaders-no-opt/asm/tesc/tess-level-initializer.asm.tesc new file mode 100644 index 00000000000..ebd2d8aeac3 --- /dev/null +++ b/reference/shaders-no-opt/asm/tesc/tess-level-initializer.asm.tesc @@ -0,0 +1,24 @@ +#version 450 +layout(vertices = 4) out; + +const float _5_init[2] = float[](0.0, 0.0); +const float _6_init[4] = float[](0.0, 0.0, 0.0, 0.0); +void main() +{ + if (gl_InvocationID == 0) + { + gl_TessLevelInner = _5_init; + } + if (gl_InvocationID == 0) + { + gl_TessLevelOuter = _6_init; + } + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + gl_TessLevelInner[0] = 1.0; + gl_TessLevelInner[1] = 2.0; + gl_TessLevelOuter[0] = 3.0; + gl_TessLevelOuter[1] = 4.0; + gl_TessLevelOuter[2] = 5.0; + gl_TessLevelOuter[3] = 6.0; +} + diff --git a/reference/shaders-no-opt/asm/vert/block-struct-initializer.asm.vert b/reference/shaders-no-opt/asm/vert/block-struct-initializer.asm.vert new file mode 100644 index 00000000000..6060888d81f --- /dev/null +++ b/reference/shaders-no-opt/asm/vert/block-struct-initializer.asm.vert @@ -0,0 +1,24 @@ +#version 450 + +struct Foo +{ + float c; + float d; +}; + +layout(location = 0) out Vert +{ + float a; + float b; +} _3; + +layout(location = 2) out Foo foo; +const Foo _4_init = Foo(0.0, 0.0); + +void main() +{ + _3.a = 0.0; + _3.b = 0.0; + foo = _4_init; 
+} + diff --git a/reference/shaders-no-opt/asm/vert/builtin-output-initializer.asm.vert b/reference/shaders-no-opt/asm/vert/builtin-output-initializer.asm.vert new file mode 100644 index 00000000000..b449f080575 --- /dev/null +++ b/reference/shaders-no-opt/asm/vert/builtin-output-initializer.asm.vert @@ -0,0 +1,14 @@ +#version 450 + +out float gl_ClipDistance[1]; +out float gl_CullDistance[1]; + +void main() +{ + gl_Position = vec4(0.0); + gl_PointSize = 0.0; + gl_ClipDistance = float[](0.0); + gl_CullDistance = float[](0.0); + gl_Position = vec4(1.0); +} + diff --git a/reference/shaders-no-opt/asm/vert/complex-link-by-name.asm.vert b/reference/shaders-no-opt/asm/vert/complex-link-by-name.asm.vert new file mode 100644 index 00000000000..03271409b76 --- /dev/null +++ b/reference/shaders-no-opt/asm/vert/complex-link-by-name.asm.vert @@ -0,0 +1,35 @@ +#version 450 + +struct Struct_vec4 +{ + vec4 m0; +}; + +layout(binding = 0, std140) uniform UBO +{ + Struct_vec4 m0; + Struct_vec4 m1; +} ubo_binding_0; + +layout(location = 0) out VertexOut +{ + Struct_vec4 m0; + Struct_vec4 m1; +} output_location_0; + +layout(location = 2) out Struct_vec4 output_location_2; +layout(location = 3) out Struct_vec4 output_location_3; + +void main() +{ + Struct_vec4 c; + c.m0 = ubo_binding_0.m0.m0; + Struct_vec4 b; + b.m0 = ubo_binding_0.m1.m0; + gl_Position = c.m0 + b.m0; + output_location_0.m0 = c; + output_location_0.m1 = b; + output_location_2 = c; + output_location_3 = b; +} + diff --git a/reference/shaders-no-opt/asm/vert/complex-link-by-name.force-flattened-io.legacy.asm.vert b/reference/shaders-no-opt/asm/vert/complex-link-by-name.force-flattened-io.legacy.asm.vert new file mode 100644 index 00000000000..280399b44d9 --- /dev/null +++ b/reference/shaders-no-opt/asm/vert/complex-link-by-name.force-flattened-io.legacy.asm.vert @@ -0,0 +1,33 @@ +#version 100 + +struct Struct_vec4 +{ + vec4 m0; +}; + +struct UBO +{ + Struct_vec4 m0; + Struct_vec4 m1; +}; + +uniform UBO ubo_binding_0; + 
+varying vec4 output_location_0_m0_m0; +varying vec4 output_location_0_m1_m0; +varying vec4 output_location_2_m0; +varying vec4 output_location_3_m0; + +void main() +{ + Struct_vec4 c; + c.m0 = ubo_binding_0.m0.m0; + Struct_vec4 b; + b.m0 = ubo_binding_0.m1.m0; + gl_Position = c.m0 + b.m0; + output_location_0_m0_m0 = c.m0; + output_location_0_m1_m0 = b.m0; + output_location_2_m0 = c.m0; + output_location_3_m0 = b.m0; +} + diff --git a/reference/shaders-no-opt/asm/vert/constant-composite-extract.asm.vert b/reference/shaders-no-opt/asm/vert/constant-composite-extract.asm.vert new file mode 100644 index 00000000000..a1fe3e50acc --- /dev/null +++ b/reference/shaders-no-opt/asm/vert/constant-composite-extract.asm.vert @@ -0,0 +1,7 @@ +#version 450 + +void main() +{ + gl_Position = (vec4(1.0, 2.0, 3.0, 4.0) + vec4(5.0, 6.0, 7.0, 8.0)) + (vec4(1.0, 2.0, 3.0, 4.0) + vec4(4.0, 3.0, 8.0, 2.0)); +} + diff --git a/reference/shaders-no-opt/asm/vert/debug-printf.asm.vk.nocompat.vert.vk b/reference/shaders-no-opt/asm/vert/debug-printf.asm.vk.nocompat.vert.vk new file mode 100644 index 00000000000..b90912d1cc2 --- /dev/null +++ b/reference/shaders-no-opt/asm/vert/debug-printf.asm.vk.nocompat.vert.vk @@ -0,0 +1,10 @@ +#version 450 +#extension GL_EXT_debug_printf : require + +void main() +{ + debugPrintfEXT("Foo %f %f", 1.0, 2.0); + vec4 _17 = vec4(0.0, 0.0, 0.0, 1.0); + gl_Position = vec4(0.0, 0.0, 0.0, 1.0); +} + diff --git a/reference/shaders/comp/bitcast-16bit-1.invalid.comp b/reference/shaders-no-opt/comp/bitcast-16bit-1.invalid.comp similarity index 78% rename from reference/shaders/comp/bitcast-16bit-1.invalid.comp rename to reference/shaders-no-opt/comp/bitcast-16bit-1.invalid.comp index 501f97955fc..85fdcdba7dd 100644 --- a/reference/shaders/comp/bitcast-16bit-1.invalid.comp +++ b/reference/shaders-no-opt/comp/bitcast-16bit-1.invalid.comp @@ -6,8 +6,12 @@ #else #error No extension available for FP16. 
#endif -#if defined(GL_AMD_gpu_shader_int16) +#if defined(GL_EXT_shader_explicit_arithmetic_types_int16) +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#elif defined(GL_AMD_gpu_shader_int16) #extension GL_AMD_gpu_shader_int16 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require #else #error No extension available for Int16. #endif diff --git a/reference/opt/shaders/comp/bitcast-16bit-2.invalid.comp b/reference/shaders-no-opt/comp/bitcast-16bit-2.invalid.comp similarity index 52% rename from reference/opt/shaders/comp/bitcast-16bit-2.invalid.comp rename to reference/shaders-no-opt/comp/bitcast-16bit-2.invalid.comp index bddc16d62bc..506d4e55780 100644 --- a/reference/opt/shaders/comp/bitcast-16bit-2.invalid.comp +++ b/reference/shaders-no-opt/comp/bitcast-16bit-2.invalid.comp @@ -1,6 +1,10 @@ #version 450 -#if defined(GL_AMD_gpu_shader_int16) +#if defined(GL_EXT_shader_explicit_arithmetic_types_int16) +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#elif defined(GL_AMD_gpu_shader_int16) #extension GL_AMD_gpu_shader_int16 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require #else #error No extension available for Int16. 
#endif @@ -31,9 +35,13 @@ layout(binding = 2, std140) uniform UBO void main() { uint ident = gl_GlobalInvocationID.x; - i16vec2 _47 = unpackInt2x16(_29.inputs[ident].x) + float16BitsToInt16(_40.const0.xy); - _21.outputs[ident] = i16vec4(_47.x, _47.y, _21.outputs[ident].z, _21.outputs[ident].w); - i16vec2 _66 = i16vec2(unpackUint2x16(uint(_29.inputs[ident].y)) - float16BitsToUint16(_40.const0.zw)); - _21.outputs[ident] = i16vec4(_21.outputs[ident].x, _21.outputs[ident].y, _66.x, _66.y); + int _33 = _29.inputs[ident].x; + i16vec2 _47 = unpackInt2x16(_33) + float16BitsToInt16(_40.const0.xy); + _21.outputs[ident].x = _47.x; + _21.outputs[ident].y = _47.y; + int _57 = _29.inputs[ident].y; + i16vec2 _67 = i16vec2(unpackUint2x16(uint(_57)) - float16BitsToUint16(_40.const0.zw)); + _21.outputs[ident].z = _67.x; + _21.outputs[ident].w = _67.y; } diff --git a/reference/shaders-no-opt/comp/glsl.std450.comp b/reference/shaders-no-opt/comp/glsl.std450.comp new file mode 100644 index 00000000000..d2628a9ab62 --- /dev/null +++ b/reference/shaders-no-opt/comp/glsl.std450.comp @@ -0,0 +1,112 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct ResType +{ + float _m0; + int _m1; +}; + +layout(binding = 0, std430) buffer SSBO +{ + float res; + int ires; + uint ures; + vec4 f32; + ivec4 s32; + uvec4 u32; + mat2 m2; + mat3 m3; + mat4 m4; +} _19; + +void main() +{ + _19.res = round(_19.f32.x); + _19.res = roundEven(_19.f32.x); + _19.res = trunc(_19.f32.x); + _19.res = abs(_19.f32.x); + _19.ires = abs(_19.s32.x); + _19.res = sign(_19.f32.x); + _19.ires = sign(_19.s32.x); + _19.res = floor(_19.f32.x); + _19.res = ceil(_19.f32.x); + _19.res = fract(_19.f32.x); + _19.res = radians(_19.f32.x); + _19.res = degrees(_19.f32.x); + _19.res = sin(_19.f32.x); + _19.res = cos(_19.f32.x); + _19.res = tan(_19.f32.x); + _19.res = asin(_19.f32.x); + _19.res = acos(_19.f32.x); + _19.res = atan(_19.f32.x); + _19.res = sinh(_19.f32.x); + _19.res = cosh(_19.f32.x); + 
_19.res = tanh(_19.f32.x); + _19.res = asinh(_19.f32.x); + _19.res = acosh(_19.f32.x); + _19.res = atanh(_19.f32.x); + _19.res = atan(_19.f32.x, _19.f32.y); + _19.res = pow(_19.f32.x, _19.f32.y); + _19.res = exp(_19.f32.x); + _19.res = log(_19.f32.x); + _19.res = exp2(_19.f32.x); + _19.res = log2(_19.f32.x); + _19.res = sqrt(_19.f32.x); + _19.res = inversesqrt(_19.f32.x); + _19.res = length(_19.f32.x); + _19.res = distance(_19.f32.x, _19.f32.y); + _19.res = normalize(_19.f32.x); + _19.res = faceforward(_19.f32.x, _19.f32.y, _19.f32.z); + _19.res = reflect(_19.f32.x, _19.f32.y); + _19.res = refract(_19.f32.x, _19.f32.y, _19.f32.z); + _19.res = length(_19.f32.xy); + _19.res = distance(_19.f32.xy, _19.f32.zw); + vec2 v2 = normalize(_19.f32.xy); + v2 = faceforward(_19.f32.xy, _19.f32.yz, _19.f32.zw); + v2 = reflect(_19.f32.xy, _19.f32.zw); + v2 = refract(_19.f32.xy, _19.f32.yz, _19.f32.w); + vec3 v3 = cross(_19.f32.xyz, _19.f32.yzw); + _19.res = determinant(_19.m2); + _19.res = determinant(_19.m3); + _19.res = determinant(_19.m4); + _19.m2 = inverse(_19.m2); + _19.m3 = inverse(_19.m3); + _19.m4 = inverse(_19.m4); + float tmp; + float _287 = modf(_19.f32.x, tmp); + _19.res = _287; + _19.res = min(_19.f32.x, _19.f32.y); + _19.ures = min(_19.u32.x, _19.u32.y); + _19.ires = min(_19.s32.x, _19.s32.y); + _19.res = max(_19.f32.x, _19.f32.y); + _19.ures = max(_19.u32.x, _19.u32.y); + _19.ires = max(_19.s32.x, _19.s32.y); + _19.res = clamp(_19.f32.x, _19.f32.y, _19.f32.z); + _19.ures = clamp(_19.u32.x, _19.u32.y, _19.u32.z); + _19.ires = clamp(_19.s32.x, _19.s32.y, _19.s32.z); + _19.res = mix(_19.f32.x, _19.f32.y, _19.f32.z); + _19.res = step(_19.f32.x, _19.f32.y); + _19.res = smoothstep(_19.f32.x, _19.f32.y, _19.f32.z); + _19.res = fma(_19.f32.x, _19.f32.y, _19.f32.z); + ResType _387; + _387._m0 = frexp(_19.f32.x, _387._m1); + int itmp = _387._m1; + _19.res = _387._m0; + _19.res = ldexp(_19.f32.x, itmp); + _19.ures = packSnorm4x8(_19.f32); + _19.ures = packUnorm4x8(_19.f32); + 
_19.ures = packSnorm2x16(_19.f32.xy); + _19.ures = packUnorm2x16(_19.f32.xy); + _19.ures = packHalf2x16(_19.f32.xy); + v2 = unpackSnorm2x16(_19.u32.x); + v2 = unpackUnorm2x16(_19.u32.x); + v2 = unpackHalf2x16(_19.u32.x); + vec4 v4 = unpackSnorm4x8(_19.u32.x); + v4 = unpackUnorm4x8(_19.u32.x); + _19.s32 = findLSB(_19.s32); + _19.s32 = findLSB(_19.u32); + _19.s32 = findMSB(_19.s32); + _19.s32 = findMSB(_19.u32); +} + diff --git a/reference/shaders-no-opt/comp/illegal-struct-name.asm.comp b/reference/shaders-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 00000000000..885dcb3baa5 --- /dev/null +++ b/reference/shaders-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,22 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct Foo +{ + float _abs; +}; + +layout(binding = 0, std430) buffer SSBO +{ + Foo foo; + Foo foo2; +} _7; + +void main() +{ + Foo f; + f._abs = _7.foo._abs; + int _abs = 10; + _7.foo2._abs = f._abs; +} + diff --git a/reference/shaders-no-opt/comp/image-load-formatted.comp b/reference/shaders-no-opt/comp/image-load-formatted.comp new file mode 100644 index 00000000000..e11b8febfa7 --- /dev/null +++ b/reference/shaders-no-opt/comp/image-load-formatted.comp @@ -0,0 +1,12 @@ +#version 450 +#extension GL_EXT_shader_image_load_formatted : require +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(binding = 0) uniform image2D img; + +void main() +{ + vec4 v = imageLoad(img, ivec2(gl_GlobalInvocationID.xy)); + imageStore(img, ivec2(gl_GlobalInvocationID.xy), v + vec4(1.0)); +} + diff --git a/reference/opt/shaders/comp/inout-struct.invalid.comp b/reference/shaders-no-opt/comp/inout-struct.invalid.comp similarity index 100% rename from reference/opt/shaders/comp/inout-struct.invalid.comp rename to reference/shaders-no-opt/comp/inout-struct.invalid.comp diff --git a/reference/shaders-no-opt/comp/int16min-literal.comp b/reference/shaders-no-opt/comp/int16min-literal.comp new file 
mode 100644 index 00000000000..f4bae2fec60 --- /dev/null +++ b/reference/shaders-no-opt/comp/int16min-literal.comp @@ -0,0 +1,36 @@ +#version 450 +#if defined(GL_EXT_shader_explicit_arithmetic_types_int16) +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#elif defined(GL_AMD_gpu_shader_int16) +#extension GL_AMD_gpu_shader_int16 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for Int16. +#endif +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for FP16. +#endif +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std140) uniform UBO +{ + float16_t b; +} _12; + +layout(binding = 1, std430) buffer SSBO +{ + float16_t a; +} _24; + +void main() +{ + int16_t v = float16BitsToInt16(_12.b); + v ^= (-32768s); + _24.a = int16BitsToFloat16(v); +} + diff --git a/reference/shaders-no-opt/comp/int64min-literal.comp b/reference/shaders-no-opt/comp/int64min-literal.comp new file mode 100644 index 00000000000..63bd0fdaf2d --- /dev/null +++ b/reference/shaders-no-opt/comp/int64min-literal.comp @@ -0,0 +1,29 @@ +#version 450 +#if defined(GL_ARB_gpu_shader_int64) +#extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. 
+#endif +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std140) uniform UBO +{ + float b; +} _12; + +layout(binding = 1, std430) buffer SSBO +{ + float a; +} _32; + +void main() +{ + double b2 = double(_12.b); + int64_t v = doubleBitsToInt64(b2); + v ^= int64_t(0x8000000000000000ul); + double a2 = int64BitsToDouble(v); + _32.a = float(a2); +} + diff --git a/reference/shaders-no-opt/comp/intmin-literal.comp b/reference/shaders-no-opt/comp/intmin-literal.comp new file mode 100644 index 00000000000..5a4896f9070 --- /dev/null +++ b/reference/shaders-no-opt/comp/intmin-literal.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 1, std430) buffer SSBO +{ + float a; +} _9; + +layout(binding = 0, std140) uniform UBO +{ + float b; +} _14; + +void main() +{ + _9.a = intBitsToFloat(floatBitsToInt(_14.b) ^ int(0x80000000)); +} + diff --git a/reference/shaders-no-opt/comp/loop-break-merge-after-inner-continue.comp b/reference/shaders-no-opt/comp/loop-break-merge-after-inner-continue.comp new file mode 100644 index 00000000000..5b4cb886b4e --- /dev/null +++ b/reference/shaders-no-opt/comp/loop-break-merge-after-inner-continue.comp @@ -0,0 +1,22 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer STO +{ + uint data[]; +} ssbo; + +void main() +{ + while (true) + { + ssbo.data[0]++; + if (ssbo.data[2] != 0u) + { + ssbo.data[5]++; + continue; + } + break; + } +} + diff --git a/reference/shaders-no-opt/comp/loop-resolve-debug-semantics.g.comp b/reference/shaders-no-opt/comp/loop-resolve-debug-semantics.g.comp new file mode 100644 index 00000000000..78ebc26c522 --- /dev/null +++ b/reference/shaders-no-opt/comp/loop-resolve-debug-semantics.g.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + int v[]; +} _23; + 
+void main() +{ + for (int i = 0; i < 4; i++) + { + _23.v[i] += 10; + } +} + diff --git a/reference/shaders-no-opt/comp/loop-resolve-debug-semantics.gV.comp b/reference/shaders-no-opt/comp/loop-resolve-debug-semantics.gV.comp new file mode 100644 index 00000000000..8b6a0321044 --- /dev/null +++ b/reference/shaders-no-opt/comp/loop-resolve-debug-semantics.gV.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + int v[]; +} _64; + +void main() +{ + for (int i = 0; i < 4; i++) + { + _64.v[i] += 10; + } +} + diff --git a/reference/shaders-no-opt/comp/loop.comp b/reference/shaders-no-opt/comp/loop.comp index 049a30669cd..2ba731cdc7d 100644 --- a/reference/shaders-no-opt/comp/loop.comp +++ b/reference/shaders-no-opt/comp/loop.comp @@ -7,11 +7,6 @@ layout(binding = 0, std430) readonly buffer SSBO vec4 in_data[]; } _24; -layout(binding = 1, std430) writeonly buffer SSBO2 -{ - vec4 out_data[]; -} _177; - void main() { uint ident = gl_GlobalInvocationID.x; @@ -83,23 +78,5 @@ void main() k += 10; continue; } - k = 0; - do - { - k++; - } while (k > 10); - int l = 0; - for (;;) - { - if (l == 5) - { - l++; - continue; - } - idat += vec4(1.0); - l++; - continue; - } - _177.out_data[ident] = idat; } diff --git a/reference/shaders-no-opt/comp/return.comp b/reference/shaders-no-opt/comp/return.comp index 4be20e93e41..4802be2244a 100644 --- a/reference/shaders-no-opt/comp/return.comp +++ b/reference/shaders-no-opt/comp/return.comp @@ -21,7 +21,8 @@ void main() return; } } - for (int i = 0; i < 20; i++) + int i = 0; + while (i < 20) { if (i == 10) { diff --git a/reference/opt/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp b/reference/shaders-no-opt/comp/shader_ballot_nonuniform_invocations.invalid.comp similarity index 100% rename from reference/opt/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp rename to 
reference/shaders-no-opt/comp/shader_ballot_nonuniform_invocations.invalid.comp diff --git a/reference/shaders-no-opt/comp/specialization-constant-evaluation.comp b/reference/shaders-no-opt/comp/specialization-constant-evaluation.comp new file mode 100644 index 00000000000..695835968af --- /dev/null +++ b/reference/shaders-no-opt/comp/specialization-constant-evaluation.comp @@ -0,0 +1,321 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 1 +#endif +const int SONE = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 2 +#endif +const int STWO = SPIRV_CROSS_CONSTANT_ID_3; +const int _10 = (SONE + STWO); +const uint _13 = (uint(_10) + 0u); +#ifndef SPIRV_CROSS_CONSTANT_ID_5 +#define SPIRV_CROSS_CONSTANT_ID_5 1u +#endif +const uint UONE = SPIRV_CROSS_CONSTANT_ID_5; +const uint _15 = (_13 + UONE); +#ifndef SPIRV_CROSS_CONSTANT_ID_6 +#define SPIRV_CROSS_CONSTANT_ID_6 2u +#endif +const uint UTWO = SPIRV_CROSS_CONSTANT_ID_6; +const uint IADD = (_15 + UTWO); +const uint _19 = (IADD - 5u); +const uint _28 = (uint(SONE) + 0u); +const uint ISUB = (UTWO - _28); +const uint IMUL = (UTWO * UTWO); +const uint _37 = (IMUL - 3u); +const uint UDIV = (UTWO / UTWO); +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 -2 +#endif +const int SNEG_TWO = SPIRV_CROSS_CONSTANT_ID_4; +const int SDIV = (STWO / SNEG_TWO); +const int _52 = (SDIV + 2); +#ifndef SPIRV_CROSS_CONSTANT_ID_7 +#define SPIRV_CROSS_CONSTANT_ID_7 -3 +#endif +const int SNEG_THREE = SPIRV_CROSS_CONSTANT_ID_7; +const int SMOD = (STWO % SNEG_THREE); +const int _66 = (SMOD + 2); +const uint UMOD = (IADD % IMUL); +const uint _73 = (UMOD - 1u); +const uint LSHL = (IADD << ISUB); +const uint _81 = (LSHL - 11u); +const uint RSHL = (IADD >> ISUB); +const uint _89 = (RSHL - 2u); +const int _95 = int(IADD + 0u); +const int _96 = (-_95); +const int _97 = (-SDIV); +const 
int RSHA = (_96 >> _97); +const int _100 = (RSHA + 4); +const bool IEQ = (IADD == ISUB); +const int _109 = IEQ ? 2 : 1; +const bool INEQ = (IADD != ISUB); +const int _116 = INEQ ? 1 : 2; +const bool ULT = (IADD < ISUB); +const int _123 = ULT ? 2 : 1; +const bool ULE = (IADD <= ISUB); +const int _130 = ULE ? 2 : 1; +const bool UGT = (IADD > ISUB); +const int _137 = UGT ? 1 : 2; +const bool UGE = (IADD >= ISUB); +const int _144 = UGE ? 1 : 2; +const bool SLT = (SMOD < 1); +const int _151 = SLT ? 1 : 2; +const bool SLE = (SMOD <= 1); +const int _158 = SLE ? 1 : 2; +const bool SGT = (SMOD > 1); +const int _165 = SGT ? 2 : 1; +const bool SGE = (SMOD >= 1); +const int _172 = SGE ? 2 : 1; +const bool LOR = (IEQ || SLT); +const int _179 = LOR ? 1 : 2; +const bool LAND = (IEQ && SLT); +const int _186 = LAND ? 2 : 1; +const bool LNOT = (!LOR); +const int _193 = LNOT ? 2 : 1; +const uint AND = (IADD & IADD); +const uint _200 = (AND - 5u); +const uint OR = (IADD | ISUB); +const uint _208 = (OR - 6u); +const uint XOR = (IADD ^ IADD); +const uint _215 = (XOR + 1u); +const uint NOT = (~XOR); +const uint _223 = (NOT - 4294967294u); +const bool LEQ = (LAND == LNOT); +const int _230 = LEQ ? 1 : 2; +const bool LNEQ = (LAND != LNOT); +const int _237 = LNEQ ? 2 : 1; +const uint SEL = IEQ ? 
IADD : ISUB; +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 true +#endif +const bool TRUE = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 false +#endif +const bool FALSE = SPIRV_CROSS_CONSTANT_ID_1; + +layout(binding = 0, std430) buffer SSBO_IAdd +{ + float val[_19]; + float dummy; +} IAdd; + +layout(binding = 1, std430) buffer SSBO_ISub +{ + float val[ISUB]; + float dummy; +} ISub; + +layout(binding = 2, std430) buffer SSBO_IMul +{ + float val[_37]; + float dummy; +} IMul; + +layout(binding = 3, std430) buffer SSBO_UDiv +{ + float val[UDIV]; + float dummy; +} UDiv; + +layout(binding = 4, std430) buffer SSBO_SDiv +{ + float val[_52]; + float dummy; +} SDiv; + +layout(binding = 5, std430) buffer SSBO_SRem +{ + float val[1]; + float dummy; +} SRem; + +layout(binding = 6, std430) buffer SSBO_SMod +{ + float val[_66]; + float dummy; +} SMod; + +layout(binding = 7, std430) buffer SSBO_UMod +{ + float val[_73]; + float dummy; +} UMod; + +layout(binding = 8, std430) buffer SSBO_LShl +{ + float val[_81]; + float dummy; +} LShl; + +layout(binding = 9, std430) buffer SSBO_RShl +{ + float val[_89]; + float dummy; +} RShl; + +layout(binding = 10, std430) buffer SSBO_RSha +{ + float val[_100]; + float dummy; +} RSha; + +layout(binding = 11, std430) buffer SSBO_IEq +{ + float val[_109]; + float dummy; +} IEq; + +layout(binding = 12, std430) buffer SSBO_INeq +{ + float val[_116]; + float dummy; +} INeq; + +layout(binding = 13, std430) buffer SSBO_Ult +{ + float val[_123]; + float dummy; +} Ult; + +layout(binding = 14, std430) buffer SSBO_Ule +{ + float val[_130]; + float dummy; +} Ule; + +layout(binding = 15, std430) buffer SSBO_Ugt +{ + float val[_137]; + float dummy; +} Ugt; + +layout(binding = 16, std430) buffer SSBO_Uge +{ + float val[_144]; + float dummy; +} Uge; + +layout(binding = 17, std430) buffer SSBO_Slt +{ + float val[_151]; + float dummy; +} Slt; + +layout(binding = 18, std430) buffer SSBO_Sle 
+{ + float val[_158]; + float dummy; +} Sle; + +layout(binding = 19, std430) buffer SSBO_Sgt +{ + float val[_165]; + float dummy; +} Sgt; + +layout(binding = 20, std430) buffer SSBO_Sge +{ + float val[_172]; + float dummy; +} Sge; + +layout(binding = 21, std430) buffer SSBO_Lor +{ + float val[_179]; + float dummy; +} Lor; + +layout(binding = 22, std430) buffer SSBO_Land +{ + float val[_186]; + float dummy; +} Land; + +layout(binding = 23, std430) buffer SSBO_Lnot +{ + float val[_193]; + float dummy; +} Lnot; + +layout(binding = 24, std430) buffer SSBO_And +{ + float val[_200]; + float dummy; +} And; + +layout(binding = 24, std430) buffer SSBO_Or +{ + float val[_208]; + float dummy; +} Or; + +layout(binding = 24, std430) buffer SSBO_Xor +{ + float val[_215]; + float dummy; +} Xor; + +layout(binding = 25, std430) buffer SSBO_Not +{ + float val[_223]; + float dummy; +} Not; + +layout(binding = 26, std430) buffer SSBO_Leq +{ + float val[_230]; + float dummy; +} Leq; + +layout(binding = 27, std430) buffer SSBO_Lneq +{ + float val[_237]; + float dummy; +} Lneq; + +layout(binding = 28, std430) buffer SSBO_Sel +{ + float val[SEL]; + float dummy; +} Sel; + +void main() +{ + IAdd.val[0] = 0.0; + ISub.val[0] = 0.0; + IMul.val[0] = 0.0; + UDiv.val[0] = 0.0; + SDiv.val[0] = 0.0; + SRem.val[0] = 0.0; + SMod.val[0] = 0.0; + UMod.val[0] = 0.0; + LShl.val[0] = 0.0; + RShl.val[0] = 0.0; + RSha.val[0] = 0.0; + IEq.val[0] = 0.0; + INeq.val[0] = 0.0; + Ult.val[0] = 0.0; + Ule.val[0] = 0.0; + Ugt.val[0] = 0.0; + Uge.val[0] = 0.0; + Slt.val[0] = 0.0; + Sle.val[0] = 0.0; + Sgt.val[0] = 0.0; + Sge.val[0] = 0.0; + Lor.val[0] = 0.0; + Land.val[0] = 0.0; + Lnot.val[0] = 0.0; + And.val[0] = 0.0; + Or.val[0] = 0.0; + Xor.val[0] = 0.0; + Not.val[0] = 0.0; + Leq.val[0] = 0.0; + Lneq.val[0] = 0.0; + Sel.val[0] = 0.0; +} + diff --git a/reference/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk b/reference/shaders-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk 
similarity index 84% rename from reference/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk rename to reference/shaders-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk index d67e0beeb65..a037b301ca7 100644 --- a/reference/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk +++ b/reference/shaders-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk @@ -44,48 +44,6 @@ struct Content S4 m3s[8]; }; -struct S0_1 -{ - vec2 a[1]; - float b; -}; - -struct S1_1 -{ - vec3 a; - float b; -}; - -struct S2_1 -{ - vec3 a[1]; - float b; -}; - -struct S3_1 -{ - vec2 a; - float b; -}; - -struct S4_1 -{ - vec2 c; -}; - -struct Content_1 -{ - S0_1 m0s[1]; - S1_1 m1s[1]; - S2_1 m2s[1]; - S0_1 m0; - S1_1 m1; - S2_1 m2; - S3_1 m3; - float m4; - S4_1 m3s[8]; -}; - layout(set = 0, binding = 1, scalar) restrict buffer SSBO1 { Content content; @@ -104,9 +62,9 @@ layout(set = 0, binding = 1, scalar) restrict buffer SSBO1 layout(set = 0, binding = 0, std140) restrict buffer SSBO0 { - Content_1 content; - Content_1 content1[2]; - Content_1 content2; + Content content; + Content content1[2]; + Content content2; mat2 m0; mat2 m1; mat2x3 m2[4]; diff --git a/reference/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk b/reference/shaders-no-opt/comp/subgroups.nocompat.invalid.vk.comp.vk similarity index 92% rename from reference/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk rename to reference/shaders-no-opt/comp/subgroups.nocompat.invalid.vk.comp.vk index 6d288574f74..f3fa6dd00c3 100644 --- a/reference/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk +++ b/reference/shaders-no-opt/comp/subgroups.nocompat.invalid.vk.comp.vk @@ -62,6 +62,9 @@ void main() uvec4 anded = subgroupAnd(ballot_value); uvec4 ored = subgroupOr(ballot_value); uvec4 xored = subgroupXor(ballot_value); + bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42u))); + bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42u))); + bvec4 
xored_b = subgroupXor(equal(ballot_value, uvec4(42u))); added = subgroupInclusiveAdd(added); iadded = subgroupInclusiveAdd(iadded); multiplied = subgroupInclusiveMul(multiplied); @@ -102,6 +105,9 @@ void main() anded = subgroupClusteredAnd(anded, 4u); ored = subgroupClusteredOr(ored, 4u); xored = subgroupClusteredXor(xored, 4u); + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); diff --git a/reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp b/reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp new file mode 100644 index 00000000000..00b3fa7e1fc --- /dev/null +++ b/reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp @@ -0,0 +1,401 @@ +#version 450 + +#if defined(GL_KHR_shader_subgroup_ballot) +#extension GL_KHR_shader_subgroup_ballot : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#else +#error No extensions available to emulate requested subgroup feature. 
+#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#extension GL_KHR_shader_subgroup_basic : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#elif defined(GL_AMD_gcn_shader) && (defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5)) +#extension GL_AMD_gpu_shader_int64 : enable +#extension GL_NV_gpu_shader5 : enable +#extension GL_AMD_gcn_shader : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#extension GL_KHR_shader_subgroup_basic : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#extension GL_KHR_shader_subgroup_basic : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#extension GL_KHR_shader_subgroup_basic : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#else +#error No extensions available to emulate requested subgroup feature. 
+#endif + +#if defined(GL_KHR_shader_subgroup_ballot) +#extension GL_KHR_shader_subgroup_ballot : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#elif defined(GL_NV_shader_thread_shuffle) +#extension GL_NV_shader_thread_shuffle : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +#if defined(GL_KHR_shader_subgroup_ballot) +#extension GL_KHR_shader_subgroup_ballot : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#endif + +#if defined(GL_KHR_shader_subgroup_vote) +#extension GL_KHR_shader_subgroup_vote : require +#elif defined(GL_AMD_gcn_shader) && (defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5)) +#extension GL_AMD_gpu_shader_int64 : enable +#extension GL_NV_gpu_shader5 : enable +#extension GL_AMD_gcn_shader : require +#elif defined(GL_NV_gpu_shader_5) +#extension GL_NV_gpu_shader_5 : require +#elif defined(GL_ARB_shader_group_vote) +#extension GL_ARB_shader_group_vote : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#extension GL_KHR_shader_subgroup_basic : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#elif defined(GL_AMD_gcn_shader) && (defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5)) +#extension GL_AMD_gpu_shader_int64 : enable +#extension GL_NV_gpu_shader5 : enable +#extension GL_AMD_gcn_shader : require +#else +#error No extensions available to emulate requested subgroup feature. 
+#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#extension GL_KHR_shader_subgroup_basic : require +#endif + +#if defined(GL_KHR_shader_subgroup_ballot) +#extension GL_KHR_shader_subgroup_ballot : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +#if defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#endif +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float FragColor; +} _9; + +#if defined(GL_KHR_shader_subgroup_ballot) +#elif defined(GL_NV_shader_thread_group) +#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u) +#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u) +#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u) +#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u) +#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u) +#elif defined(GL_ARB_shader_ballot) +#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u) +#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u) +#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u) +#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u) +#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u) +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#elif defined(GL_NV_shader_thread_group) +#define gl_SubgroupSize gl_WarpSizeNV +#elif defined(GL_ARB_shader_ballot) +#define gl_SubgroupSize gl_SubGroupSizeARB +#elif defined(GL_AMD_gcn_shader) +#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD) +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#elif 
defined(GL_NV_shader_thread_group) +#define gl_SubgroupInvocationID gl_ThreadInWarpNV +#elif defined(GL_ARB_shader_ballot) +#define gl_SubgroupInvocationID gl_SubGroupInvocationARB +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#elif defined(GL_NV_shader_thread_group) +#define gl_SubgroupID gl_WarpIDNV +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#elif defined(GL_NV_shader_thread_group) +#define gl_NumSubgroups gl_WarpsPerSMNV +#endif + +#if defined(GL_KHR_shader_subgroup_ballot) +#elif defined(GL_ARB_shader_ballot) +int subgroupBroadcastFirst(int value) { return readFirstInvocationARB(value); } +ivec2 subgroupBroadcastFirst(ivec2 value) { return readFirstInvocationARB(value); } +ivec3 subgroupBroadcastFirst(ivec3 value) { return readFirstInvocationARB(value); } +ivec4 subgroupBroadcastFirst(ivec4 value) { return readFirstInvocationARB(value); } +uint subgroupBroadcastFirst(uint value) { return readFirstInvocationARB(value); } +uvec2 subgroupBroadcastFirst(uvec2 value) { return readFirstInvocationARB(value); } +uvec3 subgroupBroadcastFirst(uvec3 value) { return readFirstInvocationARB(value); } +uvec4 subgroupBroadcastFirst(uvec4 value) { return readFirstInvocationARB(value); } +float subgroupBroadcastFirst(float value) { return readFirstInvocationARB(value); } +vec2 subgroupBroadcastFirst(vec2 value) { return readFirstInvocationARB(value); } +vec3 subgroupBroadcastFirst(vec3 value) { return readFirstInvocationARB(value); } +vec4 subgroupBroadcastFirst(vec4 value) { return readFirstInvocationARB(value); } +double subgroupBroadcastFirst(double value) { return readFirstInvocationARB(value); } +dvec2 subgroupBroadcastFirst(dvec2 value) { return readFirstInvocationARB(value); } +dvec3 subgroupBroadcastFirst(dvec3 value) { return readFirstInvocationARB(value); } +dvec4 subgroupBroadcastFirst(dvec4 value) { return readFirstInvocationARB(value); } +int subgroupBroadcast(int value, uint id) { return readInvocationARB(value, id); } +ivec2 
subgroupBroadcast(ivec2 value, uint id) { return readInvocationARB(value, id); } +ivec3 subgroupBroadcast(ivec3 value, uint id) { return readInvocationARB(value, id); } +ivec4 subgroupBroadcast(ivec4 value, uint id) { return readInvocationARB(value, id); } +uint subgroupBroadcast(uint value, uint id) { return readInvocationARB(value, id); } +uvec2 subgroupBroadcast(uvec2 value, uint id) { return readInvocationARB(value, id); } +uvec3 subgroupBroadcast(uvec3 value, uint id) { return readInvocationARB(value, id); } +uvec4 subgroupBroadcast(uvec4 value, uint id) { return readInvocationARB(value, id); } +float subgroupBroadcast(float value, uint id) { return readInvocationARB(value, id); } +vec2 subgroupBroadcast(vec2 value, uint id) { return readInvocationARB(value, id); } +vec3 subgroupBroadcast(vec3 value, uint id) { return readInvocationARB(value, id); } +vec4 subgroupBroadcast(vec4 value, uint id) { return readInvocationARB(value, id); } +double subgroupBroadcast(double value, uint id) { return readInvocationARB(value, id); } +dvec2 subgroupBroadcast(dvec2 value, uint id) { return readInvocationARB(value, id); } +dvec3 subgroupBroadcast(dvec3 value, uint id) { return readInvocationARB(value, id); } +dvec4 subgroupBroadcast(dvec4 value, uint id) { return readInvocationARB(value, id); } +#elif defined(GL_NV_shader_thread_shuffle) +int subgroupBroadcastFirst(int value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +ivec2 subgroupBroadcastFirst(ivec2 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +ivec3 subgroupBroadcastFirst(ivec3 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +ivec4 subgroupBroadcastFirst(ivec4 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +uint subgroupBroadcastFirst(uint value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +uvec2 subgroupBroadcastFirst(uvec2 value) { return 
shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +uvec3 subgroupBroadcastFirst(uvec3 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +uvec4 subgroupBroadcastFirst(uvec4 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +float subgroupBroadcastFirst(float value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +vec2 subgroupBroadcastFirst(vec2 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +vec3 subgroupBroadcastFirst(vec3 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +vec4 subgroupBroadcastFirst(vec4 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +double subgroupBroadcastFirst(double value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +dvec2 subgroupBroadcastFirst(dvec2 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +dvec3 subgroupBroadcastFirst(dvec3 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +dvec4 subgroupBroadcastFirst(dvec4 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +int subgroupBroadcast(int value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +ivec2 subgroupBroadcast(ivec2 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +ivec3 subgroupBroadcast(ivec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +ivec4 subgroupBroadcast(ivec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +uint subgroupBroadcast(uint value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +uvec2 subgroupBroadcast(uvec2 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +uvec3 subgroupBroadcast(uvec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +uvec4 subgroupBroadcast(uvec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } 
+float subgroupBroadcast(float value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +vec2 subgroupBroadcast(vec2 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +vec3 subgroupBroadcast(vec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +vec4 subgroupBroadcast(vec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +double subgroupBroadcast(double value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +dvec2 subgroupBroadcast(dvec2 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +dvec3 subgroupBroadcast(dvec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +dvec4 subgroupBroadcast(dvec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +#endif + +#if defined(GL_KHR_shader_subgroup_ballot) +#elif defined(GL_NV_shader_thread_group) +uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); } +uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); } +#else +uint subgroupBallotFindLSB(uvec4 value) +{ + int firstLive = findLSB(value.x); + return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32)); +} +uint subgroupBallotFindMSB(uvec4 value) +{ + int firstLive = findMSB(value.y); + return uint(firstLive != -1 ? 
(firstLive + 32) : findMSB(value.x)); +} +#endif + +#if defined(GL_KHR_shader_subgroup_vote) +#elif defined(GL_AMD_gcn_shader) +bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); } +bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; } +bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || b == ballotAMD(true); } +#elif defined(GL_NV_gpu_shader_5) +bool subgroupAll(bool value) { return allThreadsNV(value); } +bool subgroupAny(bool value) { return anyThreadNV(value); } +bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); } +#elif defined(GL_ARB_shader_group_vote) +bool subgroupAll(bool v) { return allInvocationsARB(v); } +bool subgroupAny(bool v) { return anyInvocationARB(v); } +bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); } +#endif + +#ifndef GL_KHR_shader_subgroup_vote +#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return subgroupAllEqual(subgroupBroadcastFirst(value) == value); } +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(int) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec2) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec3) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec4) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(uint) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(uvec2) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(uvec3) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(uvec4) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(float) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(vec2) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(vec3) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(vec4) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(double) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(dvec2) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(dvec3) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(dvec4) +#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND +#endif + +#if defined(GL_KHR_shader_subgroup_ballot) +#elif 
defined(GL_NV_shader_thread_group) +uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); } +#elif defined(GL_ARB_shader_ballot) +uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); } +#endif + +#ifndef GL_KHR_shader_subgroup_basic +bool subgroupElect() +{ + uvec4 activeMask = subgroupBallot(true); + uint firstLive = subgroupBallotFindLSB(activeMask); + return gl_SubgroupInvocationID == firstLive; +} +#endif + +#ifndef GL_KHR_shader_subgroup_basic +void subgroupBarrier() { memoryBarrierShared(); } +#endif + +#ifndef GL_KHR_shader_subgroup_basic +void subgroupMemoryBarrier() { groupMemoryBarrier(); } +void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); } +void subgroupMemoryBarrierShared() { memoryBarrierShared(); } +void subgroupMemoryBarrierImage() { groupMemoryBarrier(); } +#endif + +#ifndef GL_KHR_shader_subgroup_ballot +bool subgroupInverseBallot(uvec4 value) +{ + return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u))); +} +uint subgroupBallotInclusiveBitCount(uvec4 value) +{ + uvec2 v = value.xy & gl_SubgroupLeMask.xy; + ivec2 c = bitCount(v); +#ifdef GL_NV_shader_thread_group + return uint(c.x); +#else + return uint(c.x + c.y); +#endif +} +uint subgroupBallotExclusiveBitCount(uvec4 value) +{ + uvec2 v = value.xy & gl_SubgroupLtMask.xy; + ivec2 c = bitCount(v); +#ifdef GL_NV_shader_thread_group + return uint(c.x); +#else + return uint(c.x + c.y); +#endif +} +#endif + +#ifndef GL_KHR_shader_subgroup_ballot +uint subgroupBallotBitCount(uvec4 value) +{ + ivec2 c = bitCount(value.xy); +#ifdef GL_NV_shader_thread_group + return uint(c.x); +#else + return uint(c.x + c.y); +#endif +} +#endif + +#ifndef GL_KHR_shader_subgroup_ballot +bool subgroupBallotBitExtract(uvec4 value, uint index) +{ +#ifdef GL_NV_shader_thread_group + uint shifted = value.x >> index; +#else + uint shifted = value[index >> 5u] >> (index & 0x1fu); +#endif + return (shifted & 1u) != 0u; +} +#endif + +void main() +{ + 
_9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + subgroupMemoryBarrier(); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + _9.FragColor = vec4(gl_SubgroupEqMask).x; + _9.FragColor = vec4(gl_SubgroupGeMask).x; + _9.FragColor = vec4(gl_SubgroupGtMask).x; + _9.FragColor = vec4(gl_SubgroupLeMask).x; + _9.FragColor = vec4(gl_SubgroupLtMask).x; + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal_bool = subgroupAllEqual(true); + bool has_equal_T = subgroupAllEqual(uvec3(5u)); +} + diff --git a/reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp.vk b/reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp.vk new file mode 100644 index 00000000000..61aa2f7a561 --- /dev/null +++ b/reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp.vk @@ -0,0 +1,45 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + float FragColor; +} _9; + +void main() +{ 
+ _9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + subgroupMemoryBarrier(); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + _9.FragColor = vec4(gl_SubgroupEqMask).x; + _9.FragColor = vec4(gl_SubgroupGeMask).x; + _9.FragColor = vec4(gl_SubgroupGtMask).x; + _9.FragColor = vec4(gl_SubgroupLeMask).x; + _9.FragColor = vec4(gl_SubgroupLtMask).x; + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal_bool = subgroupAllEqual(true); + bool has_equal_T = subgroupAllEqual(uvec3(5u)); +} + diff --git a/reference/shaders-no-opt/comp/trivial-select-cast-vector.comp b/reference/shaders-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 00000000000..92573ffdc86 --- /dev/null +++ b/reference/shaders-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,15 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer A +{ + vec3 a; + vec3 b; +} _14; + +void main() +{ + bvec3 c = lessThan(_14.b, vec3(1.0)); + _14.a = mix(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, 1.0), c); +} + diff --git 
a/reference/shaders-no-opt/comp/trivial-select-matrix.spv14.comp b/reference/shaders-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 00000000000..dd227e89d68 --- /dev/null +++ b/reference/shaders-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer A +{ + mat3 a; + float b; +} _14; + +void main() +{ + bool c = _14.b < 1.0; + _14.a = c ? mat3(vec3(1.0), vec3(1.0), vec3(1.0)) : mat3(vec3(0.0), vec3(0.0), vec3(0.0)); + _14.a = c ? mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 1.0, 0.0), vec3(0.0, 0.0, 1.0)) : mat3(vec3(0.0), vec3(0.0), vec3(0.0)); +} + diff --git a/reference/shaders/frag/16bit-constants.frag b/reference/shaders-no-opt/frag/16bit-constants.invalid.frag similarity index 68% rename from reference/shaders/frag/16bit-constants.frag rename to reference/shaders-no-opt/frag/16bit-constants.invalid.frag index 57d8256138b..a5c0a6a17cd 100644 --- a/reference/shaders/frag/16bit-constants.frag +++ b/reference/shaders-no-opt/frag/16bit-constants.invalid.frag @@ -6,8 +6,12 @@ #else #error No extension available for FP16. #endif -#if defined(GL_AMD_gpu_shader_int16) +#if defined(GL_EXT_shader_explicit_arithmetic_types_int16) +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#elif defined(GL_AMD_gpu_shader_int16) #extension GL_AMD_gpu_shader_int16 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require #else #error No extension available for Int16. 
#endif diff --git a/reference/opt/shaders/desktop-only/frag/fp16.invalid.desktop.frag b/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag similarity index 100% rename from reference/opt/shaders/desktop-only/frag/fp16.invalid.desktop.frag rename to reference/shaders-no-opt/frag/fp16.invalid.desktop.frag diff --git a/reference/shaders-no-opt/frag/frag-fully-covered.frag b/reference/shaders-no-opt/frag/frag-fully-covered.frag new file mode 100644 index 00000000000..0f22a7de2a3 --- /dev/null +++ b/reference/shaders-no-opt/frag/frag-fully-covered.frag @@ -0,0 +1,14 @@ +#version 450 +#extension GL_NV_conservative_raster_underestimation : require + +layout(location = 0) out vec4 FragColor; + +void main() +{ + if (!gl_FragFullyCoveredNV) + { + discard; + } + FragColor = vec4(1.0); +} + diff --git a/reference/opt/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk b/reference/shaders-no-opt/frag/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk similarity index 100% rename from reference/opt/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk rename to reference/shaders-no-opt/frag/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk diff --git a/reference/shaders/amd/fs.invalid.frag b/reference/shaders-no-opt/frag/fs.invalid.frag similarity index 100% rename from reference/shaders/amd/fs.invalid.frag rename to reference/shaders-no-opt/frag/fs.invalid.frag index aecf69eba7a..8548a8733f1 100644 --- a/reference/shaders/amd/fs.invalid.frag +++ b/reference/shaders-no-opt/frag/fs.invalid.frag @@ -1,6 +1,6 @@ #version 450 -#extension GL_AMD_shader_fragment_mask : require #extension GL_AMD_shader_explicit_vertex_parameter : require +#extension GL_AMD_shader_fragment_mask : require layout(binding = 0) uniform sampler2DMS texture1; diff --git a/reference/shaders-no-opt/frag/image-gather.frag b/reference/shaders-no-opt/frag/image-gather.frag new file mode 100644 index 00000000000..1baccdfa534 --- /dev/null +++ 
b/reference/shaders-no-opt/frag/image-gather.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(binding = 0) uniform sampler2D uSamp; +layout(binding = 1) uniform sampler2DShadow uSampShadow; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec3 vUV; + +void main() +{ + FragColor = textureGather(uSamp, vUV.xy); + FragColor += textureGather(uSamp, vUV.xy, 1); + FragColor += textureGather(uSampShadow, vUV.xy, vUV.z); +} + diff --git a/reference/shaders-no-opt/frag/modf-non-function-purity-analysis.frag b/reference/shaders-no-opt/frag/modf-non-function-purity-analysis.frag new file mode 100644 index 00000000000..3a4e0866439 --- /dev/null +++ b/reference/shaders-no-opt/frag/modf-non-function-purity-analysis.frag @@ -0,0 +1,18 @@ +#version 450 + +layout(location = 0) in vec4 v; +layout(location = 1) out vec4 vo1; +layout(location = 0) out vec4 vo0; + +vec4 modf_inner() +{ + vec4 _16 = modf(v, vo1); + return _16; +} + +void main() +{ + vec4 _20 = modf_inner(); + vo0 = _20; +} + diff --git a/reference/opt/shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag b/reference/shaders-no-opt/frag/multi-dimensional.desktop.invalid.flatten_dim.frag similarity index 100% rename from reference/opt/shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag rename to reference/shaders-no-opt/frag/multi-dimensional.desktop.invalid.flatten_dim.frag diff --git a/reference/shaders-no-opt/frag/nonuniform-constructor.vk.nocompat.frag.vk b/reference/shaders-no-opt/frag/nonuniform-constructor.vk.nocompat.frag.vk new file mode 100644 index 00000000000..ab58862ffd4 --- /dev/null +++ b/reference/shaders-no-opt/frag/nonuniform-constructor.vk.nocompat.frag.vk @@ -0,0 +1,15 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(set = 0, binding = 0) uniform texture2D uTex[]; +layout(set = 1, binding = 0) uniform sampler Immut; + +layout(location = 0) out vec4 FragColor; +layout(location = 1) flat in int vIndex; +layout(location = 0) in vec2 
vUV; + +void main() +{ + FragColor = texture(nonuniformEXT(sampler2D(uTex[vIndex], Immut)), vUV); +} + diff --git a/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag b/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag new file mode 100644 index 00000000000..a4a962e163d --- /dev/null +++ b/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag @@ -0,0 +1,46 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _14; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _35; + +void callee2() +{ + int _25 = int(gl_FragCoord.x); + _14.values1[_25]++; +} + +void callee() +{ + int _38 = int(gl_FragCoord.x); + _35.values0[_38]++; + callee2(); +} + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + callee(); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/opt/shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk b/reference/shaders-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk similarity index 100% rename from reference/opt/shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk rename to reference/shaders-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk diff --git a/reference/shaders-no-opt/frag/sparse-texture-clamp.desktop.frag b/reference/shaders-no-opt/frag/sparse-texture-clamp.desktop.frag new file mode 100644 index 00000000000..df0daa79de3 --- /dev/null +++ b/reference/shaders-no-opt/frag/sparse-texture-clamp.desktop.frag @@ -0,0 +1,46 @@ +#version 450 +#extension GL_ARB_sparse_texture2 : require +#extension GL_ARB_sparse_texture_clamp : require + +struct ResType +{ + int _m0; + vec4 _m1; +}; + +layout(binding = 0) uniform sampler2D uSamp; + +layout(location = 0) in vec2 vUV; + +void main() +{ + int _66; + vec4 _67; + _66 = sparseTextureClampARB(uSamp, vUV, 1.0, _67, 2.0); + ResType _25 = ResType(_66, _67); + vec4 texel = _25._m1; + int code = _25._m0; + texel = textureClampARB(uSamp, vUV, 1.0, 2.0); + int _68; + vec4 _69; + _68 = sparseTextureOffsetClampARB(uSamp, vUV, ivec2(1, 2), 1.0, _69, 2.0); + ResType _37 = ResType(_68, _69); + texel = _37._m1; + code = _37._m0; + texel = textureOffsetClampARB(uSamp, vUV, ivec2(1, 2), 1.0, 2.0); + int _70; + vec4 _71; + _70 = sparseTextureGradClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), 1.0, _71); + ResType _47 = 
ResType(_70, _71); + texel = _47._m1; + code = _47._m0; + texel = textureGradClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), 1.0); + int _72; + vec4 _73; + _72 = sparseTextureGradOffsetClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), ivec2(-1, -2), 1.0, _73); + ResType _58 = ResType(_72, _73); + texel = _58._m1; + code = _58._m0; + texel = textureGradOffsetClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), ivec2(-1, -2), 1.0); +} + diff --git a/reference/shaders-no-opt/frag/sparse-texture-feedback.desktop.frag b/reference/shaders-no-opt/frag/sparse-texture-feedback.desktop.frag new file mode 100644 index 00000000000..7faa226430b --- /dev/null +++ b/reference/shaders-no-opt/frag/sparse-texture-feedback.desktop.frag @@ -0,0 +1,105 @@ +#version 450 +#extension GL_ARB_sparse_texture2 : require +#extension GL_ARB_sparse_texture_clamp : require + +struct ResType +{ + int _m0; + vec4 _m1; +}; + +layout(binding = 0) uniform sampler2D uSamp; +layout(binding = 1) uniform sampler2DMS uSampMS; +layout(binding = 2, rgba8) uniform readonly image2D uImage; +layout(binding = 3, rgba8) uniform readonly image2DMS uImageMS; + +layout(location = 0) in vec2 vUV; + +void main() +{ + int _144; + vec4 _145; + _144 = sparseTextureARB(uSamp, vUV, _145); + ResType _24 = ResType(_144, _145); + vec4 texel = _24._m1; + bool ret = sparseTexelsResidentARB(_24._m0); + int _146; + vec4 _147; + _146 = sparseTextureARB(uSamp, vUV, _147, 1.10000002384185791015625); + ResType _31 = ResType(_146, _147); + texel = _31._m1; + ret = sparseTexelsResidentARB(_31._m0); + int _148; + vec4 _149; + _148 = sparseTextureLodARB(uSamp, vUV, 1.0, _149); + ResType _38 = ResType(_148, _149); + texel = _38._m1; + ret = sparseTexelsResidentARB(_38._m0); + int _150; + vec4 _151; + _150 = sparseTextureOffsetARB(uSamp, vUV, ivec2(1), _151); + ResType _47 = ResType(_150, _151); + texel = _47._m1; + ret = sparseTexelsResidentARB(_47._m0); + int _152; + vec4 _153; + _152 = sparseTextureOffsetARB(uSamp, vUV, ivec2(2), _153, 0.5); + ResType _56 = 
ResType(_152, _153); + texel = _56._m1; + ret = sparseTexelsResidentARB(_56._m0); + int _154; + vec4 _155; + _154 = sparseTexelFetchARB(uSamp, ivec2(vUV), 1, _155); + ResType _64 = ResType(_154, _155); + texel = _64._m1; + ret = sparseTexelsResidentARB(_64._m0); + int _156; + vec4 _157; + _156 = sparseTexelFetchARB(uSampMS, ivec2(vUV), 2, _157); + ResType _76 = ResType(_156, _157); + texel = _76._m1; + ret = sparseTexelsResidentARB(_76._m0); + int _158; + vec4 _159; + _158 = sparseTexelFetchOffsetARB(uSamp, ivec2(vUV), 1, ivec2(2, 3), _159); + ResType _86 = ResType(_158, _159); + texel = _86._m1; + ret = sparseTexelsResidentARB(_86._m0); + int _160; + vec4 _161; + _160 = sparseTextureLodOffsetARB(uSamp, vUV, 1.5, ivec2(2, 3), _161); + ResType _93 = ResType(_160, _161); + texel = _93._m1; + ret = sparseTexelsResidentARB(_93._m0); + int _162; + vec4 _163; + _162 = sparseTextureGradARB(uSamp, vUV, vec2(1.0), vec2(3.0), _163); + ResType _102 = ResType(_162, _163); + texel = _102._m1; + ret = sparseTexelsResidentARB(_102._m0); + int _164; + vec4 _165; + _164 = sparseTextureGradOffsetARB(uSamp, vUV, vec2(1.0), vec2(3.0), ivec2(-2, -3), _165); + ResType _111 = ResType(_164, _165); + texel = _111._m1; + ret = sparseTexelsResidentARB(_111._m0); + int _166; + vec4 _167; + _166 = sparseTextureClampARB(uSamp, vUV, 4.0, _167); + ResType _118 = ResType(_166, _167); + texel = _118._m1; + ret = sparseTexelsResidentARB(_118._m0); + int _168; + vec4 _169; + _168 = sparseImageLoadARB(uImage, ivec2(vUV), _169); + ResType _128 = ResType(_168, _169); + texel = _128._m1; + ret = sparseTexelsResidentARB(_128._m0); + int _170; + vec4 _171; + _170 = sparseImageLoadARB(uImageMS, ivec2(vUV), 1, _171); + ResType _138 = ResType(_170, _171); + texel = _138._m1; + ret = sparseTexelsResidentARB(_138._m0); +} + diff --git a/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.frag b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.frag new file mode 100644 
index 00000000000..00a146c4d36 --- /dev/null +++ b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.frag @@ -0,0 +1,18 @@ +#version 310 es +#extension GL_EXT_shader_framebuffer_fetch : require +precision mediump float; +precision highp int; + +mediump vec4 uSubpass0; +mediump vec4 uSubpass1; + +layout(location = 0) inout vec3 FragColor; +layout(location = 1) inout vec4 FragColor2; + +void main() +{ + uSubpass0.xyz = FragColor; + uSubpass1 = FragColor2; + FragColor = uSubpass0.xyz + uSubpass1.xyz; +} + diff --git a/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.framebuffer-fetch-noncoherent.frag b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.framebuffer-fetch-noncoherent.frag new file mode 100644 index 00000000000..8600549859e --- /dev/null +++ b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.framebuffer-fetch-noncoherent.frag @@ -0,0 +1,18 @@ +#version 310 es +#extension GL_EXT_shader_framebuffer_fetch_non_coherent : require +precision mediump float; +precision highp int; + +mediump vec4 uSubpass0; +mediump vec4 uSubpass1; + +layout(location = 0, noncoherent) inout vec3 FragColor; +layout(location = 1, noncoherent) inout vec4 FragColor2; + +void main() +{ + uSubpass0.xyz = FragColor; + uSubpass1 = FragColor2; + FragColor = uSubpass0.xyz + uSubpass1.xyz; +} + diff --git a/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.frag b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.frag new file mode 100644 index 00000000000..d1b72651215 --- /dev/null +++ b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.frag @@ -0,0 +1,16 @@ +#version 100 +#extension GL_EXT_shader_framebuffer_fetch : require +#extension GL_EXT_draw_buffers : require +precision mediump float; +precision highp int; + +mediump vec4 uSubpass0; +mediump vec4 uSubpass1; + +void main() +{ + uSubpass0 = gl_LastFragData[0]; + 
uSubpass1 = gl_LastFragData[1]; + gl_FragData[0] = uSubpass0.xyz + uSubpass1.xyz; +} + diff --git a/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.framebuffer-fetch-noncoherent.frag b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.framebuffer-fetch-noncoherent.frag new file mode 100644 index 00000000000..c0a40571b05 --- /dev/null +++ b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.framebuffer-fetch-noncoherent.frag @@ -0,0 +1,16 @@ +#version 100 +#extension GL_EXT_shader_framebuffer_fetch_non_coherent : require +#extension GL_EXT_draw_buffers : require +precision mediump float; +precision highp int; + +mediump vec4 uSubpass0; +mediump vec4 uSubpass1; + +void main() +{ + uSubpass0 = gl_LastFragData[0]; + uSubpass1 = gl_LastFragData[1]; + gl_FragData[0] = uSubpass0.xyz + uSubpass1.xyz; +} + diff --git a/reference/shaders-no-opt/frag/texture-gather-offsets.frag b/reference/shaders-no-opt/frag/texture-gather-offsets.frag new file mode 100644 index 00000000000..36409dd3c34 --- /dev/null +++ b/reference/shaders-no-opt/frag/texture-gather-offsets.frag @@ -0,0 +1,12 @@ +#version 460 + +layout(binding = 0) uniform sampler2D Image0; + +layout(location = 0) out vec4 outColor; +layout(location = 0) in vec2 inUv; + +void main() +{ + outColor = textureGatherOffsets(Image0, inUv, ivec2[](ivec2(0), ivec2(1, 0), ivec2(1), ivec2(0, 1))); +} + diff --git a/reference/shaders-no-opt/frag/texture-gather-uint-component.asm.frag b/reference/shaders-no-opt/frag/texture-gather-uint-component.asm.frag new file mode 100644 index 00000000000..66dcb369cc7 --- /dev/null +++ b/reference/shaders-no-opt/frag/texture-gather-uint-component.asm.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(binding = 0) uniform sampler2D uSamp; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; + +void main() +{ + FragColor = textureGather(uSamp, vUV, int(1u)); +} + diff --git 
a/reference/shaders-no-opt/frag/texture1d-emulation.es.frag b/reference/shaders-no-opt/frag/texture1d-emulation.es.frag new file mode 100644 index 00000000000..71efb7bcc65 --- /dev/null +++ b/reference/shaders-no-opt/frag/texture1d-emulation.es.frag @@ -0,0 +1,30 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 0) uniform highp sampler2D uSamp; +layout(binding = 1) uniform highp sampler2DShadow uSampShadow; +layout(binding = 2) uniform highp sampler2DArray uSampArray; +layout(binding = 3) uniform highp sampler2DArrayShadow uSampArrayShadow; +layout(binding = 4, r32f) uniform highp image2D uImage; + +layout(location = 0) out highp vec4 FragColor; +layout(location = 0) in highp vec4 vUV; + +void main() +{ + FragColor = texture(uSamp, vec2(vUV.x, 0.0)); + FragColor += textureProj(uSamp, vec3(vUV.xy.x, 0.0, vUV.xy.y)); + FragColor += texelFetch(uSamp, ivec2(int(vUV.x), 0), 0); + FragColor += vec4(texture(uSampShadow, vec3(vUV.xyz.x, 0.0, vUV.xyz.z))); + highp vec4 _54 = vUV; + highp vec4 _57 = _54; + _57.y = _54.w; + FragColor += vec4(textureProj(uSampShadow, vec4(_57.x, 0.0, _54.z, _57.y))); + FragColor = texture(uSampArray, vec3(vUV.xy.x, 0.0, vUV.xy.y)); + FragColor += texelFetch(uSampArray, ivec3(ivec2(vUV.xy).x, 0, ivec2(vUV.xy).y), 0); + FragColor += vec4(texture(uSampArrayShadow, vec4(vUV.xyz.xy.x, 0.0, vUV.xyz.xy.y, vUV.xyz.z))); + FragColor += imageLoad(uImage, ivec2(int(vUV.x), 0)); + imageStore(uImage, ivec2(int(vUV.x), 0), FragColor); +} + diff --git a/reference/shaders-no-opt/frag/texture1d-emulation.legacy.frag b/reference/shaders-no-opt/frag/texture1d-emulation.legacy.frag new file mode 100644 index 00000000000..e6a14ed30cb --- /dev/null +++ b/reference/shaders-no-opt/frag/texture1d-emulation.legacy.frag @@ -0,0 +1,21 @@ +#version 100 +#extension GL_EXT_shadow_samplers : require +precision mediump float; +precision highp int; + +uniform highp sampler2D uSamp; +uniform highp sampler2DShadow uSampShadow; + +varying 
highp vec4 vUV; + +void main() +{ + gl_FragData[0] = texture2D(uSamp, vec2(vUV.x, 0.0)); + gl_FragData[0] += texture2DProj(uSamp, vec3(vUV.xy.x, 0.0, vUV.xy.y)); + gl_FragData[0] += vec4(shadow2DEXT(uSampShadow, vec3(vUV.xyz.x, 0.0, vUV.xyz.z))); + highp vec4 _44 = vUV; + highp vec4 _47 = _44; + _47.y = _44.w; + gl_FragData[0] += vec4(shadow2DProjEXT(uSampShadow, vec4(_47.x, 0.0, _44.z, _47.y))); +} + diff --git a/reference/shaders-no-opt/frag/variables.zero-initialize.frag b/reference/shaders-no-opt/frag/variables.zero-initialize.frag new file mode 100644 index 00000000000..c9027235082 --- /dev/null +++ b/reference/shaders-no-opt/frag/variables.zero-initialize.frag @@ -0,0 +1,28 @@ +#version 450 + +struct Foo +{ + int a; +}; + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; +int uninit_int = 0; +ivec4 uninit_vector = ivec4(0); +mat4 uninit_matrix = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); +Foo uninit_foo = Foo(0); + +void main() +{ + int uninit_function_int = 0; + if (vColor.x > 10.0) + { + uninit_function_int = 10; + } + else + { + uninit_function_int = 20; + } + FragColor = vColor; +} + diff --git a/reference/shaders-no-opt/legacy/frag/switch-single-case-multiple-exit-cfg.legacy.asm.frag b/reference/shaders-no-opt/legacy/frag/switch-single-case-multiple-exit-cfg.legacy.asm.frag new file mode 100644 index 00000000000..f46bc2fd884 --- /dev/null +++ b/reference/shaders-no-opt/legacy/frag/switch-single-case-multiple-exit-cfg.legacy.asm.frag @@ -0,0 +1,24 @@ +#version 100 +precision mediump float; +precision highp int; + +vec2 _19; + +void main() +{ + highp vec2 _30; + for (int spvDummy15 = 0; spvDummy15 < 1; spvDummy15++) + { + if (gl_FragCoord.x != gl_FragCoord.x) + { + _30 = _19; + break; + } + highp vec2 _29; + _29.y = _19.y; + _30 = _29; + break; + } + gl_FragData[0] = vec4(_30, 1.0, 1.0); +} + diff --git a/reference/shaders-no-opt/task/task-shader-basic-2.vk.spv14.nocompat.task.vk 
b/reference/shaders-no-opt/task/task-shader-basic-2.vk.spv14.nocompat.task.vk new file mode 100644 index 00000000000..98704e22dec --- /dev/null +++ b/reference/shaders-no-opt/task/task-shader-basic-2.vk.spv14.nocompat.task.vk @@ -0,0 +1,42 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; + +shared float vs[24]; +taskPayloadSharedEXT Payload p; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u]; + } + barrier(); + if (gl_LocalInvocationIndex < 6u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u]; + } + barrier(); + if (gl_LocalInvocationIndex < 3u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u]; + } + barrier(); + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + if (vs[5] > 20.0) + { + EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8]))); + } + else + { + EmitMeshTasksEXT(uint(int(vs[6])), 10u, 50u); + } +} + diff --git a/reference/shaders-no-opt/task/task-shader-basic.vk.spv14.nocompat.task.vk b/reference/shaders-no-opt/task/task-shader-basic.vk.spv14.nocompat.task.vk new file mode 100644 index 00000000000..1d491e7014b --- /dev/null +++ b/reference/shaders-no-opt/task/task-shader-basic.vk.spv14.nocompat.task.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; + +shared float vs[24]; +taskPayloadSharedEXT Payload p; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u]; + } + barrier(); + if (gl_LocalInvocationIndex < 6u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u]; + } + barrier(); + 
if (gl_LocalInvocationIndex < 3u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u]; + } + barrier(); + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8]))); +} + diff --git a/reference/shaders-no-opt/vert/io-blocks.force-flattened-io.vert b/reference/shaders-no-opt/vert/io-blocks.force-flattened-io.vert new file mode 100644 index 00000000000..604de8a2cc3 --- /dev/null +++ b/reference/shaders-no-opt/vert/io-blocks.force-flattened-io.vert @@ -0,0 +1,25 @@ +#version 450 + +struct Foo +{ + vec4 bar[2]; + vec4 baz[2]; +}; + +out vec4 _14_foo_bar[2]; +out vec4 _14_foo_baz[2]; +out vec4 _14_foo2_bar[2]; +out vec4 _14_foo2_baz[2]; +out vec4 foo3_bar[2]; +out vec4 foo3_baz[2]; + +void main() +{ + _14_foo_bar[0] = vec4(1.0); + _14_foo_baz[1] = vec4(2.0); + _14_foo2_bar[0] = vec4(3.0); + _14_foo2_baz[1] = vec4(4.0); + foo3_bar[0] = vec4(5.0); + foo3_baz[1] = vec4(6.0); +} + diff --git a/reference/shaders-no-opt/vulkan/frag/shading-rate.vk.nocompat.frag.vk b/reference/shaders-no-opt/vulkan/frag/shading-rate.vk.nocompat.frag.vk new file mode 100644 index 00000000000..e5c67115ca8 --- /dev/null +++ b/reference/shaders-no-opt/vulkan/frag/shading-rate.vk.nocompat.frag.vk @@ -0,0 +1,10 @@ +#version 450 +#extension GL_EXT_fragment_shading_rate : require + +layout(location = 0) out uint FragColor; + +void main() +{ + FragColor = uint(gl_ShadingRateEXT); +} + diff --git a/reference/shaders-no-opt/vulkan/frag/ubo-offset-out-of-order.vk.nocompat.frag.vk b/reference/shaders-no-opt/vulkan/frag/ubo-offset-out-of-order.vk.nocompat.frag.vk new file mode 100644 index 00000000000..380b7465914 --- /dev/null +++ b/reference/shaders-no-opt/vulkan/frag/ubo-offset-out-of-order.vk.nocompat.frag.vk @@ -0,0 +1,16 @@ +#version 450 + +layout(set = 0, binding = 0, std140) uniform UBO +{ + layout(offset = 16) mat4 m; + layout(offset = 0) vec4 v; +} _13; + +layout(location = 0) out vec4 FragColor; 
+layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = (_13.m * vColor) + _13.v; +} + diff --git a/reference/shaders-no-opt/vulkan/frag/volatile-helper-invocation.vk.nocompat.spv16.frag.vk b/reference/shaders-no-opt/vulkan/frag/volatile-helper-invocation.vk.nocompat.spv16.frag.vk new file mode 100644 index 00000000000..73b0f9b097b --- /dev/null +++ b/reference/shaders-no-opt/vulkan/frag/volatile-helper-invocation.vk.nocompat.spv16.frag.vk @@ -0,0 +1,16 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out float FragColor; + +void main() +{ + bool _12 = gl_HelperInvocation; + float _15 = float(_12); + FragColor = _15; + demote; + bool _16 = gl_HelperInvocation; + float _17 = float(_16); + FragColor = _17; +} + diff --git a/reference/shaders-no-opt/vulkan/vert/primitive-shading-rate.vk.nocompat.vert.vk b/reference/shaders-no-opt/vulkan/vert/primitive-shading-rate.vk.nocompat.vert.vk new file mode 100644 index 00000000000..4736723322a --- /dev/null +++ b/reference/shaders-no-opt/vulkan/vert/primitive-shading-rate.vk.nocompat.vert.vk @@ -0,0 +1,9 @@ +#version 450 +#extension GL_EXT_fragment_shading_rate : require + +void main() +{ + gl_PrimitiveShadingRateEXT = 3; + gl_Position = vec4(1.0); +} + diff --git a/reference/shaders-reflection/asm/aliased-entry-point-names.asm.multi.json b/reference/shaders-reflection/asm/aliased-entry-point-names.asm.multi.json index a56a06f35c0..666167af4a7 100644 --- a/reference/shaders-reflection/asm/aliased-entry-point-names.asm.multi.json +++ b/reference/shaders-reflection/asm/aliased-entry-point-names.asm.multi.json @@ -34,6 +34,9 @@ "type" : "float", "array" : [ 1 + ], + "array_size_is_literal" : [ + true ] }, { @@ -41,6 +44,9 @@ "type" : "float", "array" : [ 1 + ], + "array_size_is_literal" : [ + true ] } ] diff --git a/reference/shaders-reflection/asm/comp/pointer-to-array-of-physical-pointer.asm.comp.json 
b/reference/shaders-reflection/asm/comp/pointer-to-array-of-physical-pointer.asm.comp.json new file mode 100644 index 00000000000..b9224eccdbf --- /dev/null +++ b/reference/shaders-reflection/asm/comp/pointer-to-array-of-physical-pointer.asm.comp.json @@ -0,0 +1,71 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] + } + ], + "types" : { + "_3" : { + "name" : "Params", + "members" : [ + { + "name" : "x", + "type" : "float", + "offset" : 0 + }, + { + "name" : "y", + "type" : "_6", + "offset" : 16, + "physical_pointer" : true + } + ] + }, + "_4" : { + "name" : "IntBuf", + "members" : [ + { + "name" : "v", + "type" : "int", + "offset" : 0 + } + ] + }, + "_11" : { + "name" : "IntBuf", + "type" : "_4", + "physical_pointer" : true + }, + "_6" : { + "name" : "IntBuf", + "array" : [ + 3 + ], + "array_size_is_literal" : [ + true + ], + "type" : "_11", + "array_stride" : 16 + } + }, + "ubos" : [ + { + "type" : "_3", + "name" : "Params", + "block_size" : 24, + "set" : 0, + "binding" : 0 + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/asm/op-source-glsl-ssbo-1.asm.comp.json b/reference/shaders-reflection/asm/op-source-glsl-ssbo-1.asm.comp.json index 6cd7f95d76e..3b0c9868e56 100644 --- a/reference/shaders-reflection/asm/op-source-glsl-ssbo-1.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-glsl-ssbo-1.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -15,7 +25,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-glsl-ssbo-2.asm.comp.json 
b/reference/shaders-reflection/asm/op-source-glsl-ssbo-2.asm.comp.json index c2fa56405e3..80cf8622272 100644 --- a/reference/shaders-reflection/asm/op-source-glsl-ssbo-2.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-glsl-ssbo-2.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -15,7 +25,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] }, @@ -28,7 +42,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-hlsl-uav-1.asm.comp.json b/reference/shaders-reflection/asm/op-source-hlsl-uav-1.asm.comp.json index 12b0677f6dd..b34f85bb5a8 100644 --- a/reference/shaders-reflection/asm/op-source-hlsl-uav-1.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-hlsl-uav-1.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -10,12 +20,16 @@ "name" : "UAV0", "members" : [ { - "name" : "_data", + "name" : "@data", "type" : "vec4", "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-hlsl-uav-2.asm.comp.json b/reference/shaders-reflection/asm/op-source-hlsl-uav-2.asm.comp.json index 8da2c74eb59..052e3ba814e 100644 --- a/reference/shaders-reflection/asm/op-source-hlsl-uav-2.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-hlsl-uav-2.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 
1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -10,12 +20,16 @@ "name" : "UAV0", "members" : [ { - "name" : "_data", + "name" : "@data", "type" : "vec4", "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-none-ssbo-1.asm.comp.json b/reference/shaders-reflection/asm/op-source-none-ssbo-1.asm.comp.json index 6cd7f95d76e..3b0c9868e56 100644 --- a/reference/shaders-reflection/asm/op-source-none-ssbo-1.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-none-ssbo-1.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -15,7 +25,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-none-ssbo-2.asm.comp.json b/reference/shaders-reflection/asm/op-source-none-ssbo-2.asm.comp.json index c2fa56405e3..80cf8622272 100644 --- a/reference/shaders-reflection/asm/op-source-none-ssbo-2.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-none-ssbo-2.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -15,7 +25,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] }, @@ -28,7 +42,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-none-uav-1.asm.comp.json 
b/reference/shaders-reflection/asm/op-source-none-uav-1.asm.comp.json index 12b0677f6dd..b34f85bb5a8 100644 --- a/reference/shaders-reflection/asm/op-source-none-uav-1.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-none-uav-1.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -10,12 +20,16 @@ "name" : "UAV0", "members" : [ { - "name" : "_data", + "name" : "@data", "type" : "vec4", "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-none-uav-2.asm.comp.json b/reference/shaders-reflection/asm/op-source-none-uav-2.asm.comp.json index 8da2c74eb59..052e3ba814e 100644 --- a/reference/shaders-reflection/asm/op-source-none-uav-2.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-none-uav-2.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -10,12 +20,16 @@ "name" : "UAV0", "members" : [ { - "name" : "_data", + "name" : "@data", "type" : "vec4", "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/comp/array-of-physical-pointer.comp.json b/reference/shaders-reflection/comp/array-of-physical-pointer.comp.json new file mode 100644 index 00000000000..a5da58c1a7d --- /dev/null +++ b/reference/shaders-reflection/comp/array-of-physical-pointer.comp.json @@ -0,0 +1,66 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] + } + ], + 
"types" : { + "_11" : { + "name" : "Params", + "members" : [ + { + "name" : "x", + "type" : "float", + "offset" : 0 + }, + { + "name" : "y", + "type" : "_7", + "array" : [ + 3 + ], + "array_size_is_literal" : [ + true + ], + "offset" : 16, + "array_stride" : 16 + } + ] + }, + "_13" : { + "name" : "IntBuf", + "members" : [ + { + "name" : "v", + "type" : "int", + "offset" : 0 + } + ] + }, + "_7" : { + "name" : "IntBuf", + "type" : "_13", + "physical_pointer" : true + } + }, + "ubos" : [ + { + "type" : "_11", + "name" : "Params", + "block_size" : 64, + "set" : 0, + "binding" : 0 + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/comp/function-pointer.invalid.asm.comp.json b/reference/shaders-reflection/comp/function-pointer.invalid.asm.comp.json new file mode 100644 index 00000000000..bed59455f01 --- /dev/null +++ b/reference/shaders-reflection/comp/function-pointer.invalid.asm.comp.json @@ -0,0 +1,18 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/comp/out-of-order-block-offsets.comp.json b/reference/shaders-reflection/comp/out-of-order-block-offsets.comp.json new file mode 100644 index 00000000000..b697b453b1a --- /dev/null +++ b/reference/shaders-reflection/comp/out-of-order-block-offsets.comp.json @@ -0,0 +1,44 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] + } + ], + "types" : { + "_7" : { + "name" : "SSBO", + "members" : [ + { + "name" : "foo", + "type" : "uint", + "offset" : 8 + }, + { + "name" : "bar", + "type" : "uint", + "offset" : 4 + } + ] + } + }, + "ssbos" : [ + { + "type" : "_7", + "name" : "SSBO", + "block_size" : 12, + "set" : 0, + "binding" : 0 + } + ] +} \ No newline at end 
of file diff --git a/reference/shaders-reflection/comp/physical-pointer.comp.json b/reference/shaders-reflection/comp/physical-pointer.comp.json new file mode 100644 index 00000000000..a397d0febc2 --- /dev/null +++ b/reference/shaders-reflection/comp/physical-pointer.comp.json @@ -0,0 +1,55 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] + } + ], + "types" : { + "_8" : { + "name" : "Params", + "members" : [ + { + "name" : "x", + "type" : "float", + "offset" : 0 + }, + { + "name" : "y", + "type" : "_10", + "offset" : 8, + "physical_pointer" : true + } + ] + }, + "_10" : { + "name" : "IntBuf", + "members" : [ + { + "name" : "v", + "type" : "int", + "offset" : 0 + } + ] + } + }, + "ubos" : [ + { + "type" : "_8", + "name" : "Params", + "block_size" : 16, + "set" : 0, + "binding" : 0 + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/comp/struct-layout.comp.json b/reference/shaders-reflection/comp/struct-layout.comp.json index 3004454b806..e9bf7eea903 100644 --- a/reference/shaders-reflection/comp/struct-layout.comp.json +++ b/reference/shaders-reflection/comp/struct-layout.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -12,7 +22,8 @@ { "name" : "m", "type" : "mat4", - "offset" : 0 + "offset" : 0, + "matrix_stride" : 16 } ] }, @@ -25,7 +36,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 64 } ] }, @@ -38,7 +53,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 64 } ] } diff --git a/reference/shaders-reflection/comp/struct-packing.comp.json 
b/reference/shaders-reflection/comp/struct-packing.comp.json index 22a41584d96..12285ae24ef 100644 --- a/reference/shaders-reflection/comp/struct-packing.comp.json +++ b/reference/shaders-reflection/comp/struct-packing.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -15,7 +25,11 @@ "array" : [ 1 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 8 }, { "name" : "b", @@ -48,7 +62,11 @@ "array" : [ 1 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 }, { "name" : "b", @@ -91,7 +109,11 @@ "array" : [ 1 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 }, { "name" : "m1s", @@ -99,7 +121,11 @@ "array" : [ 1 ], - "offset" : 16 + "array_size_is_literal" : [ + true + ], + "offset" : 16, + "array_stride" : 16 }, { "name" : "m2s", @@ -107,7 +133,11 @@ "array" : [ 1 ], - "offset" : 32 + "array_size_is_literal" : [ + true + ], + "offset" : 32, + "array_stride" : 32 }, { "name" : "m0", @@ -140,7 +170,11 @@ "array" : [ 8 ], - "offset" : 152 + "array_size_is_literal" : [ + true + ], + "offset" : 152, + "array_stride" : 8 } ] }, @@ -158,7 +192,11 @@ "array" : [ 2 ], - "offset" : 224 + "array_size_is_literal" : [ + true + ], + "offset" : 224, + "array_stride" : 224 }, { "name" : "content2", @@ -168,12 +206,14 @@ { "name" : "m0", "type" : "mat2", - "offset" : 896 + "offset" : 896, + "matrix_stride" : 8 }, { "name" : "m1", "type" : "mat2", - "offset" : 912 + "offset" : 912, + "matrix_stride" : 8 }, { "name" : "m2", @@ -181,43 +221,62 @@ "array" : [ 4 ], - "offset" : 928 + "array_size_is_literal" : [ + true + ], + "offset" : 928, + "array_stride" : 32, + "matrix_stride" : 16 }, { "name" : "m3", "type" : "mat3x2", - "offset" : 1056 + "offset" : 1056, + 
"matrix_stride" : 8 }, { "name" : "m4", "type" : "mat2", - "row_major" : true, - "offset" : 1080 + "offset" : 1080, + "matrix_stride" : 8, + "row_major" : true }, { "name" : "m5", "type" : "mat2", - "row_major" : true, "array" : [ 9 ], - "offset" : 1096 + "array_size_is_literal" : [ + true + ], + "offset" : 1096, + "array_stride" : 16, + "matrix_stride" : 8, + "row_major" : true }, { "name" : "m6", "type" : "mat2x3", - "row_major" : true, "array" : [ 2, 4 ], - "offset" : 1240 + "array_size_is_literal" : [ + true, + true + ], + "offset" : 1240, + "array_stride" : 48, + "matrix_stride" : 8, + "row_major" : true }, { "name" : "m7", "type" : "mat3x2", - "row_major" : true, - "offset" : 1440 + "offset" : 1440, + "matrix_stride" : 16, + "row_major" : true }, { "name" : "array", @@ -225,7 +284,11 @@ "array" : [ 0 ], - "offset" : 1472 + "array_size_is_literal" : [ + true + ], + "offset" : 1472, + "array_stride" : 4 } ] }, @@ -238,7 +301,11 @@ "array" : [ 1 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 }, { "name" : "b", @@ -271,7 +338,11 @@ "array" : [ 1 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 }, { "name" : "b", @@ -314,7 +385,11 @@ "array" : [ 1 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 32 }, { "name" : "m1s", @@ -322,7 +397,11 @@ "array" : [ 1 ], - "offset" : 32 + "array_size_is_literal" : [ + true + ], + "offset" : 32, + "array_stride" : 16 }, { "name" : "m2s", @@ -330,7 +409,11 @@ "array" : [ 1 ], - "offset" : 48 + "array_size_is_literal" : [ + true + ], + "offset" : 48, + "array_stride" : 32 }, { "name" : "m0", @@ -363,7 +446,11 @@ "array" : [ 8 ], - "offset" : 192 + "array_size_is_literal" : [ + true + ], + "offset" : 192, + "array_stride" : 16 } ] }, @@ -381,7 +468,11 @@ "array" : [ 2 ], - "offset" : 320 + "array_size_is_literal" : [ + true + ], + "offset" : 320, + "array_stride" : 320 }, { "name" : 
"content2", @@ -391,12 +482,14 @@ { "name" : "m0", "type" : "mat2", - "offset" : 1280 + "offset" : 1280, + "matrix_stride" : 16 }, { "name" : "m1", "type" : "mat2", - "offset" : 1312 + "offset" : 1312, + "matrix_stride" : 16 }, { "name" : "m2", @@ -404,43 +497,62 @@ "array" : [ 4 ], - "offset" : 1344 + "array_size_is_literal" : [ + true + ], + "offset" : 1344, + "array_stride" : 32, + "matrix_stride" : 16 }, { "name" : "m3", "type" : "mat3x2", - "offset" : 1472 + "offset" : 1472, + "matrix_stride" : 16 }, { "name" : "m4", "type" : "mat2", - "row_major" : true, - "offset" : 1520 + "offset" : 1520, + "matrix_stride" : 16, + "row_major" : true }, { "name" : "m5", "type" : "mat2", - "row_major" : true, "array" : [ 9 ], - "offset" : 1552 + "array_size_is_literal" : [ + true + ], + "offset" : 1552, + "array_stride" : 32, + "matrix_stride" : 16, + "row_major" : true }, { "name" : "m6", "type" : "mat2x3", - "row_major" : true, "array" : [ 2, 4 ], - "offset" : 1840 + "array_size_is_literal" : [ + true, + true + ], + "offset" : 1840, + "array_stride" : 96, + "matrix_stride" : 16, + "row_major" : true }, { "name" : "m7", "type" : "mat3x2", - "row_major" : true, - "offset" : 2224 + "offset" : 2224, + "matrix_stride" : 16, + "row_major" : true }, { "name" : "array", @@ -448,7 +560,11 @@ "array" : [ 0 ], - "offset" : 2256 + "array_size_is_literal" : [ + true + ], + "offset" : 2256, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/comp/workgroup-size-spec-constant.comp.json b/reference/shaders-reflection/comp/workgroup-size-spec-constant.comp.json new file mode 100644 index 00000000000..c67d7230034 --- /dev/null +++ b/reference/shaders-reflection/comp/workgroup-size-spec-constant.comp.json @@ -0,0 +1,62 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "comp", + "workgroup_size" : [ + 10, + 40, + 60 + ], + "workgroup_size_is_spec_constant_id" : [ + true, + true, + true + ] + } + ], + "types" : { + "_8" : { + "name" : "SSBO", + "members" : [ + { + 
"name" : "v", + "type" : "vec4", + "offset" : 0 + } + ] + } + }, + "ssbos" : [ + { + "type" : "_8", + "name" : "SSBO", + "block_size" : 16, + "set" : 0, + "binding" : 0 + } + ], + "specialization_constants" : [ + { + "name" : "", + "id" : 10, + "type" : "uint", + "variable_id" : 18, + "default_value" : 1 + }, + { + "name" : "", + "id" : 40, + "type" : "uint", + "variable_id" : 19, + "default_value" : 1 + }, + { + "name" : "", + "id" : 60, + "type" : "uint", + "variable_id" : 20, + "default_value" : 1 + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/frag/image-load-store-uint-coord.asm.frag.json b/reference/shaders-reflection/frag/image-load-store-uint-coord.asm.frag.json index 527ea2bfeee..c239527c842 100644 --- a/reference/shaders-reflection/frag/image-load-store-uint-coord.asm.frag.json +++ b/reference/shaders-reflection/frag/image-load-store-uint-coord.asm.frag.json @@ -8,7 +8,7 @@ "outputs" : [ { "type" : "vec4", - "name" : "_entryPointOutput", + "name" : "@entryPointOutput", "location" : 0 } ], diff --git a/reference/shaders-reflection/frag/separate-sampler-texture-array.vk.frag.json b/reference/shaders-reflection/frag/separate-sampler-texture-array.vk.frag.json index 9216d93e5d7..e5f2f756249 100644 --- a/reference/shaders-reflection/frag/separate-sampler-texture-array.vk.frag.json +++ b/reference/shaders-reflection/frag/separate-sampler-texture-array.vk.frag.json @@ -31,6 +31,9 @@ "array" : [ 4 ], + "array_size_is_literal" : [ + true + ], "set" : 0, "binding" : 1 }, @@ -40,6 +43,9 @@ "array" : [ 4 ], + "array_size_is_literal" : [ + true + ], "set" : 0, "binding" : 4 }, @@ -49,6 +55,9 @@ "array" : [ 4 ], + "array_size_is_literal" : [ + true + ], "set" : 0, "binding" : 3 }, @@ -58,6 +67,9 @@ "array" : [ 4 ], + "array_size_is_literal" : [ + true + ], "set" : 0, "binding" : 2 } diff --git a/reference/shaders-reflection/frag/spec-constant.vk.frag.json b/reference/shaders-reflection/frag/spec-constant.vk.frag.json index 
0add2986660..dd876dde96d 100644 --- a/reference/shaders-reflection/frag/spec-constant.vk.frag.json +++ b/reference/shaders-reflection/frag/spec-constant.vk.frag.json @@ -14,6 +14,9 @@ "type" : "float", "array" : [ 135 + ], + "array_size_is_literal" : [ + false ] } ] @@ -28,43 +31,59 @@ ], "specialization_constants" : [ { + "name" : "a", "id" : 1, "type" : "float", + "variable_id" : 9, "default_value" : 1.5 }, { + "name" : "b", "id" : 2, "type" : "float", + "variable_id" : 11, "default_value" : 2.5 }, { + "name" : "c", "id" : 3, "type" : "int", + "variable_id" : 16, "default_value" : 3 }, { + "name" : "d", "id" : 4, "type" : "int", + "variable_id" : 25, "default_value" : 4 }, { + "name" : "e", "id" : 5, "type" : "uint", + "variable_id" : 34, "default_value" : 5 }, { + "name" : "f", "id" : 6, "type" : "uint", + "variable_id" : 35, "default_value" : 6 }, { + "name" : "g", "id" : 7, "type" : "bool", + "variable_id" : 56, "default_value" : false }, { + "name" : "h", "id" : 8, "type" : "bool", + "variable_id" : 57, "default_value" : true } ] diff --git a/reference/shaders-reflection/vert/array-size-reflection.vert.json b/reference/shaders-reflection/vert/array-size-reflection.vert.json new file mode 100644 index 00000000000..87b6cb060f4 --- /dev/null +++ b/reference/shaders-reflection/vert/array-size-reflection.vert.json @@ -0,0 +1,78 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "vert" + } + ], + "types" : { + "_11" : { + "name" : "gl_PerVertex", + "members" : [ + { + "name" : "gl_Position", + "type" : "vec4" + }, + { + "name" : "gl_PointSize", + "type" : "float" + }, + { + "name" : "gl_ClipDistance", + "type" : "float", + "array" : [ + 1 + ], + "array_size_is_literal" : [ + true + ] + }, + { + "name" : "gl_CullDistance", + "type" : "float", + "array" : [ + 1 + ], + "array_size_is_literal" : [ + true + ] + } + ] + }, + "_18" : { + "name" : "u_", + "members" : [ + { + "name" : "u_0", + "type" : "vec4", + "array" : [ + 16 + ], + "array_size_is_literal" : [ + 
false + ], + "offset" : 0, + "array_stride" : 16 + } + ] + } + }, + "ubos" : [ + { + "type" : "_18", + "name" : "u_", + "block_size" : 16, + "set" : 1, + "binding" : 0 + } + ], + "specialization_constants" : [ + { + "name" : "ARR_SIZE", + "id" : 0, + "type" : "int", + "variable_id" : 16, + "default_value" : 1 + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/vert/read-from-row-major-array.vert.json b/reference/shaders-reflection/vert/read-from-row-major-array.vert.json index d92fb67fb58..cebd66bd9aa 100644 --- a/reference/shaders-reflection/vert/read-from-row-major-array.vert.json +++ b/reference/shaders-reflection/vert/read-from-row-major-array.vert.json @@ -25,12 +25,18 @@ { "name" : "var", "type" : "mat2x3", - "row_major" : true, "array" : [ 4, 3 ], - "offset" : 0 + "array_size_is_literal" : [ + true, + true + ], + "offset" : 0, + "array_stride" : 192, + "matrix_stride" : 16, + "row_major" : true } ] } diff --git a/reference/shaders-reflection/vert/stride-reflection.vert.json b/reference/shaders-reflection/vert/stride-reflection.vert.json new file mode 100644 index 00000000000..1dd8f189575 --- /dev/null +++ b/reference/shaders-reflection/vert/stride-reflection.vert.json @@ -0,0 +1,96 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "vert" + } + ], + "types" : { + "_11" : { + "name" : "gl_PerVertex", + "members" : [ + { + "name" : "gl_Position", + "type" : "vec4" + }, + { + "name" : "gl_PointSize", + "type" : "float" + }, + { + "name" : "gl_ClipDistance", + "type" : "float", + "array" : [ + 1 + ], + "array_size_is_literal" : [ + true + ] + }, + { + "name" : "gl_CullDistance", + "type" : "float", + "array" : [ + 1 + ], + "array_size_is_literal" : [ + true + ] + } + ] + }, + "_21" : { + "name" : "U", + "members" : [ + { + "name" : "v", + "type" : "vec4", + "array" : [ + 4 + ], + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 + }, + { + "name" : "c", + "type" : "mat4", + "array" : [ + 4 + ], + 
"array_size_is_literal" : [ + true + ], + "offset" : 64, + "array_stride" : 64, + "matrix_stride" : 16 + }, + { + "name" : "r", + "type" : "mat4", + "array" : [ + 4 + ], + "array_size_is_literal" : [ + true + ], + "offset" : 320, + "array_stride" : 64, + "matrix_stride" : 16, + "row_major" : true + } + ] + } + }, + "ubos" : [ + { + "type" : "_21", + "name" : "U", + "block_size" : 576, + "set" : 0, + "binding" : 0 + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/vert/texture_buffer.vert.json b/reference/shaders-reflection/vert/texture_buffer.vert.json index 3c69e24cbc0..a9368639488 100644 --- a/reference/shaders-reflection/vert/texture_buffer.vert.json +++ b/reference/shaders-reflection/vert/texture_buffer.vert.json @@ -32,6 +32,7 @@ { "type" : "imageBuffer", "name" : "uSampo", + "readonly" : true, "set" : 0, "binding" : 5, "format" : "rgba32f" diff --git a/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag new file mode 100644 index 00000000000..429bbf738ea --- /dev/null +++ b/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag @@ -0,0 +1,361 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 
View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 
View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + 
packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_MobileDirectionalLight +{ + float4 MobileDirectionalLight_DirectionalLightColor; + float4 MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition; + float4 MobileDirectionalLight_DirectionalLightShadowSize; + float4 
MobileDirectionalLight_DirectionalLightDistanceFadeMAD; + float4 MobileDirectionalLight_DirectionalLightShadowDistances; + float4x4 MobileDirectionalLight_DirectionalLightScreenToShadow[4]; +}; + +struct type_Globals +{ + int NumDynamicPointLights; + float4 LightPositionAndInvRadius[4]; + float4 LightColorAndFalloffExponent[4]; + float4 MobileReflectionParams; +}; + +constant float3 _136 = {}; +constant float4 _137 = {}; +constant float _138 = {}; +constant float3 _139 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0)]]; + float4 in_var_TEXCOORD7 [[user(locn1)]]; + float4 in_var_TEXCOORD8 [[user(locn2)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_MobileDirectionalLight& MobileDirectionalLight [[buffer(1)]], constant type_Globals& _Globals [[buffer(2)]], texture2d MobileDirectionalLight_DirectionalLightShadowTexture [[texture(0)]], texture2d Material_Texture2D_0 [[texture(1)]], texture2d Material_Texture2D_1 [[texture(2)]], texturecube ReflectionCubemap [[texture(3)]], sampler MobileDirectionalLight_DirectionalLightShadowSampler [[sampler(0)]], sampler Material_Texture2D_0Sampler [[sampler(1)]], sampler Material_Texture2D_1Sampler [[sampler(2)]], sampler ReflectionCubemapSampler [[sampler(3)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float4 _177 = float4((((gl_FragCoord.xy - View.View_ViewRectMin.xy) * View.View_ViewSizeAndInvSize.zw) - float2(0.5)) * float2(2.0, -2.0), _138, 1.0) * float4(gl_FragCoord.w); + float3 _179 = in.in_var_TEXCOORD8.xyz - float3(View.View_PreViewTranslation); + float3 _181 = fast::normalize(-in.in_var_TEXCOORD8.xyz); + float4 _187 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (in.in_var_TEXCOORD0 * float2(10.0))); + float2 _190 = (_187.xy * float2(2.0)) - float2(1.0); + float3 _206 = fast::normalize(float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), 
float3(0.0, 0.0, 1.0)) * (((float4(_190, sqrt(fast::clamp(1.0 - dot(_190, _190), 0.0, 1.0)), 1.0).xyz * float3(0.300000011920928955078125, 0.300000011920928955078125, 1.0)) * float3(View.View_NormalOverrideParameter.w)) + View.View_NormalOverrideParameter.xyz)); + float _208 = dot(_206, _181); + float4 _217 = Material_Texture2D_1.sample(Material_Texture2D_1Sampler, (in.in_var_TEXCOORD0 * float2(20.0))); + float _219 = mix(0.4000000059604644775390625, 1.0, _217.x); + float4 _223 = Material_Texture2D_1.sample(Material_Texture2D_1Sampler, (in.in_var_TEXCOORD0 * float2(5.0))); + float _224 = _177.w; + float _228 = fast::min(fast::max((_224 - 24.0) * 0.000666666659526526927947998046875, 0.0), 1.0); + float _229 = _223.y; + float4 _233 = Material_Texture2D_1.sample(Material_Texture2D_1Sampler, (in.in_var_TEXCOORD0 * float2(0.5))); + float _235 = _233.y; + float _253 = fast::clamp((fast::min(fast::max(mix(0.0, 0.5, _235) + mix(mix(0.699999988079071044921875, 1.0, _229), 1.0, _228), 0.0), 1.0) * View.View_RoughnessOverrideParameter.y) + View.View_RoughnessOverrideParameter.x, 0.119999997317790985107421875, 1.0); + float2 _257 = (float2(_253) * float2(-1.0, -0.0274999998509883880615234375)) + float2(1.0, 0.0425000004470348358154296875); + float _258 = _257.x; + float3 _270 = (fast::clamp(float3(mix(_219, 1.0 - _219, mix(_229, 1.0, _228)) * (mix(0.2949999868869781494140625, 0.660000026226043701171875, mix(_235 + mix(_229, 0.0, _228), 0.5, 0.5)) * 0.5)), float3(0.0), float3(1.0)) * float3(View.View_DiffuseOverrideParameter.w)) + View.View_DiffuseOverrideParameter.xyz; + float3 _275 = float3(((fast::min(_258 * _258, exp2((-9.27999973297119140625) * fast::max(_208, 0.0))) * _258) + _257.y) * View.View_SpecularOverrideParameter.w) + View.View_SpecularOverrideParameter.xyz; + float _276 = _275.x; + float4 _303; + int _286 = 0; + for (;;) + { + if (_286 < 2) + { + if (_224 < MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowDistances[uint(_286)]) + { + _303 = 
MobileDirectionalLight.MobileDirectionalLight_DirectionalLightScreenToShadow[_286] * float4(_177.xy, _224, 1.0); + break; + } + _286++; + continue; + } + else + { + _303 = float4(0.0); + break; + } + } + float _423; + if (_303.z > 0.0) + { + float2 _311 = _303.xy * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.xy; + float2 _312 = fract(_311); + float2 _313 = floor(_311); + float3 _320; + _320.x = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(-0.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + _320.y = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(0.5, -0.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + _320.z = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(1.5, -0.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + float3 _335 = float3(MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition.w); + float3 _337 = float3((fast::min(_303.z, 0.999989986419677734375) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition.w) - 1.0); + float3 _339 = fast::clamp((_320 * _335) - _337, float3(0.0), float3(1.0)); + float3 _345; + _345.x = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(-0.5, 0.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + _345.y = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(0.5)) * 
MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + _345.z = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(1.5, 0.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + float3 _360 = fast::clamp((_345 * _335) - _337, float3(0.0), float3(1.0)); + float3 _366; + _366.x = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(-0.5, 1.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + _366.y = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(0.5, 1.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + _366.z = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(1.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + float3 _381 = fast::clamp((_366 * _335) - _337, float3(0.0), float3(1.0)); + float _383 = _312.x; + float _384 = 1.0 - _383; + float3 _399; + _399.x = ((_339.x * _384) + _339.y) + (_339.z * _383); + _399.y = ((_360.x * _384) + _360.y) + (_360.z * _383); + _399.z = ((_381.x * _384) + _381.y) + (_381.z * _383); + float _408 = _312.y; + float _420 = fast::clamp((_224 * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDistanceFadeMAD.x) + MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDistanceFadeMAD.y, 0.0, 1.0); + _423 = mix(fast::clamp(0.25 * dot(_399, float3(1.0 - _408, 1.0, _408)), 0.0, 1.0), 1.0, _420 * _420); + } + else + { + _423 = 1.0; + } + float3 _429 = fast::normalize(_181 + MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition.xyz); + float 
_439 = (_253 * 0.25) + 0.25; + float3 _440 = cross(_206, _429); + float _442 = _253 * _253; + float _443 = fast::max(0.0, dot(_206, _429)) * _442; + float _446 = _442 / (dot(_440, _440) + (_443 * _443)); + bool _458 = float(_Globals.MobileReflectionParams.w > 0.0) != 0.0; + float4 _468 = ReflectionCubemap.sample(ReflectionCubemapSampler, ((-_181) + ((_206 * float3(_208)) * float3(2.0))), level(((_458 ? _Globals.MobileReflectionParams.w : View.View_ReflectionCubemapMaxMip) - 1.0) - (1.0 - (1.2000000476837158203125 * log2(_253))))); + float3 _481; + if (_458) + { + _481 = _468.xyz * View.View_SkyLightColor.xyz; + } + else + { + float3 _476 = _468.xyz * float3(_468.w * 16.0); + _481 = _476 * _476; + } + float3 _484 = float3(_276); + float3 _488; + _488 = ((float3(_423 * fast::max(0.0, dot(_206, MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition.xyz))) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightColor.xyz) * (_270 + float3(_276 * (_439 * fast::min(_446 * _446, 65504.0))))) + ((_481 * float3(fast::clamp(1.0, 0.0, 1.0))) * _484); + float3 _507; + float _509; + float _511; + float _537; + int _491 = 0; + for (;;) + { + if (_491 < _Globals.NumDynamicPointLights) + { + float3 _501 = _Globals.LightPositionAndInvRadius[_491].xyz - _179; + float _502 = dot(_501, _501); + float3 _505 = _501 * float3(rsqrt(_502)); + _507 = fast::normalize(_181 + _505); + _509 = fast::max(0.0, dot(_206, _505)); + _511 = fast::max(0.0, dot(_206, _507)); + if (_Globals.LightColorAndFalloffExponent[_491].w == 0.0) + { + float _531 = _502 * (_Globals.LightPositionAndInvRadius[_491].w * _Globals.LightPositionAndInvRadius[_491].w); + float _534 = fast::clamp(1.0 - (_531 * _531), 0.0, 1.0); + _537 = (1.0 / (_502 + 1.0)) * (_534 * _534); + } + else + { + float3 _521 = _501 * float3(_Globals.LightPositionAndInvRadius[_491].w); + _537 = pow(1.0 - fast::clamp(dot(_521, _521), 0.0, 1.0), _Globals.LightColorAndFalloffExponent[_491].w); + } + float3 
_544 = cross(_206, _507); + float _546 = _511 * _442; + float _549 = _442 / (dot(_544, _544) + (_546 * _546)); + _488 += fast::min(float3(65000.0), ((float3(_537 * _509) * _Globals.LightColorAndFalloffExponent[_491].xyz) * float3(0.3183098733425140380859375)) * (_270 + float3(_276 * (_439 * fast::min(_549 * _549, 65504.0))))); + _491++; + continue; + } + else + { + break; + } + } + float3 _567 = (mix(_488 + fast::max(float3(0.0), float3(0.0)), _270 + _484, float3(View.View_UnlitViewmodeMask)) * float3(in.in_var_TEXCOORD7.w)) + in.in_var_TEXCOORD7.xyz; + float4 _568 = float4(_567.x, _567.y, _567.z, _137.w); + _568.w = fast::min(in.in_var_TEXCOORD8.w, 65500.0); + out.out_var_SV_Target0 = _568; + return out; +} + diff --git a/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag new file mode 100644 index 00000000000..bb6058c387e --- /dev/null +++ b/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag @@ -0,0 +1,353 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 
View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 
View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float 
View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_PrimitiveDither +{ + float PrimitiveDither_LODFactor; +}; + +struct type_PrimitiveFade +{ + float2 PrimitiveFade_FadeTimeScaleBias; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[9]; + float4 Material_ScalarExpressions[3]; +}; + +constant float _98 = {}; +constant float _103 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; + float gl_FragDepth [[depth(less)]]; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD6 [[user(locn0)]]; + float4 in_var_TEXCOORD7 [[user(locn1)]]; + float4 in_var_TEXCOORD10_centroid [[user(locn2)]]; + 
float4 in_var_TEXCOORD11_centroid [[user(locn3)]]; + float4 in_var_TEXCOORD0_0 [[user(locn4)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_PrimitiveDither& PrimitiveDither [[buffer(1)]], constant type_PrimitiveFade& PrimitiveFade [[buffer(2)]], constant type_Material& Material [[buffer(3)]], texture2d Material_Texture2D_0 [[texture(0)]], texture2d Material_Texture2D_3 [[texture(1)]], sampler Material_Texture2D_0Sampler [[sampler(0)]], sampler Material_Texture2D_3Sampler [[sampler(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + spvUnsafeArray in_var_TEXCOORD0 = {}; + in_var_TEXCOORD0[0] = in.in_var_TEXCOORD0_0; + float2 _135 = gl_FragCoord.xy - View.View_ViewRectMin.xy; + float4 _140 = float4(_103, _103, gl_FragCoord.z, 1.0) * float4(gl_FragCoord.w); + float4 _144 = View.View_SVPositionToTranslatedWorld * float4(gl_FragCoord.xyz, 1.0); + float3 _148 = _144.xyz / float3(_144.w); + float3 _149 = _148 - float3(View.View_PreViewTranslation); + float3 _151 = fast::normalize(-_148); + float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz); + float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0)); + float _171 = floor(_170); + float _172 = 1.0 / _170; + float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172); + float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _176 = dfdy(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float _180_copy; + float2 _183; + _183 = float2(0.0); + float _188; + float _211; + float2 _212; + float _180 = 1.0; + int _185 = 0; + float _187 = 1.0; + float _189 = 1.0; 
+ for (;;) + { + if (float(_185) < (_171 + 2.0)) + { + _188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y; + if (_180 < _188) + { + float _201 = _188 - _180; + float _203 = _201 / ((_189 - _187) + _201); + _211 = (_189 * _203) + (_180 * (1.0 - _203)); + _212 = _183 - (float2(_203) * _174); + break; + } + _180_copy = _180; + _180 -= _172; + _183 += _174; + _185++; + _187 = _188; + _189 = _180_copy; + continue; + } + else + { + _211 = _98; + _212 = _183; + break; + } + } + float4 _218 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _212.xy), bias(View.View_MaterialTextureMipBias)); + float2 _229 = _135 + float2(View.View_TemporalAAParams.x); + float _237 = float((uint(_229.x) + (2u * uint(_229.y))) % 5u); + float2 _238 = _135 * float2(0.015625); + float4 _242 = Material_Texture2D_3.sample(Material_Texture2D_3Sampler, _238, bias(View.View_MaterialTextureMipBias)); + float4 _254 = Material_Texture2D_3.sample(Material_Texture2D_3Sampler, _238, bias(View.View_MaterialTextureMipBias)); + float3 _272 = float3(_212, (1.0 - _211) * Material.Material_ScalarExpressions[0].x); + float2 _275 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _276 = abs(_275); + float3 _279 = dfdx(_149); + float2 _283 = dfdy(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _284 = abs(_283); + float3 _287 = dfdy(_149); + if (PrimitiveDither.PrimitiveDither_LODFactor != 0.0) + { + if (abs(PrimitiveDither.PrimitiveDither_LODFactor) > 0.001000000047497451305389404296875) + { + float _317 = fract(cos(dot(floor(gl_FragCoord.xy), float2(347.834503173828125, 3343.28369140625))) * 1000.0); + if ((float((PrimitiveDither.PrimitiveDither_LODFactor < 0.0) ? 
((PrimitiveDither.PrimitiveDither_LODFactor + 1.0) > _317) : (PrimitiveDither.PrimitiveDither_LODFactor < _317)) - 0.001000000047497451305389404296875) < 0.0) + { + discard_fragment(); + } + } + } + if ((((_218.z + ((fast::min(fast::max(1.0 - (_218.x * Material.Material_ScalarExpressions[2].y), 0.0), 1.0) + ((_237 + (_242.x * Material.Material_ScalarExpressions[2].z)) * 0.16666667163372039794921875)) + (-0.5))) * ((fast::clamp((View.View_RealTime * PrimitiveFade.PrimitiveFade_FadeTimeScaleBias.x) + PrimitiveFade.PrimitiveFade_FadeTimeScaleBias.y, 0.0, 1.0) + ((_237 + _254.x) * 0.16666667163372039794921875)) + (-0.5))) - 0.33329999446868896484375) < 0.0) + { + discard_fragment(); + } + float2 _351 = ((((in.in_var_TEXCOORD6.xy / float2(in.in_var_TEXCOORD6.w)) - View.View_TemporalAAJitter.xy) - ((in.in_var_TEXCOORD7.xy / float2(in.in_var_TEXCOORD7.w)) - View.View_TemporalAAJitter.zw)) * float2(0.2495000064373016357421875)) + float2(0.49999237060546875); + out.gl_FragDepth = fast::min(_140.z / (_140.w + (sqrt(dot(_272, _272)) / (fast::max(sqrt(dot(_276, _276)) / sqrt(dot(_279, _279)), sqrt(dot(_284, _284)) / sqrt(dot(_287, _287))) / abs(dot(float3x3(View.View_ViewToTranslatedWorld[0].xyz, View.View_ViewToTranslatedWorld[1].xyz, View.View_ViewToTranslatedWorld[2].xyz) * float3(0.0, 0.0, 1.0), _151))))), gl_FragCoord.z); + out.out_var_SV_Target0 = float4(_351.x, _351.y, float2(0.0).x, float2(0.0).y); + return out; +} + diff --git a/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag new file mode 100644 index 00000000000..bb6058c387e --- /dev/null +++ b/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag @@ -0,0 +1,353 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 
View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 
View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float 
View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_PrimitiveDither +{ + float PrimitiveDither_LODFactor; +}; + +struct type_PrimitiveFade +{ + float2 PrimitiveFade_FadeTimeScaleBias; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[9]; + float4 Material_ScalarExpressions[3]; +}; + +constant float _98 = {}; +constant float _103 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; + float gl_FragDepth [[depth(less)]]; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD6 [[user(locn0)]]; + float4 in_var_TEXCOORD7 [[user(locn1)]]; + float4 in_var_TEXCOORD10_centroid [[user(locn2)]]; + 
float4 in_var_TEXCOORD11_centroid [[user(locn3)]]; + float4 in_var_TEXCOORD0_0 [[user(locn4)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_PrimitiveDither& PrimitiveDither [[buffer(1)]], constant type_PrimitiveFade& PrimitiveFade [[buffer(2)]], constant type_Material& Material [[buffer(3)]], texture2d Material_Texture2D_0 [[texture(0)]], texture2d Material_Texture2D_3 [[texture(1)]], sampler Material_Texture2D_0Sampler [[sampler(0)]], sampler Material_Texture2D_3Sampler [[sampler(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + spvUnsafeArray in_var_TEXCOORD0 = {}; + in_var_TEXCOORD0[0] = in.in_var_TEXCOORD0_0; + float2 _135 = gl_FragCoord.xy - View.View_ViewRectMin.xy; + float4 _140 = float4(_103, _103, gl_FragCoord.z, 1.0) * float4(gl_FragCoord.w); + float4 _144 = View.View_SVPositionToTranslatedWorld * float4(gl_FragCoord.xyz, 1.0); + float3 _148 = _144.xyz / float3(_144.w); + float3 _149 = _148 - float3(View.View_PreViewTranslation); + float3 _151 = fast::normalize(-_148); + float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz); + float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0)); + float _171 = floor(_170); + float _172 = 1.0 / _170; + float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172); + float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _176 = dfdy(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float _180_copy; + float2 _183; + _183 = float2(0.0); + float _188; + float _211; + float2 _212; + float _180 = 1.0; + int _185 = 0; + float _187 = 1.0; + float _189 = 1.0; 
+ for (;;) + { + if (float(_185) < (_171 + 2.0)) + { + _188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y; + if (_180 < _188) + { + float _201 = _188 - _180; + float _203 = _201 / ((_189 - _187) + _201); + _211 = (_189 * _203) + (_180 * (1.0 - _203)); + _212 = _183 - (float2(_203) * _174); + break; + } + _180_copy = _180; + _180 -= _172; + _183 += _174; + _185++; + _187 = _188; + _189 = _180_copy; + continue; + } + else + { + _211 = _98; + _212 = _183; + break; + } + } + float4 _218 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _212.xy), bias(View.View_MaterialTextureMipBias)); + float2 _229 = _135 + float2(View.View_TemporalAAParams.x); + float _237 = float((uint(_229.x) + (2u * uint(_229.y))) % 5u); + float2 _238 = _135 * float2(0.015625); + float4 _242 = Material_Texture2D_3.sample(Material_Texture2D_3Sampler, _238, bias(View.View_MaterialTextureMipBias)); + float4 _254 = Material_Texture2D_3.sample(Material_Texture2D_3Sampler, _238, bias(View.View_MaterialTextureMipBias)); + float3 _272 = float3(_212, (1.0 - _211) * Material.Material_ScalarExpressions[0].x); + float2 _275 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _276 = abs(_275); + float3 _279 = dfdx(_149); + float2 _283 = dfdy(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _284 = abs(_283); + float3 _287 = dfdy(_149); + if (PrimitiveDither.PrimitiveDither_LODFactor != 0.0) + { + if (abs(PrimitiveDither.PrimitiveDither_LODFactor) > 0.001000000047497451305389404296875) + { + float _317 = fract(cos(dot(floor(gl_FragCoord.xy), float2(347.834503173828125, 3343.28369140625))) * 1000.0); + if ((float((PrimitiveDither.PrimitiveDither_LODFactor < 0.0) ? 
((PrimitiveDither.PrimitiveDither_LODFactor + 1.0) > _317) : (PrimitiveDither.PrimitiveDither_LODFactor < _317)) - 0.001000000047497451305389404296875) < 0.0) + { + discard_fragment(); + } + } + } + if ((((_218.z + ((fast::min(fast::max(1.0 - (_218.x * Material.Material_ScalarExpressions[2].y), 0.0), 1.0) + ((_237 + (_242.x * Material.Material_ScalarExpressions[2].z)) * 0.16666667163372039794921875)) + (-0.5))) * ((fast::clamp((View.View_RealTime * PrimitiveFade.PrimitiveFade_FadeTimeScaleBias.x) + PrimitiveFade.PrimitiveFade_FadeTimeScaleBias.y, 0.0, 1.0) + ((_237 + _254.x) * 0.16666667163372039794921875)) + (-0.5))) - 0.33329999446868896484375) < 0.0) + { + discard_fragment(); + } + float2 _351 = ((((in.in_var_TEXCOORD6.xy / float2(in.in_var_TEXCOORD6.w)) - View.View_TemporalAAJitter.xy) - ((in.in_var_TEXCOORD7.xy / float2(in.in_var_TEXCOORD7.w)) - View.View_TemporalAAJitter.zw)) * float2(0.2495000064373016357421875)) + float2(0.49999237060546875); + out.gl_FragDepth = fast::min(_140.z / (_140.w + (sqrt(dot(_272, _272)) / (fast::max(sqrt(dot(_276, _276)) / sqrt(dot(_279, _279)), sqrt(dot(_284, _284)) / sqrt(dot(_287, _287))) / abs(dot(float3x3(View.View_ViewToTranslatedWorld[0].xyz, View.View_ViewToTranslatedWorld[1].xyz, View.View_ViewToTranslatedWorld[2].xyz) * float3(0.0, 0.0, 1.0), _151))))), gl_FragCoord.z); + out.out_var_SV_Target0 = float4(_351.x, _351.y, float2(0.0).x, float2(0.0).y); + return out; +} + diff --git a/reference/shaders-ue4-no-opt/asm/tese/ds-texcoord-array.asm.tese b/reference/shaders-ue4-no-opt/asm/tese/ds-texcoord-array.asm.tese new file mode 100644 index 00000000000..346d7e3fc95 --- /dev/null +++ b/reference/shaders-ue4-no-opt/asm/tese/ds-texcoord-array.asm.tese @@ -0,0 +1,318 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 
View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 
View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float 
View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; + float PrePadding_View_3048; + float PrePadding_View_3052; + float4x4 View_WorldToVirtualTexture; + float4 View_VirtualTextureParams; + float4 View_XRPassthroughCameraUVs[2]; +}; + +constant float4 _68 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 out_var_TEXCOORD11_centroid [[user(locn1)]]; + float4 out_var_TEXCOORD0_0 [[user(locn2)]]; + float4 out_var_COLOR1 [[user(locn3)]]; + float4 out_var_COLOR2 [[user(locn4)]]; + float4 out_var_TEXCOORD6 [[user(locn5)]]; + float3 out_var_TEXCOORD7 [[user(locn6)]]; + float4 gl_Position [[position]]; 
+}; + +struct main0_in +{ + float4 in_var_COLOR1 [[attribute(0)]]; + float4 in_var_COLOR2 [[attribute(1)]]; + float4 in_var_TEXCOORD0_0 [[attribute(5)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(6)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(7)]]; + float3 in_var_TEXCOORD7 [[attribute(8)]]; + float4 in_var_VS_To_DS_Position [[attribute(9)]]; +}; + +struct main0_patchIn +{ + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_View& View [[buffer(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray out_var_TEXCOORD0 = {}; + spvUnsafeArray _77 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _78 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray, 3> _79 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD0_0 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_TEXCOORD0_0 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_TEXCOORD0_0 }) }); + spvUnsafeArray _80 = spvUnsafeArray({ patchIn.gl_in[0].in_var_COLOR1, patchIn.gl_in[1].in_var_COLOR1, patchIn.gl_in[2].in_var_COLOR1 }); + spvUnsafeArray _81 = spvUnsafeArray({ patchIn.gl_in[0].in_var_COLOR2, patchIn.gl_in[1].in_var_COLOR2, patchIn.gl_in[2].in_var_COLOR2 }); + spvUnsafeArray _97 = spvUnsafeArray({ patchIn.gl_in[0].in_var_VS_To_DS_Position, patchIn.gl_in[1].in_var_VS_To_DS_Position, patchIn.gl_in[2].in_var_VS_To_DS_Position }); + spvUnsafeArray _98 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD7, patchIn.gl_in[1].in_var_TEXCOORD7, patchIn.gl_in[2].in_var_TEXCOORD7 }); + float4 _111 = float4(gl_TessCoord.x); + float4 _113 = float4(gl_TessCoord.y); + float4 _116 = float4(gl_TessCoord.z); + float4 _118 = ((_97[0] * _111) + (_97[1] * 
_113)) + (_97[2] * _116); + spvUnsafeArray _72; + _72 = _79[0]; + spvUnsafeArray _71; + _71 = _79[1]; + float3 _120 = float3(gl_TessCoord.x); + float3 _123 = float3(gl_TessCoord.y); + spvUnsafeArray _73; + for (int _133 = 0; _133 < 1; ) + { + _73[_133] = (_72[_133] * _111) + (_71[_133] * _113); + _133++; + continue; + } + spvUnsafeArray _75; + _75 = _73; + spvUnsafeArray _74; + _74 = _79[2]; + float3 _155 = float3(gl_TessCoord.z); + float3 _157 = ((_77[0].xyz * _120) + (_77[1].xyz * _123)).xyz + (_77[2].xyz * _155); + spvUnsafeArray _76; + for (int _164 = 0; _164 < 1; ) + { + _76[_164] = _75[_164] + (_74[_164] * _116); + _164++; + continue; + } + float4 _181 = float4(_118.x, _118.y, _118.z, _118.w); + out.out_var_TEXCOORD10_centroid = float4(_157.x, _157.y, _157.z, _68.w); + out.out_var_TEXCOORD11_centroid = ((_78[0] * _111) + (_78[1] * _113)) + (_78[2] * _116); + out_var_TEXCOORD0 = _76; + out.out_var_COLOR1 = ((_80[0] * _111) + (_80[1] * _113)) + (_80[2] * _116); + out.out_var_COLOR2 = ((_81[0] * _111) + (_81[1] * _113)) + (_81[2] * _116); + out.out_var_TEXCOORD6 = _181; + out.out_var_TEXCOORD7 = ((_98[0] * _120) + (_98[1] * _123)) + (_98[2] * _155); + out.gl_Position = View.View_TranslatedWorldToClip * _181; + out.out_var_TEXCOORD0_0 = out_var_TEXCOORD0[0]; + return out; +} + diff --git a/reference/shaders-ue4-no-opt/asm/vert/loop-accesschain-writethrough.asm.invalid.vert b/reference/shaders-ue4-no-opt/asm/vert/loop-accesschain-writethrough.asm.invalid.vert new file mode 100644 index 00000000000..b1298b7e683 --- /dev/null +++ b/reference/shaders-ue4-no-opt/asm/vert/loop-accesschain-writethrough.asm.invalid.vert @@ -0,0 +1,122 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_Globals +{ + float4 ViewportSize; + float ScatteringScaling; + float CocRadiusToCircumscribedRadius; +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct main0_out +{ + float2 out_var_TEXCOORD0 [[user(locn0)]]; + float4 out_var_TEXCOORD1 [[user(locn1)]]; + float4 out_var_TEXCOORD2 [[user(locn2)]]; + float4 out_var_TEXCOORD3 [[user(locn3)]]; + float4 out_var_TEXCOORD4 [[user(locn4)]]; + float4 out_var_TEXCOORD5 [[user(locn5)]]; + float4 out_var_TEXCOORD6 [[user(locn6)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant type_Globals& _Globals [[buffer(0)]], const device type_StructuredBuffer_v4float& ScatterDrawList [[buffer(1)]], uint gl_VertexIndex [[vertex_id]], uint gl_InstanceIndex [[instance_id]]) +{ + main0_out out = {}; + uint _66 = gl_VertexIndex / 4u; + uint _68 = gl_VertexIndex - (_66 * 4u); + uint _70 = (16u * gl_InstanceIndex) + _66; + float _72; + _72 = 0.0; + spvUnsafeArray _61; + spvUnsafeArray _62; + spvUnsafeArray _63; + float _73; + uint _75 = 0u; + for (;;) + { + if (_75 < 4u) + { + uint _82 = ((5u * _70) + _75) + 1u; + _61[_75] = float4(ScatterDrawList._m0[_82].xyz, 0.0); + _62[_75] = ScatterDrawList._m0[_82].w; + if (_75 == 0u) + { + _73 = _62[_75]; + } + else + { + _73 = fast::max(_72, _62[_75]); + } + _63[_75].x = (-0.5) / 
_62[_75]; + _63[_75].y = (0.5 * _62[_75]) + 0.5; + _72 = _73; + _75++; + continue; + } + else + { + break; + } + } + float2 _144 = float2(_Globals.ScatteringScaling) * ScatterDrawList._m0[5u * _70].xy; + float2 _173 = (((float2((_72 * _Globals.CocRadiusToCircumscribedRadius) + 1.0) * ((float2(float(_68 % 2u), float(_68 / 2u)) * float2(2.0)) - float2(1.0))) + _144) + float2(0.5)) * _Globals.ViewportSize.zw; + out.out_var_TEXCOORD0 = _144; + out.out_var_TEXCOORD1 = float4(_61[0].xyz, _62[0]); + out.out_var_TEXCOORD2 = float4(_61[1].xyz, _62[1]); + out.out_var_TEXCOORD3 = float4(_61[2].xyz, _62[2]); + out.out_var_TEXCOORD4 = float4(_61[3].xyz, _62[3]); + out.out_var_TEXCOORD5 = float4(_63[0].x, _63[0].y, _63[1].x, _63[1].y); + out.out_var_TEXCOORD6 = float4(_63[2].x, _63[2].y, _63[3].x, _63[3].y); + out.gl_Position = float4((_173.x * 2.0) - 1.0, 1.0 - (_173.y * 2.0), 0.0, 1.0); + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/depth-compare.asm.frag b/reference/shaders-ue4/asm/frag/depth-compare.asm.frag new file mode 100644 index 00000000000..0a6c98418e3 --- /dev/null +++ b/reference/shaders-ue4/asm/frag/depth-compare.asm.frag @@ -0,0 +1,315 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 
View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float 
View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float 
View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Globals +{ + float3 SoftTransitionScale; + float4x4 ShadowViewProjectionMatrices[6]; + float InvShadowmapResolution; + float ShadowFadeFraction; + float ShadowSharpen; + float4 LightPositionAndInvRadius; + float2 ProjectionDepthBiasParameters; + float4 PointLightDepthBiasAndProjParameters; +}; + +constant float4 _107 = {}; + +struct main0_out 
+{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d SceneTexturesStruct_SceneDepthTexture [[texture(0)]], texture2d SceneTexturesStruct_GBufferATexture [[texture(1)]], texture2d SceneTexturesStruct_GBufferBTexture [[texture(2)]], texture2d SceneTexturesStruct_GBufferDTexture [[texture(3)]], depthcube ShadowDepthCubeTexture [[texture(4)]], texture2d SSProfilesTexture [[texture(5)]], sampler SceneTexturesStruct_SceneDepthTextureSampler [[sampler(0)]], sampler SceneTexturesStruct_GBufferATextureSampler [[sampler(1)]], sampler SceneTexturesStruct_GBufferBTextureSampler [[sampler(2)]], sampler SceneTexturesStruct_GBufferDTextureSampler [[sampler(3)]], sampler ShadowDepthTextureSampler [[sampler(4)]], sampler ShadowDepthCubeTextureSampler [[sampler(5)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float2 _114 = gl_FragCoord.xy * View.View_BufferSizeAndInvSize.zw; + float4 _118 = SceneTexturesStruct_SceneDepthTexture.sample(SceneTexturesStruct_SceneDepthTextureSampler, _114, level(0.0)); + float _119 = _118.x; + float _133 = ((_119 * View.View_InvDeviceZToWorldZTransform.x) + View.View_InvDeviceZToWorldZTransform.y) + (1.0 / ((_119 * View.View_InvDeviceZToWorldZTransform.z) - View.View_InvDeviceZToWorldZTransform.w)); + float4 _147 = View.View_ScreenToWorld * float4(((_114 - View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_133), _133, 1.0); + float3 _148 = _147.xyz; + float3 _152 = _Globals.LightPositionAndInvRadius.xyz - _148; + float _158 = length(_152); + bool _160 = (_158 * _Globals.LightPositionAndInvRadius.w) < 1.0; + float _207; + if (_160) + { + float3 _165 = abs(_152); + float _166 = _165.x; + float _167 = _165.y; + float _168 = _165.z; + float _170 = fast::max(_166, fast::max(_167, _168)); + int _189; + if (_170 == _166) + { + _189 = (_166 == _152.x) ? 
0 : 1; + } + else + { + int _185; + if (_170 == _167) + { + _185 = (_167 == _152.y) ? 2 : 3; + } + else + { + _185 = (_168 == _152.z) ? 4 : 5; + } + _189 = _185; + } + float4 _196 = _Globals.ShadowViewProjectionMatrices[_189] * float4(_147.xyz, 1.0); + float _198 = _196.w; + _207 = ShadowDepthCubeTexture.sample_compare(ShadowDepthCubeTextureSampler, (_152 / float3(_158)), (_196.z / _198) + ((-_Globals.PointLightDepthBiasAndProjParameters.x) / _198), level(0.0)); + } + else + { + _207 = 1.0; + } + float _213 = fast::clamp(((_207 - 0.5) * _Globals.ShadowSharpen) + 0.5, 0.0, 1.0); + float _218 = sqrt(mix(1.0, _213 * _213, _Globals.ShadowFadeFraction)); + float4 _219; + _219.z = _218; + float4 _220 = float4(float3(1.0).x, float3(1.0).y, _219.z, float3(1.0).z); + float3 _236 = fast::normalize((SceneTexturesStruct_GBufferATexture.sample(SceneTexturesStruct_GBufferATextureSampler, _114, level(0.0)).xyz * float3(2.0)) - float3(1.0)); + uint _240 = uint(round(SceneTexturesStruct_GBufferBTexture.sample(SceneTexturesStruct_GBufferBTextureSampler, _114, level(0.0)).w * 255.0)); + bool _248 = (_240 & 15u) == 5u; + float _448; + if (_248) + { + float4 _260 = SSProfilesTexture.read(uint2(int3(1, int(uint((select(float4(0.0), SceneTexturesStruct_GBufferDTexture.sample(SceneTexturesStruct_GBufferDTextureSampler, _114, level(0.0)), bool4(!(((_240 & 4294967280u) & 16u) != 0u))).x * 255.0) + 0.5)), 0).xy), 0); + float _263 = _260.y * 0.5; + float3 _266 = _148 - (_236 * float3(_263)); + float _274 = pow(fast::clamp(dot(-(_152 * float3(rsqrt(dot(_152, _152)))), _236), 0.0, 1.0), 1.0); + float _445; + if (_160) + { + float3 _278 = _152 / float3(_158); + float3 _280 = fast::normalize(cross(_278, float3(0.0, 0.0, 1.0))); + float3 _284 = float3(_Globals.InvShadowmapResolution); + float3 _285 = _280 * _284; + float3 _286 = cross(_280, _278) * _284; + float3 _287 = abs(_278); + float _288 = _287.x; + float _289 = _287.y; + float _290 = _287.z; + float _292 = fast::max(_288, fast::max(_289, 
_290)); + int _311; + if (_292 == _288) + { + _311 = (_288 == _278.x) ? 0 : 1; + } + else + { + int _307; + if (_292 == _289) + { + _307 = (_289 == _278.y) ? 2 : 3; + } + else + { + _307 = (_290 == _278.z) ? 4 : 5; + } + _311 = _307; + } + float4 _318 = _Globals.ShadowViewProjectionMatrices[_311] * float4(_266, 1.0); + float _323 = _260.x * (10.0 / _Globals.LightPositionAndInvRadius.w); + float _329 = (1.0 / (((_318.z / _318.w) * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w; + float _342 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, (_278 + (_286 * float3(2.5))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; + float _364 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, ((_278 + (_285 * float3(2.3776409626007080078125))) + (_286 * float3(0.77254199981689453125))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; + float _387 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, ((_278 + (_285 * float3(1.46946299076080322265625))) + (_286 * float3(-2.0225429534912109375))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; + float _410 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, ((_278 + (_285 * float3(-1.46946299076080322265625))) + (_286 * float3(-2.02254199981689453125))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; + float _433 = (_329 - ((1.0 / 
((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, ((_278 + (_285 * float3(-2.3776409626007080078125))) + (_286 * float3(0.772543013095855712890625))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; + _445 = (((((fast::clamp(abs((_342 > 0.0) ? (_342 + _263) : fast::max(0.0, (_342 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25) + (fast::clamp(abs((_364 > 0.0) ? (_364 + _263) : fast::max(0.0, (_364 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_387 > 0.0) ? (_387 + _263) : fast::max(0.0, (_387 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_410 > 0.0) ? (_410 + _263) : fast::max(0.0, (_410 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_433 > 0.0) ? (_433 + _263) : fast::max(0.0, (_433 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25)) * 0.20000000298023223876953125; + } + else + { + _445 = 1.0; + } + _448 = 1.0 - (_445 * 0.20000000298023223876953125); + } + else + { + _448 = 1.0; + } + _220.w = _248 ? sqrt(_448) : _218; + out.out_var_SV_Target0 = _220; + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag b/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag new file mode 100644 index 00000000000..88618a85129 --- /dev/null +++ b/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag @@ -0,0 +1,1346 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_Globals +{ + float4 MappingPolynomial; + float3 InverseGamma; + float4 ColorMatrixR_ColorCurveCd1; + float4 ColorMatrixG_ColorCurveCd3Cm3; + float4 ColorMatrixB_ColorCurveCm2; + float4 ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3; + float4 ColorCurve_Ch1_Ch2; + float4 ColorShadow_Luma; + float4 ColorShadow_Tint1; + float4 ColorShadow_Tint2; + float FilmSlope; + float FilmToe; + float FilmShoulder; + float FilmBlackClip; + float FilmWhiteClip; + packed_float3 ColorScale; + float4 OverlayColor; + float WhiteTemp; + float WhiteTint; + float4 ColorSaturation; + float4 ColorContrast; + float4 ColorGamma; + float4 ColorGain; + float4 ColorOffset; + float4 ColorSaturationShadows; + float4 ColorContrastShadows; + float4 ColorGammaShadows; + float4 ColorGainShadows; + float4 ColorOffsetShadows; + float4 ColorSaturationMidtones; + float4 ColorContrastMidtones; + float4 ColorGammaMidtones; + float4 ColorGainMidtones; + float4 ColorOffsetMidtones; + float4 ColorSaturationHighlights; + float4 ColorContrastHighlights; + float4 ColorGammaHighlights; + float4 ColorGainHighlights; + float4 ColorOffsetHighlights; + float ColorCorrectionShadowsMax; + float ColorCorrectionHighlightsMin; + uint OutputDevice; + uint OutputGamut; + float BlueCorrection; + float ExpandGamut; +}; + +constant float3 _391 = {}; + +constant 
spvUnsafeArray _475 = spvUnsafeArray({ -4.0, -4.0, -3.1573765277862548828125, -0.485249996185302734375, 1.84773242473602294921875, 1.84773242473602294921875 }); +constant spvUnsafeArray _476 = spvUnsafeArray({ -0.718548238277435302734375, 2.0810306072235107421875, 3.66812419891357421875, 4.0, 4.0, 4.0 }); +constant spvUnsafeArray _479 = spvUnsafeArray({ -4.97062206268310546875, -3.0293781757354736328125, -2.1261999607086181640625, -1.5104999542236328125, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _480 = spvUnsafeArray({ 0.80891323089599609375, 1.19108676910400390625, 1.5683000087738037109375, 1.94830000400543212890625, 2.308300018310546875, 2.63840007781982421875, 2.85949993133544921875, 2.9872608184814453125, 3.0127391815185546875, 3.0127391815185546875 }); +constant spvUnsafeArray _482 = spvUnsafeArray({ -2.3010299205780029296875, -2.3010299205780029296875, -1.9312000274658203125, -1.5204999446868896484375, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _483 = spvUnsafeArray({ 0.801995217800140380859375, 1.19800484180450439453125, 1.5943000316619873046875, 1.99730002880096435546875, 2.3782999515533447265625, 2.7683999538421630859375, 3.0515000820159912109375, 3.2746293544769287109375, 3.32743072509765625, 3.32743072509765625 }); + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0), center_no_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globals [[buffer(0)]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + float3x3 _546 = float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), 
float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * float3x3(float3(1.01303005218505859375, 0.0061053098179399967193603515625, -0.014971000142395496368408203125), float3(0.0076982299797236919403076171875, 0.99816501140594482421875, -0.005032029934227466583251953125), float3(-0.0028413101099431514739990234375, 0.0046851597726345062255859375, 0.92450702190399169921875)); + float3x3 _547 = _546 * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _548 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(0.98722398281097412109375, -0.0061132698319852352142333984375, 0.01595330052077770233154296875), float3(-0.007598360069096088409423828125, 1.00186002254486083984375, 0.0053300200961530208587646484375), float3(0.003072570078074932098388671875, -0.0050959498621523380279541015625, 1.0816800594329833984375)); + float3x3 _549 = _548 * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)); + float3x3 _550 = float3x3(float3(0.952552378177642822265625, 0.0, 9.25), float3(0.3439664542675018310546875, 0.728166103363037109375, -0.07213254272937774658203125), float3(0.0, 0.0, 
1.00882518291473388671875)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _551 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625)); + float3x3 _576; + for (;;) + { + if (_Globals.OutputGamut == 1u) + { + _576 = _548 * float3x3(float3(2.493396282196044921875, -0.931345880031585693359375, -0.4026944935321807861328125), float3(-0.829486787319183349609375, 1.76265966892242431640625, 0.02362460084259510040283203125), float3(0.0358506999909877777099609375, -0.076182700693607330322265625, 0.957014024257659912109375)); + break; + } + else + { + if (_Globals.OutputGamut == 2u) + { + _576 = _548 * float3x3(float3(1.71660840511322021484375, -0.3556621074676513671875, -0.253360092639923095703125), float3(-0.666682898998260498046875, 1.61647760868072509765625, 0.01576850004494190216064453125), float3(0.017642199993133544921875, -0.04277630150318145751953125, 0.94222867488861083984375)); + break; + } + else + { + if (_Globals.OutputGamut == 3u) + { + _576 = float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625)); + 
break; + } + else + { + if (_Globals.OutputGamut == 4u) + { + _576 = float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)); + break; + } + else + { + _576 = _549; + break; + } + } + } + } + } + float3 _577 = float4((in.in_var_TEXCOORD0 - float2(0.015625)) * float2(1.03225803375244140625), float(gl_Layer) * 0.0322580635547637939453125, 0.0).xyz; + float3 _599; + if (_Globals.OutputDevice >= 3u) + { + float3 _591 = pow(_577, float3(0.0126833133399486541748046875)); + _599 = pow(fast::max(float3(0.0), _591 - float3(0.8359375)) / (float3(18.8515625) - (float3(18.6875) * _591)), float3(6.277394771575927734375)) * float3(10000.0); + } + else + { + _599 = (exp2((_577 - float3(0.434017598628997802734375)) * float3(14.0)) * float3(0.180000007152557373046875)) - (exp2(float3(-6.0762462615966796875)) * float3(0.180000007152557373046875)); + } + float _602 = _Globals.WhiteTemp * 1.00055634975433349609375; + float _616 = (_602 <= 7000.0) ? (0.24406300485134124755859375 + ((99.1100006103515625 + ((2967800.0 - (4604438528.0 / _Globals.WhiteTemp)) / _602)) / _602)) : (0.23703999817371368408203125 + ((247.4799957275390625 + ((1901800.0 - (2005284352.0 / _Globals.WhiteTemp)) / _602)) / _602)); + float _633 = ((0.860117733478546142578125 + (0.00015411825734190642833709716796875 * _Globals.WhiteTemp)) + ((1.2864121856637211749330163002014e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 + (0.0008424202096648514270782470703125 * _Globals.WhiteTemp)) + ((7.0814513719597016461193561553955e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); + float _644 = ((0.317398726940155029296875 + (4.25 * _Globals.WhiteTemp)) + ((4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 - (2.8974181986995972692966461181641e-05 * _Globals.WhiteTemp)) + ((1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); + float _649 = ((2.0 * _633) - (8.0 * _644)) + 4.0; + float2 _653 = float2((3.0 * _633) / _649, (2.0 * 
_644) / _649); + float2 _660 = fast::normalize(float2(_633, _644)); + float _665 = _633 + (((-_660.y) * _Globals.WhiteTint) * 0.0500000007450580596923828125); + float _669 = _644 + ((_660.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); + float _674 = ((2.0 * _665) - (8.0 * _669)) + 4.0; + float2 _680 = select(float2(_616, ((((-3.0) * _616) * _616) + (2.86999988555908203125 * _616)) - 0.2750000059604644775390625), _653, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _665) / _674, (2.0 * _669) / _674) - _653); + float _683 = fast::max(_680.y, 1.0000000133514319600180897396058e-10); + float3 _685; + _685.x = _680.x / _683; + _685.y = 1.0; + _685.z = ((1.0 - _680.x) - _680.y) / _683; + float _691 = fast::max(0.328999996185302734375, 1.0000000133514319600180897396058e-10); + float3 _693; + _693.x = 0.3127000033855438232421875 / _691; + _693.y = 1.0; + _693.z = 0.3582999706268310546875 / _691; + float3 _697 = _685 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _698 = _693 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _717 = (_599 * ((float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * ((float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), 
float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)) * float3x3(float3(_698.x / _697.x, 0.0, 0.0), float3(0.0, _698.y / _697.y, 0.0), float3(0.0, 0.0, _698.z / _697.z))) * float3x3(float3(0.986992895603179931640625, -0.14705429971218109130859375, 0.15996269881725311279296875), float3(0.4323053061962127685546875, 0.518360316753387451171875, 0.049291200935840606689453125), float3(-0.00852870009839534759521484375, 0.0400427989661693572998046875, 0.968486726284027099609375)))) * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)))) * _547; + float3 _745; + if (_Globals.ColorShadow_Tint2.w != 0.0) + { + float _724 = dot(_717, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float3 _727 = (_717 / float3(_724)) - float3(1.0); + _745 = mix(_717, _717 * (_549 * (float3x3(float3(0.544169127941131591796875, 0.23959259688854217529296875, 0.16669429838657379150390625), float3(0.23946559429168701171875, 0.702153027057647705078125, 0.058381401002407073974609375), float3(-0.0023439000360667705535888671875, 0.0361833982169628143310546875, 1.05521833896636962890625)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)))), float3((1.0 - exp2((-4.0) * dot(_727, _727))) * (1.0 - exp2((((-4.0) * _Globals.ExpandGamut) * _724) * _724)))); + } + else + { + _745 = _717; + } + float _746 = dot(_745, float3(0.272228717803955078125, 
0.674081742763519287109375, 0.053689517080783843994140625)); + float4 _751 = _Globals.ColorSaturationShadows * _Globals.ColorSaturation; + float4 _756 = _Globals.ColorContrastShadows * _Globals.ColorContrast; + float4 _761 = _Globals.ColorGammaShadows * _Globals.ColorGamma; + float4 _766 = _Globals.ColorGainShadows * _Globals.ColorGain; + float4 _771 = _Globals.ColorOffsetShadows + _Globals.ColorOffset; + float3 _772 = float3(_746); + float _804 = smoothstep(0.0, _Globals.ColorCorrectionShadowsMax, _746); + float4 _808 = _Globals.ColorSaturationHighlights * _Globals.ColorSaturation; + float4 _811 = _Globals.ColorContrastHighlights * _Globals.ColorContrast; + float4 _814 = _Globals.ColorGammaHighlights * _Globals.ColorGamma; + float4 _817 = _Globals.ColorGainHighlights * _Globals.ColorGain; + float4 _820 = _Globals.ColorOffsetHighlights + _Globals.ColorOffset; + float _852 = smoothstep(_Globals.ColorCorrectionHighlightsMin, 1.0, _746); + float4 _855 = _Globals.ColorSaturationMidtones * _Globals.ColorSaturation; + float4 _858 = _Globals.ColorContrastMidtones * _Globals.ColorContrast; + float4 _861 = _Globals.ColorGammaMidtones * _Globals.ColorGamma; + float4 _864 = _Globals.ColorGainMidtones * _Globals.ColorGain; + float4 _867 = _Globals.ColorOffsetMidtones + _Globals.ColorOffset; + float3 _905 = ((((pow(pow(fast::max(float3(0.0), mix(_772, _745, _751.xyz * float3(_751.w))) * float3(5.5555553436279296875), _756.xyz * float3(_756.w)) * float3(0.180000007152557373046875), float3(1.0) / (_761.xyz * float3(_761.w))) * (_766.xyz * float3(_766.w))) + (_771.xyz + float3(_771.w))) * float3(1.0 - _804)) + (((pow(pow(fast::max(float3(0.0), mix(_772, _745, _855.xyz * float3(_855.w))) * float3(5.5555553436279296875), _858.xyz * float3(_858.w)) * float3(0.180000007152557373046875), float3(1.0) / (_861.xyz * float3(_861.w))) * (_864.xyz * float3(_864.w))) + (_867.xyz + float3(_867.w))) * float3(_804 - _852))) + (((pow(pow(fast::max(float3(0.0), mix(_772, _745, _808.xyz * 
float3(_808.w))) * float3(5.5555553436279296875), _811.xyz * float3(_811.w)) * float3(0.180000007152557373046875), float3(1.0) / (_814.xyz * float3(_814.w))) * (_817.xyz * float3(_817.w))) + (_820.xyz + float3(_820.w))) * float3(_852)); + float3 _906 = _905 * _549; + float3 _914 = float3(_Globals.BlueCorrection); + float3 _916 = mix(_905, _905 * ((_551 * float3x3(float3(0.940437257289886474609375, -0.01830687932670116424560546875, 0.07786960899829864501953125), float3(0.008378696627914905548095703125, 0.82866001129150390625, 0.162961304187774658203125), float3(0.0005471261101774871349334716796875, -0.00088337459601461887359619140625, 1.00033628940582275390625))) * _550), _914) * _551; + float _917 = _916.x; + float _918 = _916.y; + float _920 = _916.z; + float _923 = fast::max(fast::max(_917, _918), _920); + float _928 = (fast::max(_923, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_917, _918), _920), 1.0000000133514319600180897396058e-10)) / fast::max(_923, 0.00999999977648258209228515625); + float _941 = ((_920 + _918) + _917) + (1.75 * sqrt(((_920 * (_920 - _918)) + (_918 * (_918 - _917))) + (_917 * (_917 - _920)))); + float _942 = _941 * 0.3333333432674407958984375; + float _943 = _928 - 0.4000000059604644775390625; + float _948 = fast::max(1.0 - abs(_943 * 2.5), 0.0); + float _956 = (1.0 + (float(int(sign(_943 * 5.0))) * (1.0 - (_948 * _948)))) * 0.02500000037252902984619140625; + float _969; + if (_942 <= 0.053333334624767303466796875) + { + _969 = _956; + } + else + { + float _968; + if (_942 >= 0.1599999964237213134765625) + { + _968 = 0.0; + } + else + { + _968 = _956 * ((0.23999999463558197021484375 / _941) - 0.5); + } + _969 = _968; + } + float3 _972 = _916 * float3(1.0 + _969); + float _973 = _972.x; + float _974 = _972.y; + float _976 = _972.z; + float _990; + if ((_973 == _974) && (_974 == _976)) + { + _990 = 0.0; + } + else + { + _990 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_974 - _976), ((2.0 * _973) - _974) - 
_976); + } + float _995; + if (_990 < 0.0) + { + _995 = _990 + 360.0; + } + else + { + _995 = _990; + } + float _996 = fast::clamp(_995, 0.0, 360.0); + float _1001; + if (_996 > 180.0) + { + _1001 = _996 - 360.0; + } + else + { + _1001 = _996; + } + float _1005 = smoothstep(0.0, 1.0, 1.0 - abs(_1001 * 0.01481481455266475677490234375)); + _972.x = _973 + ((((_1005 * _1005) * _928) * (0.02999999932944774627685546875 - _973)) * 0.180000007152557373046875); + float3 _1014 = fast::max(float3(0.0), _972 * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375))); + float _1023 = (1.0 + _Globals.FilmBlackClip) - _Globals.FilmToe; + float _1026 = 1.0 + _Globals.FilmWhiteClip; + float _1029 = _1026 - _Globals.FilmShoulder; + float _1056; + if (_Globals.FilmToe > 0.800000011920928955078125) + { + _1056 = ((0.819999992847442626953125 - _Globals.FilmToe) / _Globals.FilmSlope) + (log(0.180000007152557373046875) / log(10.0)); + } + else + { + float _1035 = (0.180000007152557373046875 + _Globals.FilmBlackClip) / _1023; + _1056 = (log(0.180000007152557373046875) / log(10.0)) - ((0.5 * log(_1035 / (2.0 - _1035))) * (_1023 / _Globals.FilmSlope)); + } + float _1061 = ((1.0 - _Globals.FilmToe) / _Globals.FilmSlope) - _1056; + float _1063 = (_Globals.FilmShoulder / _Globals.FilmSlope) - _1061; + float _1065 = log(10.0); + float3 _1067 = log(mix(float3(dot(_1014, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1014, float3(0.959999978542327880859375))) / float3(_1065); + float3 _1071 = float3(_Globals.FilmSlope) * (_1067 + float3(_1061)); + float3 _1079 = float3(_1056); + float3 _1080 = _1067 - _1079; + float3 _1092 = float3(_1063); + float3 _1106 = fast::clamp(_1080 / float3(_1063 - _1056), float3(0.0), 
float3(1.0)); + float3 _1110 = select(_1106, float3(1.0) - _1106, bool3(_1063 < _1056)); + float3 _1115 = mix(select(_1071, float3(-_Globals.FilmBlackClip) + (float3(2.0 * _1023) / (float3(1.0) + exp(float3(((-2.0) * _Globals.FilmSlope) / _1023) * _1080))), _1067 < _1079), select(_1071, float3(_1026) - (float3(2.0 * _1029) / (float3(1.0) + exp(float3((2.0 * _Globals.FilmSlope) / _1029) * (_1067 - _1092)))), _1067 > _1092), ((float3(3.0) - (float3(2.0) * _1110)) * _1110) * _1110); + float3 _1119 = fast::max(float3(0.0), mix(float3(dot(_1115, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1115, float3(0.930000007152557373046875))); + float3 _1189; + if (_Globals.ColorShadow_Tint2.w == 0.0) + { + float3 _1131; + _1131.x = dot(_906, _Globals.ColorMatrixR_ColorCurveCd1.xyz); + _1131.y = dot(_906, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz); + _1131.z = dot(_906, _Globals.ColorMatrixB_ColorCurveCm2.xyz); + float3 _1157 = fast::max(float3(0.0), _1131 * (_Globals.ColorShadow_Tint1.xyz + (_Globals.ColorShadow_Tint2.xyz * float3(1.0 / (dot(_906, _Globals.ColorShadow_Luma.xyz) + 1.0))))); + float3 _1162 = fast::max(float3(0.0), _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx - _1157); + float3 _1164 = fast::max(_1157, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz); + _1189 = ((((_1164 * _Globals.ColorCurve_Ch1_Ch2.xxx) + _Globals.ColorCurve_Ch1_Ch2.yyy) * (float3(1.0) / (_1164 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.www))) + ((fast::clamp(_1157, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz) * _Globals.ColorMatrixB_ColorCurveCm2.www) + (((_1162 * _Globals.ColorMatrixR_ColorCurveCd1.www) * (float3(1.0) / (_1162 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.yyy))) + _Globals.ColorMatrixG_ColorCurveCd3Cm3.www))) - float3(0.00200000009499490261077880859375); + } + else + { + _1189 = fast::max(float3(0.0), mix(_1119, _1119 * ((_551 * float3x3(float3(1.06317996978759765625, 
0.02339559979736804962158203125, -0.08657260239124298095703125), float3(-0.010633699595928192138671875, 1.2063200473785400390625, -0.1956900060176849365234375), float3(-0.0005908869788981974124908447265625, 0.00105247995816171169281005859375, 0.999538004398345947265625))) * _550), _914) * _549); + } + float3 _1218 = pow(fast::max(float3(0.0), mix((((float3(_Globals.MappingPolynomial.x) * (_1189 * _1189)) + (float3(_Globals.MappingPolynomial.y) * _1189)) + float3(_Globals.MappingPolynomial.z)) * float3(_Globals.ColorScale), _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y)); + float3 _3001; + if (_Globals.OutputDevice == 0u) + { + float _2961 = _1218.x; + float _2973; + for (;;) + { + if (_2961 < 0.00313066993840038776397705078125) + { + _2973 = _2961 * 12.9200000762939453125; + break; + } + _2973 = (pow(_2961, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + float _2974 = _1218.y; + float _2986; + for (;;) + { + if (_2974 < 0.00313066993840038776397705078125) + { + _2986 = _2974 * 12.9200000762939453125; + break; + } + _2986 = (pow(_2974, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + float _2987 = _1218.z; + float _2999; + for (;;) + { + if (_2987 < 0.00313066993840038776397705078125) + { + _2999 = _2987 * 12.9200000762939453125; + break; + } + _2999 = (pow(_2987, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + _3001 = float3(_2973, _2986, _2999); + } + else + { + float3 _2960; + if (_Globals.OutputDevice == 1u) + { + float3 _2953 = fast::max(float3(6.1035199905745685100555419921875e-05), (_1218 * _547) * _576); + _2960 = fast::min(_2953 * float3(4.5), (pow(fast::max(_2953, float3(0.017999999225139617919921875)), float3(0.449999988079071044921875)) * float3(1.09899997711181640625)) - float3(0.098999999463558197021484375)); + } + else + { + float3 
_2950; + if ((_Globals.OutputDevice == 3u) || (_Globals.OutputDevice == 5u)) + { + float3 _2100 = (_906 * float3(1.5)) * (_546 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _2101 = _2100.x; + float _2102 = _2100.y; + float _2104 = _2100.z; + float _2107 = fast::max(fast::max(_2101, _2102), _2104); + float _2112 = (fast::max(_2107, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_2101, _2102), _2104), 1.0000000133514319600180897396058e-10)) / fast::max(_2107, 0.00999999977648258209228515625); + float _2125 = ((_2104 + _2102) + _2101) + (1.75 * sqrt(((_2104 * (_2104 - _2102)) + (_2102 * (_2102 - _2101))) + (_2101 * (_2101 - _2104)))); + float _2126 = _2125 * 0.3333333432674407958984375; + float _2127 = _2112 - 0.4000000059604644775390625; + float _2132 = fast::max(1.0 - abs(_2127 * 2.5), 0.0); + float _2140 = (1.0 + (float(int(sign(_2127 * 5.0))) * (1.0 - (_2132 * _2132)))) * 0.02500000037252902984619140625; + float _2153; + if (_2126 <= 0.053333334624767303466796875) + { + _2153 = _2140; + } + else + { + float _2152; + if (_2126 >= 0.1599999964237213134765625) + { + _2152 = 0.0; + } + else + { + _2152 = _2140 * ((0.23999999463558197021484375 / _2125) - 0.5); + } + _2153 = _2152; + } + float3 _2156 = _2100 * float3(1.0 + _2153); + float _2157 = _2156.x; + float _2158 = _2156.y; + float _2160 = _2156.z; + float _2174; + if ((_2157 == _2158) && (_2158 == _2160)) + { + _2174 = 0.0; + } + else + { + _2174 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_2158 - _2160), ((2.0 * _2157) - _2158) - _2160); + } + float _2179; + if (_2174 < 0.0) + { + _2179 = _2174 + 360.0; + } + else + { + _2179 = _2174; + } + float _2180 = fast::clamp(_2179, 0.0, 360.0); + float _2185; + if (_2180 > 180.0) + { + _2185 = _2180 - 360.0; + } + else + { + _2185 = _2180; + } + float _2235; + 
if ((_2185 > (-67.5)) && (_2185 < 67.5)) + { + float _2192 = (_2185 - (-67.5)) * 0.0296296291053295135498046875; + int _2193 = int(_2192); + float _2195 = _2192 - float(_2193); + float _2196 = _2195 * _2195; + float _2197 = _2196 * _2195; + float _2234; + if (_2193 == 3) + { + _2234 = (((_2197 * (-0.16666667163372039794921875)) + (_2196 * 0.5)) + (_2195 * (-0.5))) + 0.16666667163372039794921875; + } + else + { + float _2227; + if (_2193 == 2) + { + _2227 = ((_2197 * 0.5) + (_2196 * (-1.0))) + 0.666666686534881591796875; + } + else + { + float _2222; + if (_2193 == 1) + { + _2222 = (((_2197 * (-0.5)) + (_2196 * 0.5)) + (_2195 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _2215; + if (_2193 == 0) + { + _2215 = _2197 * 0.16666667163372039794921875; + } + else + { + _2215 = 0.0; + } + _2222 = _2215; + } + _2227 = _2222; + } + _2234 = _2227; + } + _2235 = _2234; + } + else + { + _2235 = 0.0; + } + _2156.x = _2157 + ((((_2235 * 1.5) * _2112) * (0.02999999932944774627685546875 - _2157)) * 0.180000007152557373046875); + float3 _2245 = fast::clamp(fast::clamp(_2156, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _2248 = mix(float3(dot(_2245, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _2245, float3(0.959999978542327880859375)); + float _2249 = _2248.x; + float _2253 = 0.17999999225139617919921875 * exp2(18.0); + float _2255 = exp2(-14.0); + float _2258 = log((_2249 <= 0.0) ? 
_2255 : _2249) / _1065; + float _2260 = log(0.17999999225139617919921875 * exp2(-15.0)) / _1065; + float _2327; + if (_2258 <= _2260) + { + _2327 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _2267 = log(0.180000007152557373046875) / _1065; + float _2324; + if ((_2258 > _2260) && (_2258 < _2267)) + { + float _2307 = (3.0 * (_2258 - _2260)) / (_2267 - _2260); + int _2308 = int(_2307); + float _2310 = _2307 - float(_2308); + _2324 = dot(float3(_2310 * _2310, _2310, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2308], _475[_2308 + 1], _475[_2308 + 2])); + } + else + { + float _2275 = log(_2253) / _1065; + float _2303; + if ((_2258 >= _2267) && (_2258 < _2275)) + { + float _2286 = (3.0 * (_2258 - _2267)) / (_2275 - _2267); + int _2287 = int(_2286); + float _2289 = _2286 - float(_2287); + _2303 = dot(float3(_2289 * _2289, _2289, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2287], _476[_2287 + 1], _476[_2287 + 2])); + } + else + { + _2303 = log(10000.0) / _1065; + } + _2324 = _2303; + } + _2327 = _2324; + } + float3 _2329; + _2329.x = pow(10.0, _2327); + float _2330 = _2248.y; + float _2334 = log((_2330 <= 0.0) ? 
_2255 : _2330) / _1065; + float _2401; + if (_2334 <= _2260) + { + _2401 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _2341 = log(0.180000007152557373046875) / _1065; + float _2398; + if ((_2334 > _2260) && (_2334 < _2341)) + { + float _2381 = (3.0 * (_2334 - _2260)) / (_2341 - _2260); + int _2382 = int(_2381); + float _2384 = _2381 - float(_2382); + _2398 = dot(float3(_2384 * _2384, _2384, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2382], _475[_2382 + 1], _475[_2382 + 2])); + } + else + { + float _2349 = log(_2253) / _1065; + float _2377; + if ((_2334 >= _2341) && (_2334 < _2349)) + { + float _2360 = (3.0 * (_2334 - _2341)) / (_2349 - _2341); + int _2361 = int(_2360); + float _2363 = _2360 - float(_2361); + _2377 = dot(float3(_2363 * _2363, _2363, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2361], _476[_2361 + 1], _476[_2361 + 2])); + } + else + { + _2377 = log(10000.0) / _1065; + } + _2398 = _2377; + } + _2401 = _2398; + } + _2329.y = pow(10.0, _2401); + float _2404 = _2248.z; + float _2408 = log((_2404 <= 0.0) ? 
_2255 : _2404) / _1065; + float _2475; + if (_2408 <= _2260) + { + _2475 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _2415 = log(0.180000007152557373046875) / _1065; + float _2472; + if ((_2408 > _2260) && (_2408 < _2415)) + { + float _2455 = (3.0 * (_2408 - _2260)) / (_2415 - _2260); + int _2456 = int(_2455); + float _2458 = _2455 - float(_2456); + _2472 = dot(float3(_2458 * _2458, _2458, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2456], _475[_2456 + 1], _475[_2456 + 2])); + } + else + { + float _2423 = log(_2253) / _1065; + float _2451; + if ((_2408 >= _2415) && (_2408 < _2423)) + { + float _2434 = (3.0 * (_2408 - _2415)) / (_2423 - _2415); + int _2435 = int(_2434); + float _2437 = _2434 - float(_2435); + _2451 = dot(float3(_2437 * _2437, _2437, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2435], _476[_2435 + 1], _476[_2435 + 2])); + } + else + { + _2451 = log(10000.0) / _1065; + } + _2472 = _2451; + } + _2475 = _2472; + } + _2329.z = pow(10.0, _2475); + float3 _2479 = (_2329 * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _2481 = 0.17999999225139617919921875 * pow(2.0, -12.0); + float _2485 = log((_2481 <= 0.0) ? 
_2255 : _2481) / _1065; + float _2552; + if (_2485 <= _2260) + { + _2552 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _2492 = log(0.180000007152557373046875) / _1065; + float _2549; + if ((_2485 > _2260) && (_2485 < _2492)) + { + float _2532 = (3.0 * (_2485 - _2260)) / (_2492 - _2260); + int _2533 = int(_2532); + float _2535 = _2532 - float(_2533); + _2549 = dot(float3(_2535 * _2535, _2535, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2533], _475[_2533 + 1], _475[_2533 + 2])); + } + else + { + float _2500 = log(_2253) / _1065; + float _2528; + if ((_2485 >= _2492) && (_2485 < _2500)) + { + float _2511 = (3.0 * (_2485 - _2492)) / (_2500 - _2492); + int _2512 = int(_2511); + float _2514 = _2511 - float(_2512); + _2528 = dot(float3(_2514 * _2514, _2514, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2512], _476[_2512 + 1], _476[_2512 + 2])); + } + else + { + _2528 = log(10000.0) / _1065; + } + _2549 = _2528; + } + _2552 = _2549; + } + float _2555 = log(0.180000007152557373046875) / _1065; + float _2611; + if (_2555 <= _2260) + { + _2611 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _2608; + if ((_2555 > _2260) && (_2555 < _2555)) + { + _2608 = (float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[3], _475[4], _475[5])).z; + } + else + { + float _2568 = log(_2253) / _1065; + float _2596; + if ((_2555 >= _2555) && (_2555 < _2568)) + { + float _2579 = (3.0 * (_2555 - _2555)) / (_2568 - _2555); + int _2580 = int(_2579); + float _2582 = _2579 - float(_2580); + _2596 = dot(float3(_2582 * _2582, _2582, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2580], _476[_2580 + 1], _476[_2580 + 2])); + } + else + { + _2596 = log(10000.0) / _1065; + } + _2608 = _2596; + } + _2611 = _2608; + } + float _2612 = pow(10.0, 
_2611); + float _2614 = 0.17999999225139617919921875 * pow(2.0, 10.0); + float _2618 = log((_2614 <= 0.0) ? _2255 : _2614) / _1065; + float _2683; + if (_2618 <= _2260) + { + _2683 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _2680; + if ((_2618 > _2260) && (_2618 < _2555)) + { + float _2663 = (3.0 * (_2618 - _2260)) / (_2555 - _2260); + int _2664 = int(_2663); + float _2666 = _2663 - float(_2664); + _2680 = dot(float3(_2666 * _2666, _2666, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2664], _475[_2664 + 1], _475[_2664 + 2])); + } + else + { + float _2631 = log(_2253) / _1065; + float _2659; + if ((_2618 >= _2555) && (_2618 < _2631)) + { + float _2642 = (3.0 * (_2618 - _2555)) / (_2631 - _2555); + int _2643 = int(_2642); + float _2645 = _2642 - float(_2643); + _2659 = dot(float3(_2645 * _2645, _2645, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2643], _476[_2643 + 1], _476[_2643 + 2])); + } + else + { + _2659 = log(10000.0) / _1065; + } + _2680 = _2659; + } + _2683 = _2680; + } + float _2684 = pow(10.0, _2683); + float _2685 = _2479.x; + float _2689 = log((_2685 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2685) / _1065; + float _2690 = log(pow(10.0, _2552)); + float _2691 = _2690 / _1065; + float _2768; + if (_2689 <= _2691) + { + _2768 = (_2689 * 3.0) + ((log(9.9999997473787516355514526367188e-05) / _1065) - ((3.0 * _2690) / _1065)); + } + else + { + float _2698 = log(_2612) / _1065; + float _2760; + if ((_2689 > _2691) && (_2689 < _2698)) + { + float _2743 = (7.0 * (_2689 - _2691)) / (_2698 - _2691); + int _2744 = int(_2743); + float _2746 = _2743 - float(_2744); + _2760 = dot(float3(_2746 * _2746, _2746, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_479[_2744], _479[_2744 + 1], _479[_2744 + 2])); + } + else + { + float _2705 = log(_2684); + float _2706 = _2705 / _1065; + float _2739; + if ((_2689 >= _2698) && (_2689 < _2706)) + { + float _2722 = (7.0 * (_2689 - _2698)) / (_2706 - _2698); + int _2723 = int(_2722); + float _2725 = _2722 - float(_2723); + _2739 = dot(float3(_2725 * _2725, _2725, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_480[_2723], _480[_2723 + 1], _480[_2723 + 2])); + } + else + { + _2739 = (_2689 * 0.0599999986588954925537109375) + ((log(1000.0) / _1065) - ((0.0599999986588954925537109375 * _2705) / _1065)); + } + _2760 = _2739; + } + _2768 = _2760; + } + float3 _2770; + _2770.x = pow(10.0, _2768); + float _2771 = _2479.y; + float _2775 = log((_2771 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2771) / _1065; + float _2852; + if (_2775 <= _2691) + { + _2852 = (_2775 * 3.0) + ((log(9.9999997473787516355514526367188e-05) / _1065) - ((3.0 * _2690) / _1065)); + } + else + { + float _2782 = log(_2612) / _1065; + float _2844; + if ((_2775 > _2691) && (_2775 < _2782)) + { + float _2827 = (7.0 * (_2775 - _2691)) / (_2782 - _2691); + int _2828 = int(_2827); + float _2830 = _2827 - float(_2828); + _2844 = dot(float3(_2830 * _2830, _2830, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_479[_2828], _479[_2828 + 1], _479[_2828 + 2])); + } + else + { + float _2789 = log(_2684); + float _2790 = _2789 / _1065; + float _2823; + if ((_2775 >= _2782) && (_2775 < _2790)) + { + float _2806 = (7.0 * (_2775 - _2782)) / (_2790 - _2782); + int _2807 = int(_2806); + float _2809 = _2806 - float(_2807); + _2823 = dot(float3(_2809 * _2809, _2809, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_480[_2807], _480[_2807 + 1], _480[_2807 + 2])); + } + else + { + _2823 = (_2775 * 0.0599999986588954925537109375) + ((log(1000.0) / _1065) - ((0.0599999986588954925537109375 * _2789) / _1065)); + } + _2844 = _2823; + } + _2852 = _2844; + } + _2770.y = pow(10.0, _2852); + float _2855 = _2479.z; + float _2859 = log((_2855 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2855) / _1065; + float _2936; + if (_2859 <= _2691) + { + _2936 = (_2859 * 3.0) + ((log(9.9999997473787516355514526367188e-05) / _1065) - ((3.0 * _2690) / _1065)); + } + else + { + float _2866 = log(_2612) / _1065; + float _2928; + if ((_2859 > _2691) && (_2859 < _2866)) + { + float _2911 = (7.0 * (_2859 - _2691)) / (_2866 - _2691); + int _2912 = int(_2911); + float _2914 = _2911 - float(_2912); + _2928 = dot(float3(_2914 * _2914, _2914, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_479[_2912], _479[_2912 + 1], _479[_2912 + 2])); + } + else + { + float _2873 = log(_2684); + float _2874 = _2873 / _1065; + float _2907; + if ((_2859 >= _2866) && (_2859 < _2874)) + { + float _2890 = (7.0 * (_2859 - _2866)) / (_2874 - _2866); + int _2891 = int(_2890); + float _2893 = _2890 - float(_2891); + _2907 = dot(float3(_2893 * _2893, _2893, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_480[_2891], _480[_2891 + 1], _480[_2891 + 2])); + } + else + { + _2907 = (_2859 * 0.0599999986588954925537109375) + ((log(1000.0) / _1065) - ((0.0599999986588954925537109375 * _2873) / _1065)); + } + _2928 = _2907; + } + _2936 = _2928; + } + _2770.z = pow(10.0, _2936); + float3 _2942 = pow(((_2770 - float3(3.5073844628641381859779357910156e-05)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2950 = pow((float3(0.8359375) + (float3(18.8515625) * _2942)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _2942))), float3(78.84375)); + } + else + { + float3 _2097; + if ((_Globals.OutputDevice == 4u) || (_Globals.OutputDevice == 6u)) + { + float3 _1263 = (_906 * float3(1.5)) * (_546 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _1264 = 
_1263.x; + float _1265 = _1263.y; + float _1267 = _1263.z; + float _1270 = fast::max(fast::max(_1264, _1265), _1267); + float _1275 = (fast::max(_1270, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_1264, _1265), _1267), 1.0000000133514319600180897396058e-10)) / fast::max(_1270, 0.00999999977648258209228515625); + float _1288 = ((_1267 + _1265) + _1264) + (1.75 * sqrt(((_1267 * (_1267 - _1265)) + (_1265 * (_1265 - _1264))) + (_1264 * (_1264 - _1267)))); + float _1289 = _1288 * 0.3333333432674407958984375; + float _1290 = _1275 - 0.4000000059604644775390625; + float _1295 = fast::max(1.0 - abs(_1290 * 2.5), 0.0); + float _1303 = (1.0 + (float(int(sign(_1290 * 5.0))) * (1.0 - (_1295 * _1295)))) * 0.02500000037252902984619140625; + float _1316; + if (_1289 <= 0.053333334624767303466796875) + { + _1316 = _1303; + } + else + { + float _1315; + if (_1289 >= 0.1599999964237213134765625) + { + _1315 = 0.0; + } + else + { + _1315 = _1303 * ((0.23999999463558197021484375 / _1288) - 0.5); + } + _1316 = _1315; + } + float3 _1319 = _1263 * float3(1.0 + _1316); + float _1320 = _1319.x; + float _1321 = _1319.y; + float _1323 = _1319.z; + float _1337; + if ((_1320 == _1321) && (_1321 == _1323)) + { + _1337 = 0.0; + } + else + { + _1337 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_1321 - _1323), ((2.0 * _1320) - _1321) - _1323); + } + float _1342; + if (_1337 < 0.0) + { + _1342 = _1337 + 360.0; + } + else + { + _1342 = _1337; + } + float _1343 = fast::clamp(_1342, 0.0, 360.0); + float _1348; + if (_1343 > 180.0) + { + _1348 = _1343 - 360.0; + } + else + { + _1348 = _1343; + } + float _1398; + if ((_1348 > (-67.5)) && (_1348 < 67.5)) + { + float _1355 = (_1348 - (-67.5)) * 0.0296296291053295135498046875; + int _1356 = int(_1355); + float _1358 = _1355 - float(_1356); + float _1359 = _1358 * _1358; + float _1360 = _1359 * _1358; + float _1397; + if (_1356 == 3) + { + _1397 = (((_1360 * (-0.16666667163372039794921875)) + (_1359 * 0.5)) + (_1358 * (-0.5))) 
+ 0.16666667163372039794921875; + } + else + { + float _1390; + if (_1356 == 2) + { + _1390 = ((_1360 * 0.5) + (_1359 * (-1.0))) + 0.666666686534881591796875; + } + else + { + float _1385; + if (_1356 == 1) + { + _1385 = (((_1360 * (-0.5)) + (_1359 * 0.5)) + (_1358 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _1378; + if (_1356 == 0) + { + _1378 = _1360 * 0.16666667163372039794921875; + } + else + { + _1378 = 0.0; + } + _1385 = _1378; + } + _1390 = _1385; + } + _1397 = _1390; + } + _1398 = _1397; + } + else + { + _1398 = 0.0; + } + _1319.x = _1320 + ((((_1398 * 1.5) * _1275) * (0.02999999932944774627685546875 - _1320)) * 0.180000007152557373046875); + float3 _1408 = fast::clamp(fast::clamp(_1319, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _1411 = mix(float3(dot(_1408, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1408, float3(0.959999978542327880859375)); + float _1412 = _1411.x; + float _1416 = 0.17999999225139617919921875 * exp2(18.0); + float _1418 = exp2(-14.0); + float _1421 = log((_1412 <= 0.0) ? 
_1418 : _1412) / _1065; + float _1423 = log(0.17999999225139617919921875 * exp2(-15.0)) / _1065; + float _1490; + if (_1421 <= _1423) + { + _1490 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _1430 = log(0.180000007152557373046875) / _1065; + float _1487; + if ((_1421 > _1423) && (_1421 < _1430)) + { + float _1470 = (3.0 * (_1421 - _1423)) / (_1430 - _1423); + int _1471 = int(_1470); + float _1473 = _1470 - float(_1471); + _1487 = dot(float3(_1473 * _1473, _1473, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1471], _475[_1471 + 1], _475[_1471 + 2])); + } + else + { + float _1438 = log(_1416) / _1065; + float _1466; + if ((_1421 >= _1430) && (_1421 < _1438)) + { + float _1449 = (3.0 * (_1421 - _1430)) / (_1438 - _1430); + int _1450 = int(_1449); + float _1452 = _1449 - float(_1450); + _1466 = dot(float3(_1452 * _1452, _1452, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1450], _476[_1450 + 1], _476[_1450 + 2])); + } + else + { + _1466 = log(10000.0) / _1065; + } + _1487 = _1466; + } + _1490 = _1487; + } + float3 _1492; + _1492.x = pow(10.0, _1490); + float _1493 = _1411.y; + float _1497 = log((_1493 <= 0.0) ? 
_1418 : _1493) / _1065; + float _1564; + if (_1497 <= _1423) + { + _1564 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _1504 = log(0.180000007152557373046875) / _1065; + float _1561; + if ((_1497 > _1423) && (_1497 < _1504)) + { + float _1544 = (3.0 * (_1497 - _1423)) / (_1504 - _1423); + int _1545 = int(_1544); + float _1547 = _1544 - float(_1545); + _1561 = dot(float3(_1547 * _1547, _1547, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1545], _475[_1545 + 1], _475[_1545 + 2])); + } + else + { + float _1512 = log(_1416) / _1065; + float _1540; + if ((_1497 >= _1504) && (_1497 < _1512)) + { + float _1523 = (3.0 * (_1497 - _1504)) / (_1512 - _1504); + int _1524 = int(_1523); + float _1526 = _1523 - float(_1524); + _1540 = dot(float3(_1526 * _1526, _1526, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1524], _476[_1524 + 1], _476[_1524 + 2])); + } + else + { + _1540 = log(10000.0) / _1065; + } + _1561 = _1540; + } + _1564 = _1561; + } + _1492.y = pow(10.0, _1564); + float _1567 = _1411.z; + float _1571 = log((_1567 <= 0.0) ? 
_1418 : _1567) / _1065; + float _1638; + if (_1571 <= _1423) + { + _1638 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _1578 = log(0.180000007152557373046875) / _1065; + float _1635; + if ((_1571 > _1423) && (_1571 < _1578)) + { + float _1618 = (3.0 * (_1571 - _1423)) / (_1578 - _1423); + int _1619 = int(_1618); + float _1621 = _1618 - float(_1619); + _1635 = dot(float3(_1621 * _1621, _1621, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1619], _475[_1619 + 1], _475[_1619 + 2])); + } + else + { + float _1586 = log(_1416) / _1065; + float _1614; + if ((_1571 >= _1578) && (_1571 < _1586)) + { + float _1597 = (3.0 * (_1571 - _1578)) / (_1586 - _1578); + int _1598 = int(_1597); + float _1600 = _1597 - float(_1598); + _1614 = dot(float3(_1600 * _1600, _1600, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1598], _476[_1598 + 1], _476[_1598 + 2])); + } + else + { + _1614 = log(10000.0) / _1065; + } + _1635 = _1614; + } + _1638 = _1635; + } + _1492.z = pow(10.0, _1638); + float3 _1642 = (_1492 * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _1644 = 0.17999999225139617919921875 * pow(2.0, -12.0); + float _1648 = log((_1644 <= 0.0) ? 
_1418 : _1644) / _1065; + float _1715; + if (_1648 <= _1423) + { + _1715 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _1655 = log(0.180000007152557373046875) / _1065; + float _1712; + if ((_1648 > _1423) && (_1648 < _1655)) + { + float _1695 = (3.0 * (_1648 - _1423)) / (_1655 - _1423); + int _1696 = int(_1695); + float _1698 = _1695 - float(_1696); + _1712 = dot(float3(_1698 * _1698, _1698, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1696], _475[_1696 + 1], _475[_1696 + 2])); + } + else + { + float _1663 = log(_1416) / _1065; + float _1691; + if ((_1648 >= _1655) && (_1648 < _1663)) + { + float _1674 = (3.0 * (_1648 - _1655)) / (_1663 - _1655); + int _1675 = int(_1674); + float _1677 = _1674 - float(_1675); + _1691 = dot(float3(_1677 * _1677, _1677, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1675], _476[_1675 + 1], _476[_1675 + 2])); + } + else + { + _1691 = log(10000.0) / _1065; + } + _1712 = _1691; + } + _1715 = _1712; + } + float _1718 = log(0.180000007152557373046875) / _1065; + float _1774; + if (_1718 <= _1423) + { + _1774 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _1771; + if ((_1718 > _1423) && (_1718 < _1718)) + { + _1771 = (float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[3], _475[4], _475[5])).z; + } + else + { + float _1731 = log(_1416) / _1065; + float _1759; + if ((_1718 >= _1718) && (_1718 < _1731)) + { + float _1742 = (3.0 * (_1718 - _1718)) / (_1731 - _1718); + int _1743 = int(_1742); + float _1745 = _1742 - float(_1743); + _1759 = dot(float3(_1745 * _1745, _1745, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1743], _476[_1743 + 1], _476[_1743 + 2])); + } + else + { + _1759 = log(10000.0) / _1065; + } + _1771 = _1759; + } + _1774 = _1771; + } + float _1775 = pow(10.0, 
_1774); + float _1777 = 0.17999999225139617919921875 * pow(2.0, 11.0); + float _1781 = log((_1777 <= 0.0) ? _1418 : _1777) / _1065; + float _1846; + if (_1781 <= _1423) + { + _1846 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _1843; + if ((_1781 > _1423) && (_1781 < _1718)) + { + float _1826 = (3.0 * (_1781 - _1423)) / (_1718 - _1423); + int _1827 = int(_1826); + float _1829 = _1826 - float(_1827); + _1843 = dot(float3(_1829 * _1829, _1829, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1827], _475[_1827 + 1], _475[_1827 + 2])); + } + else + { + float _1794 = log(_1416) / _1065; + float _1822; + if ((_1781 >= _1718) && (_1781 < _1794)) + { + float _1805 = (3.0 * (_1781 - _1718)) / (_1794 - _1718); + int _1806 = int(_1805); + float _1808 = _1805 - float(_1806); + _1822 = dot(float3(_1808 * _1808, _1808, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1806], _476[_1806 + 1], _476[_1806 + 2])); + } + else + { + _1822 = log(10000.0) / _1065; + } + _1843 = _1822; + } + _1846 = _1843; + } + float _1847 = pow(10.0, _1846); + float _1848 = _1642.x; + float _1852 = log((_1848 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _1848) / _1065; + float _1854 = log(pow(10.0, _1715)) / _1065; + float _1926; + if (_1852 <= _1854) + { + _1926 = log(0.004999999888241291046142578125) / _1065; + } + else + { + float _1861 = log(_1775) / _1065; + float _1923; + if ((_1852 > _1854) && (_1852 < _1861)) + { + float _1906 = (7.0 * (_1852 - _1854)) / (_1861 - _1854); + int _1907 = int(_1906); + float _1909 = _1906 - float(_1907); + _1923 = dot(float3(_1909 * _1909, _1909, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_482[_1907], _482[_1907 + 1], _482[_1907 + 2])); + } + else + { + float _1868 = log(_1847); + float _1869 = _1868 / _1065; + float _1902; + if ((_1852 >= _1861) && (_1852 < _1869)) + { + float _1885 = (7.0 * (_1852 - _1861)) / (_1869 - _1861); + int _1886 = int(_1885); + float _1888 = _1885 - float(_1886); + _1902 = dot(float3(_1888 * _1888, _1888, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_483[_1886], _483[_1886 + 1], _483[_1886 + 2])); + } + else + { + _1902 = (_1852 * 0.119999997317790985107421875) + ((log(2000.0) / _1065) - ((0.119999997317790985107421875 * _1868) / _1065)); + } + _1923 = _1902; + } + _1926 = _1923; + } + float3 _1928; + _1928.x = pow(10.0, _1926); + float _1929 = _1642.y; + float _1933 = log((_1929 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _1929) / _1065; + float _2005; + if (_1933 <= _1854) + { + _2005 = log(0.004999999888241291046142578125) / _1065; + } + else + { + float _1940 = log(_1775) / _1065; + float _2002; + if ((_1933 > _1854) && (_1933 < _1940)) + { + float _1985 = (7.0 * (_1933 - _1854)) / (_1940 - _1854); + int _1986 = int(_1985); + float _1988 = _1985 - float(_1986); + _2002 = dot(float3(_1988 * _1988, _1988, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_482[_1986], _482[_1986 + 1], _482[_1986 + 2])); + } + else + { + float _1947 = log(_1847); + float _1948 = _1947 / _1065; + float _1981; + if ((_1933 >= _1940) && (_1933 < _1948)) + { + float _1964 = (7.0 * (_1933 - _1940)) / (_1948 - _1940); + int _1965 = int(_1964); + float _1967 = _1964 - float(_1965); + _1981 = dot(float3(_1967 * _1967, _1967, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_483[_1965], _483[_1965 + 1], _483[_1965 + 2])); + } + else + { + _1981 = (_1933 * 0.119999997317790985107421875) + ((log(2000.0) / _1065) - ((0.119999997317790985107421875 * _1947) / _1065)); + } + _2002 = _1981; + } + _2005 = _2002; + } + _1928.y = pow(10.0, _2005); + float _2008 = _1642.z; + float _2012 = log((_2008 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2008) / _1065; + float _2084; + if (_2012 <= _1854) + { + _2084 = log(0.004999999888241291046142578125) / _1065; + } + else + { + float _2019 = log(_1775) / _1065; + float _2081; + if ((_2012 > _1854) && (_2012 < _2019)) + { + float _2064 = (7.0 * (_2012 - _1854)) / (_2019 - _1854); + int _2065 = int(_2064); + float _2067 = _2064 - float(_2065); + _2081 = dot(float3(_2067 * _2067, _2067, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_482[_2065], _482[_2065 + 1], _482[_2065 + 2])); + } + else + { + float _2026 = log(_1847); + float _2027 = _2026 / _1065; + float _2060; + if ((_2012 >= _2019) && (_2012 < _2027)) + { + float _2043 = (7.0 * (_2012 - _2019)) / (_2027 - _2019); + int _2044 = int(_2043); + float _2046 = _2043 - float(_2044); + _2060 = dot(float3(_2046 * _2046, _2046, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_483[_2044], _483[_2044 + 1], _483[_2044 + 2])); + } + else + { + _2060 = (_2012 * 0.119999997317790985107421875) + ((log(2000.0) / _1065) - ((0.119999997317790985107421875 * _2026) / _1065)); + } + _2081 = _2060; + } + _2084 = _2081; + } + _1928.z = pow(10.0, _2084); + float3 _2089 = pow((_1928 * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2097 = pow((float3(0.8359375) + (float3(18.8515625) * _2089)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _2089))), float3(78.84375)); + } + else + { + float3 _1260; + if (_Globals.OutputDevice == 7u) + { + float3 _1252 = pow(((_906 * _547) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _1260 = pow((float3(0.8359375) + (float3(18.8515625) * _1252)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _1252))), float3(78.84375)); + } + else + { + _1260 = pow((_1218 * _547) * _576, float3(_Globals.InverseGamma.z)); + } + _2097 = _1260; + } + _2950 = _2097; + } + _2960 = _2950; + } + _3001 = 
_2960; + } + float3 _3002 = _3001 * float3(0.95238101482391357421875); + float4 _3003 = float4(_3002.x, _3002.y, _3002.z, float4(0.0).w); + _3003.w = 0.0; + out.out_var_SV_Target0 = _3003; + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag b/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag new file mode 100644 index 00000000000..f405fbb68ef --- /dev/null +++ b/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag @@ -0,0 +1,1392 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_Globals +{ + float4 MappingPolynomial; + float3 InverseGamma; + float4 ColorMatrixR_ColorCurveCd1; + float4 ColorMatrixG_ColorCurveCd3Cm3; + float4 ColorMatrixB_ColorCurveCm2; + float4 ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3; + float4 ColorCurve_Ch1_Ch2; + float4 ColorShadow_Luma; + float4 ColorShadow_Tint1; + float4 ColorShadow_Tint2; + float FilmSlope; + float FilmToe; + float FilmShoulder; + float FilmBlackClip; + float FilmWhiteClip; + float4 LUTWeights[5]; + float3 ColorScale; + float4 OverlayColor; + float WhiteTemp; + float WhiteTint; + float4 ColorSaturation; + 
float4 ColorContrast; + float4 ColorGamma; + float4 ColorGain; + float4 ColorOffset; + float4 ColorSaturationShadows; + float4 ColorContrastShadows; + float4 ColorGammaShadows; + float4 ColorGainShadows; + float4 ColorOffsetShadows; + float4 ColorSaturationMidtones; + float4 ColorContrastMidtones; + float4 ColorGammaMidtones; + float4 ColorGainMidtones; + float4 ColorOffsetMidtones; + float4 ColorSaturationHighlights; + float4 ColorContrastHighlights; + float4 ColorGammaHighlights; + float4 ColorGainHighlights; + float4 ColorOffsetHighlights; + float ColorCorrectionShadowsMax; + float ColorCorrectionHighlightsMin; + uint OutputDevice; + uint OutputGamut; + float BlueCorrection; + float ExpandGamut; +}; + +constant float3 _523 = {}; +constant float3 _525 = {}; + +constant spvUnsafeArray _499 = spvUnsafeArray({ -4.0, -4.0, -3.1573765277862548828125, -0.485249996185302734375, 1.84773242473602294921875, 1.84773242473602294921875 }); +constant spvUnsafeArray _500 = spvUnsafeArray({ -0.718548238277435302734375, 2.0810306072235107421875, 3.66812419891357421875, 4.0, 4.0, 4.0 }); +constant spvUnsafeArray _503 = spvUnsafeArray({ -4.97062206268310546875, -3.0293781757354736328125, -2.1261999607086181640625, -1.5104999542236328125, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _504 = spvUnsafeArray({ 0.80891323089599609375, 1.19108676910400390625, 1.5683000087738037109375, 1.94830000400543212890625, 2.308300018310546875, 2.63840007781982421875, 2.85949993133544921875, 2.9872608184814453125, 3.0127391815185546875, 3.0127391815185546875 }); +constant spvUnsafeArray _506 = spvUnsafeArray({ -2.3010299205780029296875, -2.3010299205780029296875, -1.9312000274658203125, -1.5204999446868896484375, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 
1.2911865711212158203125 }); +constant spvUnsafeArray _507 = spvUnsafeArray({ 0.801995217800140380859375, 1.19800484180450439453125, 1.5943000316619873046875, 1.99730002880096435546875, 2.3782999515533447265625, 2.7683999538421630859375, 3.0515000820159912109375, 3.2746293544769287109375, 3.32743072509765625, 3.32743072509765625 }); + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0), center_no_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globals [[buffer(0)]], texture2d Texture1 [[texture(0)]], sampler Texture1Sampler [[sampler(0)]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + float3x3 _572 = float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * float3x3(float3(1.01303005218505859375, 0.0061053098179399967193603515625, -0.014971000142395496368408203125), float3(0.0076982299797236919403076171875, 0.99816501140594482421875, -0.005032029934227466583251953125), float3(-0.0028413101099431514739990234375, 0.0046851597726345062255859375, 0.92450702190399169921875)); + float3x3 _573 = _572 * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _574 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 
1.01033914089202880859375)) * float3x3(float3(0.98722398281097412109375, -0.0061132698319852352142333984375, 0.01595330052077770233154296875), float3(-0.007598360069096088409423828125, 1.00186002254486083984375, 0.0053300200961530208587646484375), float3(0.003072570078074932098388671875, -0.0050959498621523380279541015625, 1.0816800594329833984375)); + float3x3 _575 = _574 * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)); + float3x3 _576 = float3x3(float3(0.952552378177642822265625, 0.0, 9.25), float3(0.3439664542675018310546875, 0.728166103363037109375, -0.07213254272937774658203125), float3(0.0, 0.0, 1.00882518291473388671875)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _577 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625)); + float3x3 _602; + for (;;) + { + if (_Globals.OutputGamut == 1u) + { + _602 = _574 * float3x3(float3(2.493396282196044921875, -0.931345880031585693359375, -0.4026944935321807861328125), float3(-0.829486787319183349609375, 1.76265966892242431640625, 0.02362460084259510040283203125), 
float3(0.0358506999909877777099609375, -0.076182700693607330322265625, 0.957014024257659912109375)); + break; + } + else + { + if (_Globals.OutputGamut == 2u) + { + _602 = _574 * float3x3(float3(1.71660840511322021484375, -0.3556621074676513671875, -0.253360092639923095703125), float3(-0.666682898998260498046875, 1.61647760868072509765625, 0.01576850004494190216064453125), float3(0.017642199993133544921875, -0.04277630150318145751953125, 0.94222867488861083984375)); + break; + } + else + { + if (_Globals.OutputGamut == 3u) + { + _602 = float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625)); + break; + } + else + { + if (_Globals.OutputGamut == 4u) + { + _602 = float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)); + break; + } + else + { + _602 = _575; + break; + } + } + } + } + } + float3 _603 = float4((in.in_var_TEXCOORD0 - float2(0.015625)) * float2(1.03225803375244140625), float(gl_Layer) * 0.0322580635547637939453125, 0.0).xyz; + float3 _625; + if (_Globals.OutputDevice >= 3u) + { + float3 _617 = pow(_603, float3(0.0126833133399486541748046875)); + _625 = pow(fast::max(float3(0.0), _617 - float3(0.8359375)) / (float3(18.8515625) - (float3(18.6875) * _617)), float3(6.277394771575927734375)) * float3(10000.0); + } + else + { + _625 = (exp2((_603 - float3(0.434017598628997802734375)) * float3(14.0)) * float3(0.180000007152557373046875)) - (exp2(float3(-6.0762462615966796875)) * float3(0.180000007152557373046875)); + } + float _628 = _Globals.WhiteTemp * 1.00055634975433349609375; + float _642 = (_628 <= 7000.0) ? 
(0.24406300485134124755859375 + ((99.1100006103515625 + ((2967800.0 - (4604438528.0 / _Globals.WhiteTemp)) / _628)) / _628)) : (0.23703999817371368408203125 + ((247.4799957275390625 + ((1901800.0 - (2005284352.0 / _Globals.WhiteTemp)) / _628)) / _628)); + float _659 = ((0.860117733478546142578125 + (0.00015411825734190642833709716796875 * _Globals.WhiteTemp)) + ((1.2864121856637211749330163002014e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 + (0.0008424202096648514270782470703125 * _Globals.WhiteTemp)) + ((7.0814513719597016461193561553955e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); + float _670 = ((0.317398726940155029296875 + (4.25 * _Globals.WhiteTemp)) + ((4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 - (2.8974181986995972692966461181641e-05 * _Globals.WhiteTemp)) + ((1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); + float _675 = ((2.0 * _659) - (8.0 * _670)) + 4.0; + float2 _679 = float2((3.0 * _659) / _675, (2.0 * _670) / _675); + float2 _686 = fast::normalize(float2(_659, _670)); + float _691 = _659 + (((-_686.y) * _Globals.WhiteTint) * 0.0500000007450580596923828125); + float _695 = _670 + ((_686.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); + float _700 = ((2.0 * _691) - (8.0 * _695)) + 4.0; + float2 _706 = select(float2(_642, ((((-3.0) * _642) * _642) + (2.86999988555908203125 * _642)) - 0.2750000059604644775390625), _679, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _691) / _700, (2.0 * _695) / _700) - _679); + float _709 = fast::max(_706.y, 1.0000000133514319600180897396058e-10); + float3 _711; + _711.x = _706.x / _709; + _711.y = 1.0; + _711.z = ((1.0 - _706.x) - _706.y) / _709; + float _717 = fast::max(0.328999996185302734375, 1.0000000133514319600180897396058e-10); + float3 _719; + _719.x = 0.3127000033855438232421875 / _717; + _719.y = 1.0; + _719.z = 0.3582999706268310546875 / _717; + float3 _723 = _711 * 
float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _724 = _719 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _743 = (_625 * ((float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * ((float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)) * float3x3(float3(_724.x / _723.x, 0.0, 0.0), float3(0.0, _724.y / _723.y, 0.0), float3(0.0, 0.0, _724.z / _723.z))) * float3x3(float3(0.986992895603179931640625, -0.14705429971218109130859375, 0.15996269881725311279296875), float3(0.4323053061962127685546875, 0.518360316753387451171875, 0.049291200935840606689453125), float3(-0.00852870009839534759521484375, 0.0400427989661693572998046875, 0.968486726284027099609375)))) * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)))) * _573; + float3 _771; + if 
(_Globals.ColorShadow_Tint2.w != 0.0) + { + float _750 = dot(_743, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float3 _753 = (_743 / float3(_750)) - float3(1.0); + _771 = mix(_743, _743 * (_575 * (float3x3(float3(0.544169127941131591796875, 0.23959259688854217529296875, 0.16669429838657379150390625), float3(0.23946559429168701171875, 0.702153027057647705078125, 0.058381401002407073974609375), float3(-0.0023439000360667705535888671875, 0.0361833982169628143310546875, 1.05521833896636962890625)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)))), float3((1.0 - exp2((-4.0) * dot(_753, _753))) * (1.0 - exp2((((-4.0) * _Globals.ExpandGamut) * _750) * _750)))); + } + else + { + _771 = _743; + } + float _772 = dot(_771, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float4 _777 = _Globals.ColorSaturationShadows * _Globals.ColorSaturation; + float4 _782 = _Globals.ColorContrastShadows * _Globals.ColorContrast; + float4 _787 = _Globals.ColorGammaShadows * _Globals.ColorGamma; + float4 _792 = _Globals.ColorGainShadows * _Globals.ColorGain; + float4 _797 = _Globals.ColorOffsetShadows + _Globals.ColorOffset; + float3 _798 = float3(_772); + float _830 = smoothstep(0.0, _Globals.ColorCorrectionShadowsMax, _772); + float4 _834 = _Globals.ColorSaturationHighlights * _Globals.ColorSaturation; + float4 _837 = _Globals.ColorContrastHighlights * _Globals.ColorContrast; + float4 _840 = _Globals.ColorGammaHighlights * _Globals.ColorGamma; + float4 _843 = _Globals.ColorGainHighlights * _Globals.ColorGain; + float4 _846 = _Globals.ColorOffsetHighlights + _Globals.ColorOffset; + float _878 = smoothstep(_Globals.ColorCorrectionHighlightsMin, 1.0, _772); 
+ float4 _881 = _Globals.ColorSaturationMidtones * _Globals.ColorSaturation; + float4 _884 = _Globals.ColorContrastMidtones * _Globals.ColorContrast; + float4 _887 = _Globals.ColorGammaMidtones * _Globals.ColorGamma; + float4 _890 = _Globals.ColorGainMidtones * _Globals.ColorGain; + float4 _893 = _Globals.ColorOffsetMidtones + _Globals.ColorOffset; + float3 _931 = ((((pow(pow(fast::max(float3(0.0), mix(_798, _771, _777.xyz * float3(_777.w))) * float3(5.5555553436279296875), _782.xyz * float3(_782.w)) * float3(0.180000007152557373046875), float3(1.0) / (_787.xyz * float3(_787.w))) * (_792.xyz * float3(_792.w))) + (_797.xyz + float3(_797.w))) * float3(1.0 - _830)) + (((pow(pow(fast::max(float3(0.0), mix(_798, _771, _881.xyz * float3(_881.w))) * float3(5.5555553436279296875), _884.xyz * float3(_884.w)) * float3(0.180000007152557373046875), float3(1.0) / (_887.xyz * float3(_887.w))) * (_890.xyz * float3(_890.w))) + (_893.xyz + float3(_893.w))) * float3(_830 - _878))) + (((pow(pow(fast::max(float3(0.0), mix(_798, _771, _834.xyz * float3(_834.w))) * float3(5.5555553436279296875), _837.xyz * float3(_837.w)) * float3(0.180000007152557373046875), float3(1.0) / (_840.xyz * float3(_840.w))) * (_843.xyz * float3(_843.w))) + (_846.xyz + float3(_846.w))) * float3(_878)); + float3 _932 = _931 * _575; + float3 _940 = float3(_Globals.BlueCorrection); + float3 _942 = mix(_931, _931 * ((_577 * float3x3(float3(0.940437257289886474609375, -0.01830687932670116424560546875, 0.07786960899829864501953125), float3(0.008378696627914905548095703125, 0.82866001129150390625, 0.162961304187774658203125), float3(0.0005471261101774871349334716796875, -0.00088337459601461887359619140625, 1.00033628940582275390625))) * _576), _940) * _577; + float _943 = _942.x; + float _944 = _942.y; + float _946 = _942.z; + float _949 = fast::max(fast::max(_943, _944), _946); + float _954 = (fast::max(_949, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_943, _944), _946), 
1.0000000133514319600180897396058e-10)) / fast::max(_949, 0.00999999977648258209228515625); + float _967 = ((_946 + _944) + _943) + (1.75 * sqrt(((_946 * (_946 - _944)) + (_944 * (_944 - _943))) + (_943 * (_943 - _946)))); + float _968 = _967 * 0.3333333432674407958984375; + float _969 = _954 - 0.4000000059604644775390625; + float _974 = fast::max(1.0 - abs(_969 * 2.5), 0.0); + float _982 = (1.0 + (float(int(sign(_969 * 5.0))) * (1.0 - (_974 * _974)))) * 0.02500000037252902984619140625; + float _995; + if (_968 <= 0.053333334624767303466796875) + { + _995 = _982; + } + else + { + float _994; + if (_968 >= 0.1599999964237213134765625) + { + _994 = 0.0; + } + else + { + _994 = _982 * ((0.23999999463558197021484375 / _967) - 0.5); + } + _995 = _994; + } + float3 _998 = _942 * float3(1.0 + _995); + float _999 = _998.x; + float _1000 = _998.y; + float _1002 = _998.z; + float _1016; + if ((_999 == _1000) && (_1000 == _1002)) + { + _1016 = 0.0; + } + else + { + _1016 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_1000 - _1002), ((2.0 * _999) - _1000) - _1002); + } + float _1021; + if (_1016 < 0.0) + { + _1021 = _1016 + 360.0; + } + else + { + _1021 = _1016; + } + float _1022 = fast::clamp(_1021, 0.0, 360.0); + float _1027; + if (_1022 > 180.0) + { + _1027 = _1022 - 360.0; + } + else + { + _1027 = _1022; + } + float _1031 = smoothstep(0.0, 1.0, 1.0 - abs(_1027 * 0.01481481455266475677490234375)); + _998.x = _999 + ((((_1031 * _1031) * _954) * (0.02999999932944774627685546875 - _999)) * 0.180000007152557373046875); + float3 _1040 = fast::max(float3(0.0), _998 * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375))); + float _1049 = (1.0 + _Globals.FilmBlackClip) - _Globals.FilmToe; + float _1052 = 1.0 + _Globals.FilmWhiteClip; + 
float _1055 = _1052 - _Globals.FilmShoulder; + float _1082; + if (_Globals.FilmToe > 0.800000011920928955078125) + { + _1082 = ((0.819999992847442626953125 - _Globals.FilmToe) / _Globals.FilmSlope) + (log(0.180000007152557373046875) / log(10.0)); + } + else + { + float _1061 = (0.180000007152557373046875 + _Globals.FilmBlackClip) / _1049; + _1082 = (log(0.180000007152557373046875) / log(10.0)) - ((0.5 * log(_1061 / (2.0 - _1061))) * (_1049 / _Globals.FilmSlope)); + } + float _1087 = ((1.0 - _Globals.FilmToe) / _Globals.FilmSlope) - _1082; + float _1089 = (_Globals.FilmShoulder / _Globals.FilmSlope) - _1087; + float _1091 = log(10.0); + float3 _1093 = log(mix(float3(dot(_1040, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1040, float3(0.959999978542327880859375))) / float3(_1091); + float3 _1097 = float3(_Globals.FilmSlope) * (_1093 + float3(_1087)); + float3 _1105 = float3(_1082); + float3 _1106 = _1093 - _1105; + float3 _1118 = float3(_1089); + float3 _1132 = fast::clamp(_1106 / float3(_1089 - _1082), float3(0.0), float3(1.0)); + float3 _1136 = select(_1132, float3(1.0) - _1132, bool3(_1089 < _1082)); + float3 _1141 = mix(select(_1097, float3(-_Globals.FilmBlackClip) + (float3(2.0 * _1049) / (float3(1.0) + exp(float3(((-2.0) * _Globals.FilmSlope) / _1049) * _1106))), _1093 < _1105), select(_1097, float3(_1052) - (float3(2.0 * _1055) / (float3(1.0) + exp(float3((2.0 * _Globals.FilmSlope) / _1055) * (_1093 - _1118)))), _1093 > _1118), ((float3(3.0) - (float3(2.0) * _1136)) * _1136) * _1136); + float3 _1145 = fast::max(float3(0.0), mix(float3(dot(_1141, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1141, float3(0.930000007152557373046875))); + float3 _1215; + if (_Globals.ColorShadow_Tint2.w == 0.0) + { + float3 _1157; + _1157.x = dot(_932, _Globals.ColorMatrixR_ColorCurveCd1.xyz); + _1157.y = dot(_932, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz); + _1157.z = dot(_932, 
_Globals.ColorMatrixB_ColorCurveCm2.xyz); + float3 _1183 = fast::max(float3(0.0), _1157 * (_Globals.ColorShadow_Tint1.xyz + (_Globals.ColorShadow_Tint2.xyz * float3(1.0 / (dot(_932, _Globals.ColorShadow_Luma.xyz) + 1.0))))); + float3 _1188 = fast::max(float3(0.0), _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx - _1183); + float3 _1190 = fast::max(_1183, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz); + _1215 = ((((_1190 * _Globals.ColorCurve_Ch1_Ch2.xxx) + _Globals.ColorCurve_Ch1_Ch2.yyy) * (float3(1.0) / (_1190 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.www))) + ((fast::clamp(_1183, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz) * _Globals.ColorMatrixB_ColorCurveCm2.www) + (((_1188 * _Globals.ColorMatrixR_ColorCurveCd1.www) * (float3(1.0) / (_1188 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.yyy))) + _Globals.ColorMatrixG_ColorCurveCd3Cm3.www))) - float3(0.00200000009499490261077880859375); + } + else + { + _1215 = fast::max(float3(0.0), mix(_1145, _1145 * ((_577 * float3x3(float3(1.06317996978759765625, 0.02339559979736804962158203125, -0.08657260239124298095703125), float3(-0.010633699595928192138671875, 1.2063200473785400390625, -0.1956900060176849365234375), float3(-0.0005908869788981974124908447265625, 0.00105247995816171169281005859375, 0.999538004398345947265625))) * _576), _940) * _575); + } + float3 _1216 = fast::clamp(_1215, float3(0.0), float3(1.0)); + float _1217 = _1216.x; + float _1229; + for (;;) + { + if (_1217 < 0.00313066993840038776397705078125) + { + _1229 = _1217 * 12.9200000762939453125; + break; + } + _1229 = (pow(_1217, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + float _1230 = _1216.y; + float _1242; + for (;;) + { + if (_1230 < 0.00313066993840038776397705078125) + { + _1242 = _1230 * 12.9200000762939453125; + break; + } + _1242 = (pow(_1230, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + 
break; + } + float _1243 = _1216.z; + float _1255; + for (;;) + { + if (_1243 < 0.00313066993840038776397705078125) + { + _1255 = _1243 * 12.9200000762939453125; + break; + } + _1255 = (pow(_1243, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + float3 _1256 = float3(_1229, _1242, _1255); + float3 _1258 = (_1256 * float3(0.9375)) + float3(0.03125); + float _1270 = (_1258.z * 16.0) - 0.5; + float _1271 = floor(_1270); + float _1275 = (_1258.x + _1271) * 0.0625; + float _1276 = _1258.y; + float4 _1279 = Texture1.sample(Texture1Sampler, float2(_1275, _1276)); + float4 _1283 = Texture1.sample(Texture1Sampler, float2(_1275 + 0.0625, _1276)); + float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), (float3(_Globals.LUTWeights[0].x) * _1256) + (float3(_Globals.LUTWeights[1].x) * mix(_1279, _1283, float4(_1270 - _1271)).xyz)); + float3 _1295 = select(_1289 * float3(0.077399380505084991455078125), pow((_1289 * float3(0.94786727428436279296875)) + float3(0.0521326996386051177978515625), float3(2.400000095367431640625)), _1289 > float3(0.040449999272823333740234375)); + float3 _1324 = pow(fast::max(float3(0.0), mix((((float3(_Globals.MappingPolynomial.x) * (_1295 * _1295)) + (float3(_Globals.MappingPolynomial.y) * _1295)) + float3(_Globals.MappingPolynomial.z)) * _Globals.ColorScale, _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y)); + float3 _3103; + if (_Globals.OutputDevice == 0u) + { + float _3075; + for (;;) + { + if (_1324.x < 0.00313066993840038776397705078125) + { + _3075 = _1324.x * 12.9200000762939453125; + break; + } + _3075 = (pow(_1324.x, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + float _3088; + for (;;) + { + if (_1324.y < 0.00313066993840038776397705078125) + { + _3088 = _1324.y * 12.9200000762939453125; + break; + } + _3088 = (pow(_1324.y, 0.4166666567325592041015625) * 
1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + float _3101; + for (;;) + { + if (_1324.z < 0.00313066993840038776397705078125) + { + _3101 = _1324.z * 12.9200000762939453125; + break; + } + _3101 = (pow(_1324.z, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + _3103 = float3(_3075, _3088, _3101); + } + else + { + float3 _3062; + if (_Globals.OutputDevice == 1u) + { + float3 _3055 = fast::max(float3(6.1035199905745685100555419921875e-05), (_1324 * _573) * _602); + _3062 = fast::min(_3055 * float3(4.5), (pow(fast::max(_3055, float3(0.017999999225139617919921875)), float3(0.449999988079071044921875)) * float3(1.09899997711181640625)) - float3(0.098999999463558197021484375)); + } + else + { + float3 _3052; + if ((_Globals.OutputDevice == 3u) || (_Globals.OutputDevice == 5u)) + { + float3 _2204 = (_932 * float3(1.5)) * (_572 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _2205 = _2204.x; + float _2206 = _2204.y; + float _2208 = _2204.z; + float _2211 = fast::max(fast::max(_2205, _2206), _2208); + float _2216 = (fast::max(_2211, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_2205, _2206), _2208), 1.0000000133514319600180897396058e-10)) / fast::max(_2211, 0.00999999977648258209228515625); + float _2229 = ((_2208 + _2206) + _2205) + (1.75 * sqrt(((_2208 * (_2208 - _2206)) + (_2206 * (_2206 - _2205))) + (_2205 * (_2205 - _2208)))); + float _2230 = _2229 * 0.3333333432674407958984375; + float _2231 = _2216 - 0.4000000059604644775390625; + float _2236 = fast::max(1.0 - abs(_2231 * 2.5), 0.0); + float _2244 = (1.0 + (float(int(sign(_2231 * 5.0))) * (1.0 - (_2236 * _2236)))) * 0.02500000037252902984619140625; + float _2257; + if (_2230 <= 0.053333334624767303466796875) + { + _2257 = 
_2244; + } + else + { + float _2256; + if (_2230 >= 0.1599999964237213134765625) + { + _2256 = 0.0; + } + else + { + _2256 = _2244 * ((0.23999999463558197021484375 / _2229) - 0.5); + } + _2257 = _2256; + } + float3 _2260 = _2204 * float3(1.0 + _2257); + float _2261 = _2260.x; + float _2262 = _2260.y; + float _2264 = _2260.z; + float _2278; + if ((_2261 == _2262) && (_2262 == _2264)) + { + _2278 = 0.0; + } + else + { + _2278 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_2262 - _2264), ((2.0 * _2261) - _2262) - _2264); + } + float _2283; + if (_2278 < 0.0) + { + _2283 = _2278 + 360.0; + } + else + { + _2283 = _2278; + } + float _2284 = fast::clamp(_2283, 0.0, 360.0); + float _2289; + if (_2284 > 180.0) + { + _2289 = _2284 - 360.0; + } + else + { + _2289 = _2284; + } + float _2339; + if ((_2289 > (-67.5)) && (_2289 < 67.5)) + { + float _2296 = (_2289 - (-67.5)) * 0.0296296291053295135498046875; + int _2297 = int(_2296); + float _2299 = _2296 - float(_2297); + float _2300 = _2299 * _2299; + float _2301 = _2300 * _2299; + float _2338; + if (_2297 == 3) + { + _2338 = (((_2301 * (-0.16666667163372039794921875)) + (_2300 * 0.5)) + (_2299 * (-0.5))) + 0.16666667163372039794921875; + } + else + { + float _2331; + if (_2297 == 2) + { + _2331 = ((_2301 * 0.5) + (_2300 * (-1.0))) + 0.666666686534881591796875; + } + else + { + float _2326; + if (_2297 == 1) + { + _2326 = (((_2301 * (-0.5)) + (_2300 * 0.5)) + (_2299 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _2319; + if (_2297 == 0) + { + _2319 = _2301 * 0.16666667163372039794921875; + } + else + { + _2319 = 0.0; + } + _2326 = _2319; + } + _2331 = _2326; + } + _2338 = _2331; + } + _2339 = _2338; + } + else + { + _2339 = 0.0; + } + _2260.x = _2261 + ((((_2339 * 1.5) * _2216) * (0.02999999932944774627685546875 - _2261)) * 0.180000007152557373046875); + float3 _2349 = fast::clamp(fast::clamp(_2260, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, 
-0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _2352 = mix(float3(dot(_2349, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _2349, float3(0.959999978542327880859375)); + float _2353 = _2352.x; + float _2357 = 0.17999999225139617919921875 * exp2(18.0); + float _2359 = exp2(-14.0); + float _2362 = log((_2353 <= 0.0) ? _2359 : _2353) / _1091; + float _2364 = log(0.17999999225139617919921875 * exp2(-15.0)) / _1091; + float _2431; + if (_2362 <= _2364) + { + _2431 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _2371 = log(0.180000007152557373046875) / _1091; + float _2428; + if ((_2362 > _2364) && (_2362 < _2371)) + { + float _2411 = (3.0 * (_2362 - _2364)) / (_2371 - _2364); + int _2412 = int(_2411); + float _2414 = _2411 - float(_2412); + _2428 = dot(float3(_2414 * _2414, _2414, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2412], _499[_2412 + 1], _499[_2412 + 2])); + } + else + { + float _2379 = log(_2357) / _1091; + float _2407; + if ((_2362 >= _2371) && (_2362 < _2379)) + { + float _2390 = (3.0 * (_2362 - _2371)) / (_2379 - _2371); + int _2391 = int(_2390); + float _2393 = _2390 - float(_2391); + _2407 = dot(float3(_2393 * _2393, _2393, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2391], _500[_2391 + 1], _500[_2391 + 2])); + } + else + { + _2407 = log(10000.0) / _1091; + } + _2428 = _2407; + } + _2431 = _2428; + } + float3 _2433; + _2433.x = pow(10.0, _2431); + float _2434 = _2352.y; + float _2438 = log((_2434 <= 0.0) ? 
_2359 : _2434) / _1091; + float _2505; + if (_2438 <= _2364) + { + _2505 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _2445 = log(0.180000007152557373046875) / _1091; + float _2502; + if ((_2438 > _2364) && (_2438 < _2445)) + { + float _2485 = (3.0 * (_2438 - _2364)) / (_2445 - _2364); + int _2486 = int(_2485); + float _2488 = _2485 - float(_2486); + _2502 = dot(float3(_2488 * _2488, _2488, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2486], _499[_2486 + 1], _499[_2486 + 2])); + } + else + { + float _2453 = log(_2357) / _1091; + float _2481; + if ((_2438 >= _2445) && (_2438 < _2453)) + { + float _2464 = (3.0 * (_2438 - _2445)) / (_2453 - _2445); + int _2465 = int(_2464); + float _2467 = _2464 - float(_2465); + _2481 = dot(float3(_2467 * _2467, _2467, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2465], _500[_2465 + 1], _500[_2465 + 2])); + } + else + { + _2481 = log(10000.0) / _1091; + } + _2502 = _2481; + } + _2505 = _2502; + } + _2433.y = pow(10.0, _2505); + float _2508 = _2352.z; + float _2512 = log((_2508 <= 0.0) ? 
_2359 : _2508) / _1091; + float _2579; + if (_2512 <= _2364) + { + _2579 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _2519 = log(0.180000007152557373046875) / _1091; + float _2576; + if ((_2512 > _2364) && (_2512 < _2519)) + { + float _2559 = (3.0 * (_2512 - _2364)) / (_2519 - _2364); + int _2560 = int(_2559); + float _2562 = _2559 - float(_2560); + _2576 = dot(float3(_2562 * _2562, _2562, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2560], _499[_2560 + 1], _499[_2560 + 2])); + } + else + { + float _2527 = log(_2357) / _1091; + float _2555; + if ((_2512 >= _2519) && (_2512 < _2527)) + { + float _2538 = (3.0 * (_2512 - _2519)) / (_2527 - _2519); + int _2539 = int(_2538); + float _2541 = _2538 - float(_2539); + _2555 = dot(float3(_2541 * _2541, _2541, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2539], _500[_2539 + 1], _500[_2539 + 2])); + } + else + { + _2555 = log(10000.0) / _1091; + } + _2576 = _2555; + } + _2579 = _2576; + } + _2433.z = pow(10.0, _2579); + float3 _2583 = (_2433 * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _2585 = 0.17999999225139617919921875 * pow(2.0, -12.0); + float _2589 = log((_2585 <= 0.0) ? 
_2359 : _2585) / _1091; + float _2656; + if (_2589 <= _2364) + { + _2656 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _2596 = log(0.180000007152557373046875) / _1091; + float _2653; + if ((_2589 > _2364) && (_2589 < _2596)) + { + float _2636 = (3.0 * (_2589 - _2364)) / (_2596 - _2364); + int _2637 = int(_2636); + float _2639 = _2636 - float(_2637); + _2653 = dot(float3(_2639 * _2639, _2639, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2637], _499[_2637 + 1], _499[_2637 + 2])); + } + else + { + float _2604 = log(_2357) / _1091; + float _2632; + if ((_2589 >= _2596) && (_2589 < _2604)) + { + float _2615 = (3.0 * (_2589 - _2596)) / (_2604 - _2596); + int _2616 = int(_2615); + float _2618 = _2615 - float(_2616); + _2632 = dot(float3(_2618 * _2618, _2618, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2616], _500[_2616 + 1], _500[_2616 + 2])); + } + else + { + _2632 = log(10000.0) / _1091; + } + _2653 = _2632; + } + _2656 = _2653; + } + float _2659 = log(0.180000007152557373046875) / _1091; + float _2713; + if (_2659 <= _2364) + { + _2713 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _2710; + if ((_2659 > _2364) && (_2659 < _2659)) + { + _2710 = (float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[3], _499[4], _499[5])).z; + } + else + { + float _2672 = log(_2357) / _1091; + float _2700; + if ((_2659 >= _2659) && (_2659 < _2672)) + { + float _2683 = (3.0 * (_2659 - _2659)) / (_2672 - _2659); + int _2684 = int(_2683); + float _2686 = _2683 - float(_2684); + _2700 = dot(float3(_2686 * _2686, _2686, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2684], _500[_2684 + 1], _500[_2684 + 2])); + } + else + { + _2700 = log(10000.0) / _1091; + } + _2710 = _2700; + } + _2713 = _2710; + } + float _2714 = pow(10.0, 
_2713); + float _2716 = 0.17999999225139617919921875 * pow(2.0, 10.0); + float _2720 = log((_2716 <= 0.0) ? _2359 : _2716) / _1091; + float _2785; + if (_2720 <= _2364) + { + _2785 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _2782; + if ((_2720 > _2364) && (_2720 < _2659)) + { + float _2765 = (3.0 * (_2720 - _2364)) / (_2659 - _2364); + int _2766 = int(_2765); + float _2768 = _2765 - float(_2766); + _2782 = dot(float3(_2768 * _2768, _2768, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2766], _499[_2766 + 1], _499[_2766 + 2])); + } + else + { + float _2733 = log(_2357) / _1091; + float _2761; + if ((_2720 >= _2659) && (_2720 < _2733)) + { + float _2744 = (3.0 * (_2720 - _2659)) / (_2733 - _2659); + int _2745 = int(_2744); + float _2747 = _2744 - float(_2745); + _2761 = dot(float3(_2747 * _2747, _2747, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2745], _500[_2745 + 1], _500[_2745 + 2])); + } + else + { + _2761 = log(10000.0) / _1091; + } + _2782 = _2761; + } + _2785 = _2782; + } + float _2786 = pow(10.0, _2785); + float _2787 = _2583.x; + float _2791 = log((_2787 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2787) / _1091; + float _2792 = log(pow(10.0, _2656)); + float _2793 = _2792 / _1091; + float _2870; + if (_2791 <= _2793) + { + _2870 = (_2791 * 3.0) + ((log(9.9999997473787516355514526367188e-05) / _1091) - ((3.0 * _2792) / _1091)); + } + else + { + float _2800 = log(_2714) / _1091; + float _2862; + if ((_2791 > _2793) && (_2791 < _2800)) + { + float _2845 = (7.0 * (_2791 - _2793)) / (_2800 - _2793); + int _2846 = int(_2845); + float _2848 = _2845 - float(_2846); + _2862 = dot(float3(_2848 * _2848, _2848, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_503[_2846], _503[_2846 + 1], _503[_2846 + 2])); + } + else + { + float _2807 = log(_2786); + float _2808 = _2807 / _1091; + float _2841; + if ((_2791 >= _2800) && (_2791 < _2808)) + { + float _2824 = (7.0 * (_2791 - _2800)) / (_2808 - _2800); + int _2825 = int(_2824); + float _2827 = _2824 - float(_2825); + _2841 = dot(float3(_2827 * _2827, _2827, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_504[_2825], _504[_2825 + 1], _504[_2825 + 2])); + } + else + { + _2841 = (_2791 * 0.0599999986588954925537109375) + ((log(1000.0) / _1091) - ((0.0599999986588954925537109375 * _2807) / _1091)); + } + _2862 = _2841; + } + _2870 = _2862; + } + float3 _2872; + _2872.x = pow(10.0, _2870); + float _2873 = _2583.y; + float _2877 = log((_2873 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2873) / _1091; + float _2954; + if (_2877 <= _2793) + { + _2954 = (_2877 * 3.0) + ((log(9.9999997473787516355514526367188e-05) / _1091) - ((3.0 * _2792) / _1091)); + } + else + { + float _2884 = log(_2714) / _1091; + float _2946; + if ((_2877 > _2793) && (_2877 < _2884)) + { + float _2929 = (7.0 * (_2877 - _2793)) / (_2884 - _2793); + int _2930 = int(_2929); + float _2932 = _2929 - float(_2930); + _2946 = dot(float3(_2932 * _2932, _2932, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_503[_2930], _503[_2930 + 1], _503[_2930 + 2])); + } + else + { + float _2891 = log(_2786); + float _2892 = _2891 / _1091; + float _2925; + if ((_2877 >= _2884) && (_2877 < _2892)) + { + float _2908 = (7.0 * (_2877 - _2884)) / (_2892 - _2884); + int _2909 = int(_2908); + float _2911 = _2908 - float(_2909); + _2925 = dot(float3(_2911 * _2911, _2911, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_504[_2909], _504[_2909 + 1], _504[_2909 + 2])); + } + else + { + _2925 = (_2877 * 0.0599999986588954925537109375) + ((log(1000.0) / _1091) - ((0.0599999986588954925537109375 * _2891) / _1091)); + } + _2946 = _2925; + } + _2954 = _2946; + } + _2872.y = pow(10.0, _2954); + float _2957 = _2583.z; + float _2961 = log((_2957 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2957) / _1091; + float _3038; + if (_2961 <= _2793) + { + _3038 = (_2961 * 3.0) + ((log(9.9999997473787516355514526367188e-05) / _1091) - ((3.0 * _2792) / _1091)); + } + else + { + float _2968 = log(_2714) / _1091; + float _3030; + if ((_2961 > _2793) && (_2961 < _2968)) + { + float _3013 = (7.0 * (_2961 - _2793)) / (_2968 - _2793); + int _3014 = int(_3013); + float _3016 = _3013 - float(_3014); + _3030 = dot(float3(_3016 * _3016, _3016, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_503[_3014], _503[_3014 + 1], _503[_3014 + 2])); + } + else + { + float _2975 = log(_2786); + float _2976 = _2975 / _1091; + float _3009; + if ((_2961 >= _2968) && (_2961 < _2976)) + { + float _2992 = (7.0 * (_2961 - _2968)) / (_2976 - _2968); + int _2993 = int(_2992); + float _2995 = _2992 - float(_2993); + _3009 = dot(float3(_2995 * _2995, _2995, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_504[_2993], _504[_2993 + 1], _504[_2993 + 2])); + } + else + { + _3009 = (_2961 * 0.0599999986588954925537109375) + ((log(1000.0) / _1091) - ((0.0599999986588954925537109375 * _2975) / _1091)); + } + _3030 = _3009; + } + _3038 = _3030; + } + _2872.z = pow(10.0, _3038); + float3 _3044 = pow(((_2872 - float3(3.5073844628641381859779357910156e-05)) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _3052 = pow((float3(0.8359375) + (float3(18.8515625) * _3044)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _3044))), float3(78.84375)); + } + else + { + float3 _2201; + if ((_Globals.OutputDevice == 4u) || (_Globals.OutputDevice == 6u)) + { + float3 _1369 = (_932 * float3(1.5)) * (_572 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _1370 = 
_1369.x; + float _1371 = _1369.y; + float _1373 = _1369.z; + float _1376 = fast::max(fast::max(_1370, _1371), _1373); + float _1381 = (fast::max(_1376, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_1370, _1371), _1373), 1.0000000133514319600180897396058e-10)) / fast::max(_1376, 0.00999999977648258209228515625); + float _1394 = ((_1373 + _1371) + _1370) + (1.75 * sqrt(((_1373 * (_1373 - _1371)) + (_1371 * (_1371 - _1370))) + (_1370 * (_1370 - _1373)))); + float _1395 = _1394 * 0.3333333432674407958984375; + float _1396 = _1381 - 0.4000000059604644775390625; + float _1401 = fast::max(1.0 - abs(_1396 * 2.5), 0.0); + float _1409 = (1.0 + (float(int(sign(_1396 * 5.0))) * (1.0 - (_1401 * _1401)))) * 0.02500000037252902984619140625; + float _1422; + if (_1395 <= 0.053333334624767303466796875) + { + _1422 = _1409; + } + else + { + float _1421; + if (_1395 >= 0.1599999964237213134765625) + { + _1421 = 0.0; + } + else + { + _1421 = _1409 * ((0.23999999463558197021484375 / _1394) - 0.5); + } + _1422 = _1421; + } + float3 _1425 = _1369 * float3(1.0 + _1422); + float _1426 = _1425.x; + float _1427 = _1425.y; + float _1429 = _1425.z; + float _1443; + if ((_1426 == _1427) && (_1427 == _1429)) + { + _1443 = 0.0; + } + else + { + _1443 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_1427 - _1429), ((2.0 * _1426) - _1427) - _1429); + } + float _1448; + if (_1443 < 0.0) + { + _1448 = _1443 + 360.0; + } + else + { + _1448 = _1443; + } + float _1449 = fast::clamp(_1448, 0.0, 360.0); + float _1454; + if (_1449 > 180.0) + { + _1454 = _1449 - 360.0; + } + else + { + _1454 = _1449; + } + float _1504; + if ((_1454 > (-67.5)) && (_1454 < 67.5)) + { + float _1461 = (_1454 - (-67.5)) * 0.0296296291053295135498046875; + int _1462 = int(_1461); + float _1464 = _1461 - float(_1462); + float _1465 = _1464 * _1464; + float _1466 = _1465 * _1464; + float _1503; + if (_1462 == 3) + { + _1503 = (((_1466 * (-0.16666667163372039794921875)) + (_1465 * 0.5)) + (_1464 * (-0.5))) 
+ 0.16666667163372039794921875; + } + else + { + float _1496; + if (_1462 == 2) + { + _1496 = ((_1466 * 0.5) + (_1465 * (-1.0))) + 0.666666686534881591796875; + } + else + { + float _1491; + if (_1462 == 1) + { + _1491 = (((_1466 * (-0.5)) + (_1465 * 0.5)) + (_1464 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _1484; + if (_1462 == 0) + { + _1484 = _1466 * 0.16666667163372039794921875; + } + else + { + _1484 = 0.0; + } + _1491 = _1484; + } + _1496 = _1491; + } + _1503 = _1496; + } + _1504 = _1503; + } + else + { + _1504 = 0.0; + } + _1425.x = _1426 + ((((_1504 * 1.5) * _1381) * (0.02999999932944774627685546875 - _1426)) * 0.180000007152557373046875); + float3 _1514 = fast::clamp(fast::clamp(_1425, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _1517 = mix(float3(dot(_1514, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1514, float3(0.959999978542327880859375)); + float _1518 = _1517.x; + float _1522 = 0.17999999225139617919921875 * exp2(18.0); + float _1524 = exp2(-14.0); + float _1527 = log((_1518 <= 0.0) ? 
_1524 : _1518) / _1091; + float _1529 = log(0.17999999225139617919921875 * exp2(-15.0)) / _1091; + float _1596; + if (_1527 <= _1529) + { + _1596 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _1536 = log(0.180000007152557373046875) / _1091; + float _1593; + if ((_1527 > _1529) && (_1527 < _1536)) + { + float _1576 = (3.0 * (_1527 - _1529)) / (_1536 - _1529); + int _1577 = int(_1576); + float _1579 = _1576 - float(_1577); + _1593 = dot(float3(_1579 * _1579, _1579, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1577], _499[_1577 + 1], _499[_1577 + 2])); + } + else + { + float _1544 = log(_1522) / _1091; + float _1572; + if ((_1527 >= _1536) && (_1527 < _1544)) + { + float _1555 = (3.0 * (_1527 - _1536)) / (_1544 - _1536); + int _1556 = int(_1555); + float _1558 = _1555 - float(_1556); + _1572 = dot(float3(_1558 * _1558, _1558, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1556], _500[_1556 + 1], _500[_1556 + 2])); + } + else + { + _1572 = log(10000.0) / _1091; + } + _1593 = _1572; + } + _1596 = _1593; + } + float3 _1598; + _1598.x = pow(10.0, _1596); + float _1599 = _1517.y; + float _1603 = log((_1599 <= 0.0) ? 
_1524 : _1599) / _1091; + float _1670; + if (_1603 <= _1529) + { + _1670 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _1610 = log(0.180000007152557373046875) / _1091; + float _1667; + if ((_1603 > _1529) && (_1603 < _1610)) + { + float _1650 = (3.0 * (_1603 - _1529)) / (_1610 - _1529); + int _1651 = int(_1650); + float _1653 = _1650 - float(_1651); + _1667 = dot(float3(_1653 * _1653, _1653, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1651], _499[_1651 + 1], _499[_1651 + 2])); + } + else + { + float _1618 = log(_1522) / _1091; + float _1646; + if ((_1603 >= _1610) && (_1603 < _1618)) + { + float _1629 = (3.0 * (_1603 - _1610)) / (_1618 - _1610); + int _1630 = int(_1629); + float _1632 = _1629 - float(_1630); + _1646 = dot(float3(_1632 * _1632, _1632, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1630], _500[_1630 + 1], _500[_1630 + 2])); + } + else + { + _1646 = log(10000.0) / _1091; + } + _1667 = _1646; + } + _1670 = _1667; + } + _1598.y = pow(10.0, _1670); + float _1673 = _1517.z; + float _1677 = log((_1673 <= 0.0) ? 
_1524 : _1673) / _1091; + float _1744; + if (_1677 <= _1529) + { + _1744 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _1684 = log(0.180000007152557373046875) / _1091; + float _1741; + if ((_1677 > _1529) && (_1677 < _1684)) + { + float _1724 = (3.0 * (_1677 - _1529)) / (_1684 - _1529); + int _1725 = int(_1724); + float _1727 = _1724 - float(_1725); + _1741 = dot(float3(_1727 * _1727, _1727, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1725], _499[_1725 + 1], _499[_1725 + 2])); + } + else + { + float _1692 = log(_1522) / _1091; + float _1720; + if ((_1677 >= _1684) && (_1677 < _1692)) + { + float _1703 = (3.0 * (_1677 - _1684)) / (_1692 - _1684); + int _1704 = int(_1703); + float _1706 = _1703 - float(_1704); + _1720 = dot(float3(_1706 * _1706, _1706, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1704], _500[_1704 + 1], _500[_1704 + 2])); + } + else + { + _1720 = log(10000.0) / _1091; + } + _1741 = _1720; + } + _1744 = _1741; + } + _1598.z = pow(10.0, _1744); + float3 _1748 = (_1598 * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _1750 = 0.17999999225139617919921875 * pow(2.0, -12.0); + float _1754 = log((_1750 <= 0.0) ? 
_1524 : _1750) / _1091; + float _1821; + if (_1754 <= _1529) + { + _1821 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _1761 = log(0.180000007152557373046875) / _1091; + float _1818; + if ((_1754 > _1529) && (_1754 < _1761)) + { + float _1801 = (3.0 * (_1754 - _1529)) / (_1761 - _1529); + int _1802 = int(_1801); + float _1804 = _1801 - float(_1802); + _1818 = dot(float3(_1804 * _1804, _1804, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1802], _499[_1802 + 1], _499[_1802 + 2])); + } + else + { + float _1769 = log(_1522) / _1091; + float _1797; + if ((_1754 >= _1761) && (_1754 < _1769)) + { + float _1780 = (3.0 * (_1754 - _1761)) / (_1769 - _1761); + int _1781 = int(_1780); + float _1783 = _1780 - float(_1781); + _1797 = dot(float3(_1783 * _1783, _1783, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1781], _500[_1781 + 1], _500[_1781 + 2])); + } + else + { + _1797 = log(10000.0) / _1091; + } + _1818 = _1797; + } + _1821 = _1818; + } + float _1824 = log(0.180000007152557373046875) / _1091; + float _1878; + if (_1824 <= _1529) + { + _1878 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _1875; + if ((_1824 > _1529) && (_1824 < _1824)) + { + _1875 = (float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[3], _499[4], _499[5])).z; + } + else + { + float _1837 = log(_1522) / _1091; + float _1865; + if ((_1824 >= _1824) && (_1824 < _1837)) + { + float _1848 = (3.0 * (_1824 - _1824)) / (_1837 - _1824); + int _1849 = int(_1848); + float _1851 = _1848 - float(_1849); + _1865 = dot(float3(_1851 * _1851, _1851, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1849], _500[_1849 + 1], _500[_1849 + 2])); + } + else + { + _1865 = log(10000.0) / _1091; + } + _1875 = _1865; + } + _1878 = _1875; + } + float _1879 = pow(10.0, 
_1878); + float _1881 = 0.17999999225139617919921875 * pow(2.0, 11.0); + float _1885 = log((_1881 <= 0.0) ? _1524 : _1881) / _1091; + float _1950; + if (_1885 <= _1529) + { + _1950 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _1947; + if ((_1885 > _1529) && (_1885 < _1824)) + { + float _1930 = (3.0 * (_1885 - _1529)) / (_1824 - _1529); + int _1931 = int(_1930); + float _1933 = _1930 - float(_1931); + _1947 = dot(float3(_1933 * _1933, _1933, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1931], _499[_1931 + 1], _499[_1931 + 2])); + } + else + { + float _1898 = log(_1522) / _1091; + float _1926; + if ((_1885 >= _1824) && (_1885 < _1898)) + { + float _1909 = (3.0 * (_1885 - _1824)) / (_1898 - _1824); + int _1910 = int(_1909); + float _1912 = _1909 - float(_1910); + _1926 = dot(float3(_1912 * _1912, _1912, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1910], _500[_1910 + 1], _500[_1910 + 2])); + } + else + { + _1926 = log(10000.0) / _1091; + } + _1947 = _1926; + } + _1950 = _1947; + } + float _1951 = pow(10.0, _1950); + float _1952 = _1748.x; + float _1956 = log((_1952 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _1952) / _1091; + float _1958 = log(pow(10.0, _1821)) / _1091; + float _2030; + if (_1956 <= _1958) + { + _2030 = log(0.004999999888241291046142578125) / _1091; + } + else + { + float _1965 = log(_1879) / _1091; + float _2027; + if ((_1956 > _1958) && (_1956 < _1965)) + { + float _2010 = (7.0 * (_1956 - _1958)) / (_1965 - _1958); + int _2011 = int(_2010); + float _2013 = _2010 - float(_2011); + _2027 = dot(float3(_2013 * _2013, _2013, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_506[_2011], _506[_2011 + 1], _506[_2011 + 2])); + } + else + { + float _1972 = log(_1951); + float _1973 = _1972 / _1091; + float _2006; + if ((_1956 >= _1965) && (_1956 < _1973)) + { + float _1989 = (7.0 * (_1956 - _1965)) / (_1973 - _1965); + int _1990 = int(_1989); + float _1992 = _1989 - float(_1990); + _2006 = dot(float3(_1992 * _1992, _1992, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_507[_1990], _507[_1990 + 1], _507[_1990 + 2])); + } + else + { + _2006 = (_1956 * 0.119999997317790985107421875) + ((log(2000.0) / _1091) - ((0.119999997317790985107421875 * _1972) / _1091)); + } + _2027 = _2006; + } + _2030 = _2027; + } + float3 _2032; + _2032.x = pow(10.0, _2030); + float _2033 = _1748.y; + float _2037 = log((_2033 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2033) / _1091; + float _2109; + if (_2037 <= _1958) + { + _2109 = log(0.004999999888241291046142578125) / _1091; + } + else + { + float _2044 = log(_1879) / _1091; + float _2106; + if ((_2037 > _1958) && (_2037 < _2044)) + { + float _2089 = (7.0 * (_2037 - _1958)) / (_2044 - _1958); + int _2090 = int(_2089); + float _2092 = _2089 - float(_2090); + _2106 = dot(float3(_2092 * _2092, _2092, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_506[_2090], _506[_2090 + 1], _506[_2090 + 2])); + } + else + { + float _2051 = log(_1951); + float _2052 = _2051 / _1091; + float _2085; + if ((_2037 >= _2044) && (_2037 < _2052)) + { + float _2068 = (7.0 * (_2037 - _2044)) / (_2052 - _2044); + int _2069 = int(_2068); + float _2071 = _2068 - float(_2069); + _2085 = dot(float3(_2071 * _2071, _2071, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_507[_2069], _507[_2069 + 1], _507[_2069 + 2])); + } + else + { + _2085 = (_2037 * 0.119999997317790985107421875) + ((log(2000.0) / _1091) - ((0.119999997317790985107421875 * _2051) / _1091)); + } + _2106 = _2085; + } + _2109 = _2106; + } + _2032.y = pow(10.0, _2109); + float _2112 = _1748.z; + float _2116 = log((_2112 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2112) / _1091; + float _2188; + if (_2116 <= _1958) + { + _2188 = log(0.004999999888241291046142578125) / _1091; + } + else + { + float _2123 = log(_1879) / _1091; + float _2185; + if ((_2116 > _1958) && (_2116 < _2123)) + { + float _2168 = (7.0 * (_2116 - _1958)) / (_2123 - _1958); + int _2169 = int(_2168); + float _2171 = _2168 - float(_2169); + _2185 = dot(float3(_2171 * _2171, _2171, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_506[_2169], _506[_2169 + 1], _506[_2169 + 2])); + } + else + { + float _2130 = log(_1951); + float _2131 = _2130 / _1091; + float _2164; + if ((_2116 >= _2123) && (_2116 < _2131)) + { + float _2147 = (7.0 * (_2116 - _2123)) / (_2131 - _2123); + int _2148 = int(_2147); + float _2150 = _2147 - float(_2148); + _2164 = dot(float3(_2150 * _2150, _2150, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_507[_2148], _507[_2148 + 1], _507[_2148 + 2])); + } + else + { + _2164 = (_2116 * 0.119999997317790985107421875) + ((log(2000.0) / _1091) - ((0.119999997317790985107421875 * _2130) / _1091)); + } + _2185 = _2164; + } + _2188 = _2185; + } + _2032.z = pow(10.0, _2188); + float3 _2193 = pow((_2032 * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2201 = pow((float3(0.8359375) + (float3(18.8515625) * _2193)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _2193))), float3(78.84375)); + } + else + { + float3 _1366; + if (_Globals.OutputDevice == 7u) + { + float3 _1358 = pow(((_932 * _573) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _1366 = pow((float3(0.8359375) + (float3(18.8515625) * _1358)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _1358))), float3(78.84375)); + } + else + { + _1366 = pow((_1324 * _573) * _602, float3(_Globals.InverseGamma.z)); + } + _2201 = _1366; + } + _3052 = _2201; + } + _3062 = _3052; + } + _3103 = 
_3062; + } + float3 _3104 = _3103 * float3(0.95238101482391357421875); + float4 _3105 = float4(_3104.x, _3104.y, _3104.z, float4(0.0).w); + _3105.w = 0.0; + out.out_var_SV_Target0 = _3105; + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag b/reference/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag new file mode 100644 index 00000000000..72a9c58a639 --- /dev/null +++ b/reference/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag @@ -0,0 +1,503 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 
View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 
View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 
View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_TranslucentBasePass +{ + uint TranslucentBasePass_Shared_Forward_NumLocalLights; + uint TranslucentBasePass_Shared_Forward_NumReflectionCaptures; + uint TranslucentBasePass_Shared_Forward_HasDirectionalLight; + uint TranslucentBasePass_Shared_Forward_NumGridCells; + packed_int3 TranslucentBasePass_Shared_Forward_CulledGridSize; + uint TranslucentBasePass_Shared_Forward_MaxCulledLightsPerCell; + uint TranslucentBasePass_Shared_Forward_LightGridPixelSizeShift; + uint PrePadding_TranslucentBasePass_Shared_Forward_36; + uint PrePadding_TranslucentBasePass_Shared_Forward_40; + uint PrePadding_TranslucentBasePass_Shared_Forward_44; + packed_float3 TranslucentBasePass_Shared_Forward_LightGridZParams; + float PrePadding_TranslucentBasePass_Shared_Forward_60; + packed_float3 TranslucentBasePass_Shared_Forward_DirectionalLightDirection; + float PrePadding_TranslucentBasePass_Shared_Forward_76; + packed_float3 TranslucentBasePass_Shared_Forward_DirectionalLightColor; + 
float TranslucentBasePass_Shared_Forward_DirectionalLightVolumetricScatteringIntensity; + uint TranslucentBasePass_Shared_Forward_DirectionalLightShadowMapChannelMask; + uint PrePadding_TranslucentBasePass_Shared_Forward_100; + float2 TranslucentBasePass_Shared_Forward_DirectionalLightDistanceFadeMAD; + uint TranslucentBasePass_Shared_Forward_NumDirectionalLightCascades; + uint PrePadding_TranslucentBasePass_Shared_Forward_116; + uint PrePadding_TranslucentBasePass_Shared_Forward_120; + uint PrePadding_TranslucentBasePass_Shared_Forward_124; + float4 TranslucentBasePass_Shared_Forward_CascadeEndDepths; + float4x4 TranslucentBasePass_Shared_Forward_DirectionalLightWorldToShadowMatrix[4]; + float4 TranslucentBasePass_Shared_Forward_DirectionalLightShadowmapMinMax[4]; + float4 TranslucentBasePass_Shared_Forward_DirectionalLightShadowmapAtlasBufferSize; + float TranslucentBasePass_Shared_Forward_DirectionalLightDepthBias; + uint TranslucentBasePass_Shared_Forward_DirectionalLightUseStaticShadowing; + uint PrePadding_TranslucentBasePass_Shared_Forward_488; + uint PrePadding_TranslucentBasePass_Shared_Forward_492; + float4 TranslucentBasePass_Shared_Forward_DirectionalLightStaticShadowBufferSize; + float4x4 TranslucentBasePass_Shared_Forward_DirectionalLightWorldToStaticShadow; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_576; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_580; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_584; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_588; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_592; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_596; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_600; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_604; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_608; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_612; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_616; + float 
PrePadding_TranslucentBasePass_Shared_ForwardISR_620; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_624; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_628; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_632; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_636; + uint TranslucentBasePass_Shared_ForwardISR_NumLocalLights; + uint TranslucentBasePass_Shared_ForwardISR_NumReflectionCaptures; + uint TranslucentBasePass_Shared_ForwardISR_HasDirectionalLight; + uint TranslucentBasePass_Shared_ForwardISR_NumGridCells; + packed_int3 TranslucentBasePass_Shared_ForwardISR_CulledGridSize; + uint TranslucentBasePass_Shared_ForwardISR_MaxCulledLightsPerCell; + uint TranslucentBasePass_Shared_ForwardISR_LightGridPixelSizeShift; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_676; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_680; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_684; + packed_float3 TranslucentBasePass_Shared_ForwardISR_LightGridZParams; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_700; + packed_float3 TranslucentBasePass_Shared_ForwardISR_DirectionalLightDirection; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_716; + packed_float3 TranslucentBasePass_Shared_ForwardISR_DirectionalLightColor; + float TranslucentBasePass_Shared_ForwardISR_DirectionalLightVolumetricScatteringIntensity; + uint TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowMapChannelMask; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_740; + float2 TranslucentBasePass_Shared_ForwardISR_DirectionalLightDistanceFadeMAD; + uint TranslucentBasePass_Shared_ForwardISR_NumDirectionalLightCascades; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_756; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_760; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_764; + float4 TranslucentBasePass_Shared_ForwardISR_CascadeEndDepths; + float4x4 
TranslucentBasePass_Shared_ForwardISR_DirectionalLightWorldToShadowMatrix[4]; + float4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowmapMinMax[4]; + float4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowmapAtlasBufferSize; + float TranslucentBasePass_Shared_ForwardISR_DirectionalLightDepthBias; + uint TranslucentBasePass_Shared_ForwardISR_DirectionalLightUseStaticShadowing; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_1128; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_1132; + float4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightStaticShadowBufferSize; + float4x4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightWorldToStaticShadow; + float PrePadding_TranslucentBasePass_Shared_Reflection_1216; + float PrePadding_TranslucentBasePass_Shared_Reflection_1220; + float PrePadding_TranslucentBasePass_Shared_Reflection_1224; + float PrePadding_TranslucentBasePass_Shared_Reflection_1228; + float PrePadding_TranslucentBasePass_Shared_Reflection_1232; + float PrePadding_TranslucentBasePass_Shared_Reflection_1236; + float PrePadding_TranslucentBasePass_Shared_Reflection_1240; + float PrePadding_TranslucentBasePass_Shared_Reflection_1244; + float PrePadding_TranslucentBasePass_Shared_Reflection_1248; + float PrePadding_TranslucentBasePass_Shared_Reflection_1252; + float PrePadding_TranslucentBasePass_Shared_Reflection_1256; + float PrePadding_TranslucentBasePass_Shared_Reflection_1260; + float PrePadding_TranslucentBasePass_Shared_Reflection_1264; + float PrePadding_TranslucentBasePass_Shared_Reflection_1268; + float PrePadding_TranslucentBasePass_Shared_Reflection_1272; + float PrePadding_TranslucentBasePass_Shared_Reflection_1276; + float4 TranslucentBasePass_Shared_Reflection_SkyLightParameters; + float TranslucentBasePass_Shared_Reflection_SkyLightCubemapBrightness; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1300; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1304; + float 
PrePadding_TranslucentBasePass_Shared_PlanarReflection_1308; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1312; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1316; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1320; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1324; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1328; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1332; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1336; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1340; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1344; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1348; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1352; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1356; + float4 TranslucentBasePass_Shared_PlanarReflection_ReflectionPlane; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionOrigin; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionXAxis; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionYAxis; + float3x4 TranslucentBasePass_Shared_PlanarReflection_InverseTransposeMirrorMatrix; + packed_float3 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionParameters; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1484; + float2 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionParameters2; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1496; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1500; + float4x4 TranslucentBasePass_Shared_PlanarReflection_ProjectionWithExtraFOV[2]; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionScreenScaleBias[2]; + float2 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionScreenBound; + uint TranslucentBasePass_Shared_PlanarReflection_bIsStereo; + float 
PrePadding_TranslucentBasePass_Shared_Fog_1676; + float PrePadding_TranslucentBasePass_Shared_Fog_1680; + float PrePadding_TranslucentBasePass_Shared_Fog_1684; + float PrePadding_TranslucentBasePass_Shared_Fog_1688; + float PrePadding_TranslucentBasePass_Shared_Fog_1692; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogParameters; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogParameters2; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogColorParameter; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogParameters3; + float4 TranslucentBasePass_Shared_Fog_InscatteringLightDirection; + float4 TranslucentBasePass_Shared_Fog_DirectionalInscatteringColor; + float2 TranslucentBasePass_Shared_Fog_SinCosInscatteringColorCubemapRotation; + float PrePadding_TranslucentBasePass_Shared_Fog_1800; + float PrePadding_TranslucentBasePass_Shared_Fog_1804; + packed_float3 TranslucentBasePass_Shared_Fog_FogInscatteringTextureParameters; + float TranslucentBasePass_Shared_Fog_ApplyVolumetricFog; + float PrePadding_TranslucentBasePass_1824; + float PrePadding_TranslucentBasePass_1828; + float PrePadding_TranslucentBasePass_1832; + float PrePadding_TranslucentBasePass_1836; + float PrePadding_TranslucentBasePass_1840; + float PrePadding_TranslucentBasePass_1844; + float PrePadding_TranslucentBasePass_1848; + float PrePadding_TranslucentBasePass_1852; + float PrePadding_TranslucentBasePass_1856; + float PrePadding_TranslucentBasePass_1860; + float PrePadding_TranslucentBasePass_1864; + float PrePadding_TranslucentBasePass_1868; + float PrePadding_TranslucentBasePass_1872; + float PrePadding_TranslucentBasePass_1876; + float PrePadding_TranslucentBasePass_1880; + float PrePadding_TranslucentBasePass_1884; + float PrePadding_TranslucentBasePass_1888; + float PrePadding_TranslucentBasePass_1892; + float PrePadding_TranslucentBasePass_1896; + float PrePadding_TranslucentBasePass_1900; + float PrePadding_TranslucentBasePass_1904; + float 
PrePadding_TranslucentBasePass_1908; + float PrePadding_TranslucentBasePass_1912; + float PrePadding_TranslucentBasePass_1916; + float PrePadding_TranslucentBasePass_1920; + float PrePadding_TranslucentBasePass_1924; + float PrePadding_TranslucentBasePass_1928; + float PrePadding_TranslucentBasePass_1932; + float PrePadding_TranslucentBasePass_1936; + float PrePadding_TranslucentBasePass_1940; + float PrePadding_TranslucentBasePass_1944; + float PrePadding_TranslucentBasePass_1948; + float PrePadding_TranslucentBasePass_1952; + float PrePadding_TranslucentBasePass_1956; + float PrePadding_TranslucentBasePass_1960; + float PrePadding_TranslucentBasePass_1964; + float PrePadding_TranslucentBasePass_1968; + float PrePadding_TranslucentBasePass_1972; + float PrePadding_TranslucentBasePass_1976; + float PrePadding_TranslucentBasePass_1980; + float PrePadding_TranslucentBasePass_1984; + float PrePadding_TranslucentBasePass_1988; + float PrePadding_TranslucentBasePass_1992; + float PrePadding_TranslucentBasePass_1996; + float PrePadding_TranslucentBasePass_2000; + float PrePadding_TranslucentBasePass_2004; + float PrePadding_TranslucentBasePass_2008; + float PrePadding_TranslucentBasePass_2012; + float PrePadding_TranslucentBasePass_2016; + float PrePadding_TranslucentBasePass_2020; + float PrePadding_TranslucentBasePass_2024; + float PrePadding_TranslucentBasePass_2028; + float PrePadding_TranslucentBasePass_2032; + float PrePadding_TranslucentBasePass_2036; + float PrePadding_TranslucentBasePass_2040; + float PrePadding_TranslucentBasePass_2044; + float PrePadding_TranslucentBasePass_2048; + float PrePadding_TranslucentBasePass_2052; + float PrePadding_TranslucentBasePass_2056; + float PrePadding_TranslucentBasePass_2060; + float PrePadding_TranslucentBasePass_2064; + float PrePadding_TranslucentBasePass_2068; + float PrePadding_TranslucentBasePass_2072; + float PrePadding_TranslucentBasePass_2076; + float PrePadding_TranslucentBasePass_2080; + float 
PrePadding_TranslucentBasePass_2084; + float PrePadding_TranslucentBasePass_2088; + float PrePadding_TranslucentBasePass_2092; + float PrePadding_TranslucentBasePass_2096; + float PrePadding_TranslucentBasePass_2100; + float PrePadding_TranslucentBasePass_2104; + float PrePadding_TranslucentBasePass_2108; + float PrePadding_TranslucentBasePass_2112; + float PrePadding_TranslucentBasePass_2116; + float PrePadding_TranslucentBasePass_2120; + float PrePadding_TranslucentBasePass_2124; + float PrePadding_TranslucentBasePass_2128; + float PrePadding_TranslucentBasePass_2132; + float PrePadding_TranslucentBasePass_2136; + float PrePadding_TranslucentBasePass_2140; + float4 TranslucentBasePass_HZBUvFactorAndInvFactor; + float4 TranslucentBasePass_PrevScreenPositionScaleBias; + float TranslucentBasePass_PrevSceneColorPreExposureInv; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[2]; + float4 Material_ScalarExpressions[1]; +}; + +constant float _108 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 in_var_TEXCOORD11_centroid [[user(locn1)]]; + uint in_var_PRIMITIVE_ID [[user(locn2)]]; + float4 in_var_TEXCOORD7 [[user(locn3)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], const device type_StructuredBuffer_v4float& View_PrimitiveSceneData [[buffer(1)]], constant type_TranslucentBasePass& TranslucentBasePass [[buffer(2)]], constant type_Material& Material [[buffer(3)]], texture3d TranslucentBasePass_Shared_Fog_IntegratedLightScattering [[texture(0)]], sampler View_SharedBilinearClampedSampler [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleMaskIn [[sample_mask]]) +{ + main0_out out = {}; + float4 _137 = View.View_SVPositionToTranslatedWorld * float4(gl_FragCoord.xyz, 1.0); + float3 _142 = (_137.xyz / float3(_137.w)) - 
float3(View.View_PreViewTranslation); + bool _165 = TranslucentBasePass.TranslucentBasePass_Shared_Fog_ApplyVolumetricFog > 0.0; + float4 _215; + if (_165) + { + float4 _172 = View.View_WorldToClip * float4(_142, 1.0); + float _173 = _172.w; + float4 _202; + if (_165) + { + _202 = TranslucentBasePass_Shared_Fog_IntegratedLightScattering.sample(View_SharedBilinearClampedSampler, float3(((_172.xy / float2(_173)).xy * float2(0.5, -0.5)) + float2(0.5), (log2((_173 * View.View_VolumetricFogGridZParams[0]) + View.View_VolumetricFogGridZParams[1]) * View.View_VolumetricFogGridZParams[2]) * View.View_VolumetricFogInvGridSize[2]), level(0.0)); + } + else + { + _202 = float4(0.0, 0.0, 0.0, 1.0); + } + _215 = float4(_202.xyz + (in.in_var_TEXCOORD7.xyz * float3(_202.w)), _202.w * in.in_var_TEXCOORD7.w); + } + else + { + _215 = in.in_var_TEXCOORD7; + } + float3 _216 = fast::max(Material.Material_VectorExpressions[1].xyz * float3(((1.0 + dot(float3(-1.0, -1.5, 3.0) / float3(sqrt(12.25)), fast::normalize(float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz) * fast::normalize((float3(0.0, 0.0, 1.0) * float3(View.View_NormalOverrideParameter.w)) + View.View_NormalOverrideParameter.xyz)))) * 0.5) + 0.20000000298023223876953125), float3(0.0)); + float3 _246; + if (View.View_OutOfBoundsMask > 0.0) + { + uint _222 = in.in_var_PRIMITIVE_ID * 26u; + float3 _245; + if (any(abs(_142 - View_PrimitiveSceneData._m0[_222 + 5u].xyz) > (View_PrimitiveSceneData._m0[_222 + 19u].xyz + float3(1.0)))) + { + _245 = mix(float3(1.0, 1.0, 0.0), float3(0.0, 1.0, 1.0), float3(float3(fract(dot(_142, float3(0.57700002193450927734375)) * 0.00200000009499490261077880859375)) > float3(0.5))); + } + else + { + _245 = _216; + } + _246 = _245; + } + else + { + _246 = _216; + } + float4 _255 = float4((_246 * float3(_215.w)) + _215.xyz, _108); + _255.w = 1.0; + float4 _268; 
+ uint _269; + if (View.View_NumSceneColorMSAASamples > 1) + { + _268 = _255 * float4(float(View.View_NumSceneColorMSAASamples) * 0.25); + _269 = gl_SampleMaskIn & 15u; + } + else + { + _268 = _255; + _269 = gl_SampleMaskIn; + } + out.out_var_SV_Target0 = _268; + out.gl_SampleMask = _269; + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag b/reference/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag new file mode 100644 index 00000000000..d7a1993dc32 --- /dev/null +++ b/reference/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag @@ -0,0 +1,213 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 
View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 
View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float 
PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Globals +{ + float3 SoftTransitionScale; + float4 ShadowBufferSize; + float ShadowFadeFraction; + float ShadowSharpen; + float4 LightPositionAndInvRadius; + float4x4 ScreenToShadowMatrix; + float2 ProjectionDepthBiasParameters; + float4 ModulatedShadowColor; + float4 ShadowTileOffsetAndSize; +}; + +constant float4 _58 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], float4 _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData [[color(0)]], texture2d ShadowDepthTexture [[texture(0)]], sampler ShadowDepthTextureSampler [[sampler(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float4 _67 = _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData; + float _68 = _67.w; + float4 _82 = _Globals.ScreenToShadowMatrix * 
float4((((gl_FragCoord.xy * View.View_BufferSizeAndInvSize.zw) - View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_68), _68, 1.0); + float _118 = fast::clamp(((fast::clamp((ShadowDepthTexture.sample(ShadowDepthTextureSampler, (((_82.xyz / float3(_82.w)).xy * _Globals.ShadowTileOffsetAndSize.zw).xy + _Globals.ShadowTileOffsetAndSize.xy).xy, level(0.0)).xxx * float3(_Globals.SoftTransitionScale.z)) - float3((fast::min(_82.z, 0.999989986419677734375) * _Globals.SoftTransitionScale.z) - 1.0), float3(0.0), float3(1.0)).x - 0.5) * _Globals.ShadowSharpen) + 0.5, 0.0, 1.0); + float3 _127 = mix(_Globals.ModulatedShadowColor.xyz, float3(1.0), float3(mix(1.0, _118 * _118, _Globals.ShadowFadeFraction))); + float4 _128 = float4(_127.x, _127.y, _127.z, _58.w); + _128.w = 0.0; + out.out_var_SV_Target0 = _128; + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag b/reference/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag new file mode 100644 index 00000000000..d7a1993dc32 --- /dev/null +++ b/reference/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag @@ -0,0 +1,213 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; 
+ packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + 
float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float 
View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Globals +{ + float3 SoftTransitionScale; + float4 ShadowBufferSize; + float ShadowFadeFraction; + float ShadowSharpen; + float4 LightPositionAndInvRadius; + float4x4 ScreenToShadowMatrix; + float2 ProjectionDepthBiasParameters; + float4 ModulatedShadowColor; + float4 ShadowTileOffsetAndSize; +}; + +constant float4 _58 = {}; + +struct 
main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], float4 _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData [[color(0)]], texture2d ShadowDepthTexture [[texture(0)]], sampler ShadowDepthTextureSampler [[sampler(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float4 _67 = _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData; + float _68 = _67.w; + float4 _82 = _Globals.ScreenToShadowMatrix * float4((((gl_FragCoord.xy * View.View_BufferSizeAndInvSize.zw) - View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_68), _68, 1.0); + float _118 = fast::clamp(((fast::clamp((ShadowDepthTexture.sample(ShadowDepthTextureSampler, (((_82.xyz / float3(_82.w)).xy * _Globals.ShadowTileOffsetAndSize.zw).xy + _Globals.ShadowTileOffsetAndSize.xy).xy, level(0.0)).xxx * float3(_Globals.SoftTransitionScale.z)) - float3((fast::min(_82.z, 0.999989986419677734375) * _Globals.SoftTransitionScale.z) - 1.0), float3(0.0), float3(1.0)).x - 0.5) * _Globals.ShadowSharpen) + 0.5, 0.0, 1.0); + float3 _127 = mix(_Globals.ModulatedShadowColor.xyz, float3(1.0), float3(mix(1.0, _118 * _118, _Globals.ShadowFadeFraction))); + float4 _128 = float4(_127.x, _127.y, _127.z, _58.w); + _128.w = 0.0; + out.out_var_SV_Target0 = _128; + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag b/reference/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag new file mode 100644 index 00000000000..575c9ddb5e9 --- /dev/null +++ b/reference/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag @@ -0,0 +1,130 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct spvDescriptorSetBuffer0 +{ + const device type_StructuredBuffer_v4float* CulledObjectBoxBounds [[id(0)]]; + constant type_Globals* _Globals [[id(1)]]; + texture2d RWShadowTileNumCulledObjects [[id(2)]]; + device atomic_uint* RWShadowTileNumCulledObjects_atomic [[id(3)]]; +}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float(((*spvDescriptorSet0._Globals).ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2((*spvDescriptorSet0._Globals).ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _101 = float3(_100.x, _100.y, _70.z); + _101.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if 
(all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xy > _96.xy) && all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz < _101)) + { + float3 _121 = float3(0.5) * ((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz + (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xyz); + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + _73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[_160] - _121; + float3 _170 = float3(dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.RWShadowTileNumCulledObjects_atomic[(_78 * (*spvDescriptorSet0._Globals).ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/texture-atomics.asm.frag b/reference/shaders-ue4/asm/frag/texture-atomics.asm.frag new file mode 100644 index 00000000000..0918dfbfefc --- /dev/null +++ b/reference/shaders-ue4/asm/frag/texture-atomics.asm.frag @@ -0,0 +1,122 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include 
+#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(0)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _101 = float3(_100.x, _100.y, _70.z); + _101.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && 
all(CulledObjectBoxBounds._m0[_103].xyz < _101)) + { + float3 _121 = float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz); + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + _73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[_160] - _121; + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag b/reference/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag new file mode 100644 index 00000000000..0918dfbfefc --- /dev/null +++ b/reference/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag @@ -0,0 +1,122 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(0)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _101 = float3(_100.x, _100.y, _70.z); + _101.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _101)) + { + float3 _121 = float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + 
CulledObjectBoxBounds._m0[_107].xyz); + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + _73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[_160] - _121; + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc b/reference/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc new file mode 100644 index 00000000000..1d0212593bb --- /dev/null +++ b/reference/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc @@ -0,0 +1,396 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; + float4 Color; + spvUnsafeArray TexCoords; + float4 LightMapCoordinate; + uint PrimitiveId; + uint LightmapDataIndex; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FSharedBasePassInterpolants +{ +}; +struct FBasePassInterpolantsVSToDS +{ + FSharedBasePassInterpolants _m0; +}; + +struct FBasePassVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + FBasePassInterpolantsVSToDS BasePassInterpolants; + float4 Position; +}; + +struct FPNTessellationHSToDS +{ + FBasePassVSToDS PassSpecificData; + spvUnsafeArray WorldPosition; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 
View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + 
float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float 
View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +constant float4 _142 = {}; + +struct main0_out +{ + float4 
out_var_COLOR0; + uint out_var_LIGHTMAP_ID; + float3 out_var_PN_DisplacementScales; + spvUnsafeArray out_var_PN_POSITION; + float out_var_PN_TessellationMultiplier; + float out_var_PN_WorldDisplacementMultiplier; + uint out_var_PRIMITIVE_ID; + spvUnsafeArray out_var_TEXCOORD0; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_TEXCOORD4; + float4 out_var_VS_To_DS_Position; +}; + +struct main0_patchOut +{ + float4 out_var_PN_POSITION9; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_COLOR0 [[attribute(2)]]; + float4 in_var_TEXCOORD0_0 [[attribute(3)]]; + float4 in_var_TEXCOORD4 [[attribute(4)]]; + uint in_var_PRIMITIVE_ID [[attribute(5)]]; + uint in_var_LIGHTMAP_ID [[attribute(6)]]; + float4 in_var_VS_To_DS_Position [[attribute(7)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], const device type_StructuredBuffer_v4float& View_PrimitiveSceneData [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _144 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, gl_in[2].in_var_TEXCOORD10_centroid, gl_in[3].in_var_TEXCOORD10_centroid, 
gl_in[4].in_var_TEXCOORD10_centroid, gl_in[5].in_var_TEXCOORD10_centroid, gl_in[6].in_var_TEXCOORD10_centroid, gl_in[7].in_var_TEXCOORD10_centroid, gl_in[8].in_var_TEXCOORD10_centroid, gl_in[9].in_var_TEXCOORD10_centroid, gl_in[10].in_var_TEXCOORD10_centroid, gl_in[11].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _145 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, gl_in[2].in_var_TEXCOORD11_centroid, gl_in[3].in_var_TEXCOORD11_centroid, gl_in[4].in_var_TEXCOORD11_centroid, gl_in[5].in_var_TEXCOORD11_centroid, gl_in[6].in_var_TEXCOORD11_centroid, gl_in[7].in_var_TEXCOORD11_centroid, gl_in[8].in_var_TEXCOORD11_centroid, gl_in[9].in_var_TEXCOORD11_centroid, gl_in[10].in_var_TEXCOORD11_centroid, gl_in[11].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _146 = spvUnsafeArray({ gl_in[0].in_var_COLOR0, gl_in[1].in_var_COLOR0, gl_in[2].in_var_COLOR0, gl_in[3].in_var_COLOR0, gl_in[4].in_var_COLOR0, gl_in[5].in_var_COLOR0, gl_in[6].in_var_COLOR0, gl_in[7].in_var_COLOR0, gl_in[8].in_var_COLOR0, gl_in[9].in_var_COLOR0, gl_in[10].in_var_COLOR0, gl_in[11].in_var_COLOR0 }); + spvUnsafeArray, 12> _147 = spvUnsafeArray, 12>({ spvUnsafeArray({ gl_in[0].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[1].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[2].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[3].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[4].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[5].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[6].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[7].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[8].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[9].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[10].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[11].in_var_TEXCOORD0_0 }) }); + spvUnsafeArray _148 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD4, gl_in[1].in_var_TEXCOORD4, gl_in[2].in_var_TEXCOORD4, gl_in[3].in_var_TEXCOORD4, gl_in[4].in_var_TEXCOORD4, gl_in[5].in_var_TEXCOORD4, gl_in[6].in_var_TEXCOORD4, 
gl_in[7].in_var_TEXCOORD4, gl_in[8].in_var_TEXCOORD4, gl_in[9].in_var_TEXCOORD4, gl_in[10].in_var_TEXCOORD4, gl_in[11].in_var_TEXCOORD4 }); + spvUnsafeArray _149 = spvUnsafeArray({ gl_in[0].in_var_PRIMITIVE_ID, gl_in[1].in_var_PRIMITIVE_ID, gl_in[2].in_var_PRIMITIVE_ID, gl_in[3].in_var_PRIMITIVE_ID, gl_in[4].in_var_PRIMITIVE_ID, gl_in[5].in_var_PRIMITIVE_ID, gl_in[6].in_var_PRIMITIVE_ID, gl_in[7].in_var_PRIMITIVE_ID, gl_in[8].in_var_PRIMITIVE_ID, gl_in[9].in_var_PRIMITIVE_ID, gl_in[10].in_var_PRIMITIVE_ID, gl_in[11].in_var_PRIMITIVE_ID }); + spvUnsafeArray _150 = spvUnsafeArray({ gl_in[0].in_var_LIGHTMAP_ID, gl_in[1].in_var_LIGHTMAP_ID, gl_in[2].in_var_LIGHTMAP_ID, gl_in[3].in_var_LIGHTMAP_ID, gl_in[4].in_var_LIGHTMAP_ID, gl_in[5].in_var_LIGHTMAP_ID, gl_in[6].in_var_LIGHTMAP_ID, gl_in[7].in_var_LIGHTMAP_ID, gl_in[8].in_var_LIGHTMAP_ID, gl_in[9].in_var_LIGHTMAP_ID, gl_in[10].in_var_LIGHTMAP_ID, gl_in[11].in_var_LIGHTMAP_ID }); + spvUnsafeArray _259 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position, gl_in[3].in_var_VS_To_DS_Position, gl_in[4].in_var_VS_To_DS_Position, gl_in[5].in_var_VS_To_DS_Position, gl_in[6].in_var_VS_To_DS_Position, gl_in[7].in_var_VS_To_DS_Position, gl_in[8].in_var_VS_To_DS_Position, gl_in[9].in_var_VS_To_DS_Position, gl_in[10].in_var_VS_To_DS_Position, gl_in[11].in_var_VS_To_DS_Position }); + spvUnsafeArray _284 = spvUnsafeArray({ FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[0], _145[0], _146[0], _147[0], _148[0], _149[0], _150[0] } }, FBasePassInterpolantsVSToDS{ { } }, _259[0] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[1], _145[1], _146[1], _147[1], _148[1], _149[1], _150[1] } }, FBasePassInterpolantsVSToDS{ { } }, _259[1] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[2], _145[2], _146[2], _147[2], _148[2], _149[2], _150[2] } }, 
FBasePassInterpolantsVSToDS{ { } }, _259[2] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[3], _145[3], _146[3], _147[3], _148[3], _149[3], _150[3] } }, FBasePassInterpolantsVSToDS{ { } }, _259[3] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[4], _145[4], _146[4], _147[4], _148[4], _149[4], _150[4] } }, FBasePassInterpolantsVSToDS{ { } }, _259[4] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[5], _145[5], _146[5], _147[5], _148[5], _149[5], _150[5] } }, FBasePassInterpolantsVSToDS{ { } }, _259[5] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[6], _145[6], _146[6], _147[6], _148[6], _149[6], _150[6] } }, FBasePassInterpolantsVSToDS{ { } }, _259[6] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[7], _145[7], _146[7], _147[7], _148[7], _149[7], _150[7] } }, FBasePassInterpolantsVSToDS{ { } }, _259[7] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[8], _145[8], _146[8], _147[8], _148[8], _149[8], _150[8] } }, FBasePassInterpolantsVSToDS{ { } }, _259[8] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[9], _145[9], _146[9], _147[9], _148[9], _149[9], _150[9] } }, FBasePassInterpolantsVSToDS{ { } }, _259[9] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[10], _145[10], _146[10], _147[10], _148[10], _149[10], _150[10] } }, FBasePassInterpolantsVSToDS{ { } }, _259[10] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[11], _145[11], _146[11], _147[11], _148[11], _149[11], _150[11] } }, FBasePassInterpolantsVSToDS{ { } }, _259[11] } }); + spvUnsafeArray param_var_I; + param_var_I = _284; + float4 _301 = 
float4(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float3 _310 = View_PrimitiveSceneData._m0[(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.PrimitiveId * 26u) + 22u].xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + uint _313 = (gl_InvocationID < 2u) ? (gl_InvocationID + 1u) : 0u; + uint _314 = 2u * gl_InvocationID; + uint _315 = 3u + _314; + uint _316 = _314 + 4u; + float4 _328 = float4(param_var_I[_313].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _336 = float4(param_var_I[_315].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _344 = float4(param_var_I[_316].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + spvUnsafeArray _392 = spvUnsafeArray({ param_var_I[gl_InvocationID].Position, (((((float4(2.0) * param_var_I[gl_InvocationID].Position) + param_var_I[_313].Position) - (float4(dot(param_var_I[_313].Position - param_var_I[gl_InvocationID].Position, _301)) * _301)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_315].Position) + param_var_I[_316].Position) - (float4(dot(param_var_I[_316].Position - param_var_I[_315].Position, _336)) * _336)) * float4(0.3333333432674407958984375))) * float4(0.5), (((((float4(2.0) * param_var_I[_313].Position) + param_var_I[gl_InvocationID].Position) - (float4(dot(param_var_I[gl_InvocationID].Position - param_var_I[_313].Position, _328)) * _328)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_316].Position) + 
param_var_I[_315].Position) - (float4(dot(param_var_I[_315].Position - param_var_I[_316].Position, _344)) * _344)) * float4(0.3333333432674407958984375))) * float4(0.5) }); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_COLOR0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.Color; + gl_out[gl_InvocationID].out_var_TEXCOORD0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TexCoords; + gl_out[gl_InvocationID].out_var_TEXCOORD4 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.LightMapCoordinate; + gl_out[gl_InvocationID].out_var_PRIMITIVE_ID = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.PrimitiveId; + gl_out[gl_InvocationID].out_var_LIGHTMAP_ID = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.LightmapDataIndex; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_PN_POSITION = _392; + gl_out[gl_InvocationID].out_var_PN_DisplacementScales = _310; + gl_out[gl_InvocationID].out_var_PN_TessellationMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_WorldDisplacementMultiplier = 1.0; + temp_var_hullMainRetVal[gl_InvocationID] = FPNTessellationHSToDS{ param_var_I[gl_InvocationID], _392, _310, 1.0, 1.0 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _450 = (((((temp_var_hullMainRetVal[0u].WorldPosition[1] + temp_var_hullMainRetVal[0u].WorldPosition[2]) + temp_var_hullMainRetVal[1u].WorldPosition[1]) + temp_var_hullMainRetVal[1u].WorldPosition[2]) + temp_var_hullMainRetVal[2u].WorldPosition[1]) + temp_var_hullMainRetVal[2u].WorldPosition[2]) * 
float4(0.16666667163372039794921875); + float4 _463; + _463.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _463.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _463.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _463.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _589; + for (;;) + { + float4 _489 = View.View_ViewToClip * float4(0.0); + float4 _494 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[0u].WorldPosition[0].xyz, 1.0); + float3 _495 = _494.xyz; + float3 _496 = _489.xyz; + float _498 = _494.w; + float _499 = _489.w; + float4 _516 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[1u].WorldPosition[0].xyz, 1.0); + float3 _517 = _516.xyz; + float _519 = _516.w; + float4 _537 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); + float3 _538 = _537.xyz; + float _540 = _537.w; + if (any((((int3((_495 - _496) < float3(_498 + _499)) + (int3(2) * int3((_495 + _496) > float3((-_498) - _499)))) | (int3((_517 - _496) < float3(_519 + _499)) + (int3(2) * int3((_517 + _496) > float3((-_519) - _499))))) | (int3((_538 - _496) < float3(_540 + _499)) + (int3(2) * int3((_538 + _496) > float3((-_540) - _499))))) != int3(3))) + { + _589 = float4(0.0); + break; + } + float3 _558 = temp_var_hullMainRetVal[0u].WorldPosition[0].xyz - temp_var_hullMainRetVal[1u].WorldPosition[0].xyz; + float3 _559 = temp_var_hullMainRetVal[1u].WorldPosition[0].xyz - temp_var_hullMainRetVal[2u].WorldPosition[0].xyz; + float3 _560 = temp_var_hullMainRetVal[2u].WorldPosition[0].xyz - temp_var_hullMainRetVal[0u].WorldPosition[0].xyz; + float3 _563 = 
(float3(0.5) * (temp_var_hullMainRetVal[0u].WorldPosition[0].xyz + temp_var_hullMainRetVal[1u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _566 = (float3(0.5) * (temp_var_hullMainRetVal[1u].WorldPosition[0].xyz + temp_var_hullMainRetVal[2u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _569 = (float3(0.5) * (temp_var_hullMainRetVal[2u].WorldPosition[0].xyz + temp_var_hullMainRetVal[0u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float _573 = sqrt(dot(_559, _559) / dot(_566, _566)); + float _577 = sqrt(dot(_560, _560) / dot(_569, _569)); + float _581 = sqrt(dot(_558, _558) / dot(_563, _563)); + float4 _582 = float4(_573, _577, _581, 1.0); + _582.w = 0.333000004291534423828125 * ((_573 + _577) + _581); + _589 = float4(View.View_AdaptiveTessellationFactor) * _582; + break; + } + float4 _591 = fast::clamp(_463 * _589, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_591.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_591.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_591.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_591.w); + patchOut.out_var_PN_POSITION9 = _450 + ((_450 - (((temp_var_hullMainRetVal[2u].WorldPosition[0] + temp_var_hullMainRetVal[1u].WorldPosition[0]) + temp_var_hullMainRetVal[0u].WorldPosition[0]) * float4(0.3333333432674407958984375))) * float4(0.5)); + } +} + diff --git a/reference/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc b/reference/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc new file mode 100644 index 00000000000..f72e5d3b753 --- /dev/null +++ b/reference/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc @@ -0,0 +1,464 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template 
+struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FHitProxyVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + float4 Position; + uint VertexID; +}; + +struct FHullShaderConstantDominantVertexData +{ + float2 UV; + float4 Normal; + float3 Tangent; +}; + +struct FHullShaderConstantDominantEdgeData +{ + float2 UV0; + float2 UV1; + float4 Normal0; + float4 Normal1; + float3 Tangent0; + float3 Tangent1; +}; + +struct FPNTessellationHSToDS +{ + FHitProxyVSToDS PassSpecificData; + spvUnsafeArray WorldPosition; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; + FHullShaderConstantDominantVertexData DominantVertex; + FHullShaderConstantDominantEdgeData DominantEdge; +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 
View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 
View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float 
View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float 
PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_DrawsVelocity; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + uint Primitive_LightingChannelMask; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightmapDataIndex; + packed_float3 Primitive_PreSkinnedLocalBounds; + int Primitive_SingleCaptureIndex; + uint Primitive_OutputVelocity; + uint PrePadding_Primitive_420; + uint PrePadding_Primitive_424; + uint PrePadding_Primitive_428; + float4 Primitive_CustomPrimitiveData[4]; +}; + +constant float4 _140 = {}; + +struct main0_out +{ + float3 out_var_PN_DisplacementScales; + float2 out_var_PN_DominantEdge; + float2 out_var_PN_DominantEdge1; + float4 out_var_PN_DominantEdge2; + float4 out_var_PN_DominantEdge3; + float3 out_var_PN_DominantEdge4; + float3 out_var_PN_DominantEdge5; + float2 out_var_PN_DominantVertex; + float4 out_var_PN_DominantVertex1; + float3 out_var_PN_DominantVertex2; + spvUnsafeArray out_var_PN_POSITION; + float 
out_var_PN_TessellationMultiplier; + float out_var_PN_WorldDisplacementMultiplier; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_VS_To_DS_Position; + uint out_var_VS_To_DS_VertexID; +}; + +struct main0_patchOut +{ + float4 out_var_PN_POSITION9; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_VS_To_DS_Position [[attribute(2)]]; + uint in_var_VS_To_DS_VertexID [[attribute(3)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _142 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, gl_in[2].in_var_TEXCOORD10_centroid, gl_in[3].in_var_TEXCOORD10_centroid, gl_in[4].in_var_TEXCOORD10_centroid, gl_in[5].in_var_TEXCOORD10_centroid, gl_in[6].in_var_TEXCOORD10_centroid, gl_in[7].in_var_TEXCOORD10_centroid, gl_in[8].in_var_TEXCOORD10_centroid, gl_in[9].in_var_TEXCOORD10_centroid, gl_in[10].in_var_TEXCOORD10_centroid, gl_in[11].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _143 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, 
gl_in[2].in_var_TEXCOORD11_centroid, gl_in[3].in_var_TEXCOORD11_centroid, gl_in[4].in_var_TEXCOORD11_centroid, gl_in[5].in_var_TEXCOORD11_centroid, gl_in[6].in_var_TEXCOORD11_centroid, gl_in[7].in_var_TEXCOORD11_centroid, gl_in[8].in_var_TEXCOORD11_centroid, gl_in[9].in_var_TEXCOORD11_centroid, gl_in[10].in_var_TEXCOORD11_centroid, gl_in[11].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _192 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position, gl_in[3].in_var_VS_To_DS_Position, gl_in[4].in_var_VS_To_DS_Position, gl_in[5].in_var_VS_To_DS_Position, gl_in[6].in_var_VS_To_DS_Position, gl_in[7].in_var_VS_To_DS_Position, gl_in[8].in_var_VS_To_DS_Position, gl_in[9].in_var_VS_To_DS_Position, gl_in[10].in_var_VS_To_DS_Position, gl_in[11].in_var_VS_To_DS_Position }); + spvUnsafeArray _193 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_VertexID, gl_in[1].in_var_VS_To_DS_VertexID, gl_in[2].in_var_VS_To_DS_VertexID, gl_in[3].in_var_VS_To_DS_VertexID, gl_in[4].in_var_VS_To_DS_VertexID, gl_in[5].in_var_VS_To_DS_VertexID, gl_in[6].in_var_VS_To_DS_VertexID, gl_in[7].in_var_VS_To_DS_VertexID, gl_in[8].in_var_VS_To_DS_VertexID, gl_in[9].in_var_VS_To_DS_VertexID, gl_in[10].in_var_VS_To_DS_VertexID, gl_in[11].in_var_VS_To_DS_VertexID }); + spvUnsafeArray _230 = spvUnsafeArray({ FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[0], _143[0] } }, _192[0], _193[0] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[1], _143[1] } }, _192[1], _193[1] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[2], _143[2] } }, _192[2], _193[2] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[3], _143[3] } }, _192[3], _193[3] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[4], _143[4] } }, _192[4], _193[4] }, FHitProxyVSToDS{ 
FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[5], _143[5] } }, _192[5], _193[5] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[6], _143[6] } }, _192[6], _193[6] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[7], _143[7] } }, _192[7], _193[7] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[8], _143[8] } }, _192[8], _193[8] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[9], _143[9] } }, _192[9], _193[9] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[10], _143[10] } }, _192[10], _193[10] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[11], _143[11] } }, _192[11], _193[11] } }); + spvUnsafeArray param_var_I; + param_var_I = _230; + float4 _247 = float4(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float3 _251 = Primitive.Primitive_NonUniformScale.xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + uint _254 = (gl_InvocationID < 2u) ? (gl_InvocationID + 1u) : 0u; + uint _255 = 2u * gl_InvocationID; + uint _256 = 3u + _255; + uint _257 = _255 + 4u; + uint _264 = (_254 < 2u) ? 
(_254 + 1u) : 0u; + uint _265 = 2u * _254; + uint _266 = 3u + _265; + uint _267 = _265 + 4u; + float4 _279 = float4(param_var_I[9u + gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _315; + float4 _316; + float4 _317; + float4 _318; + if ((param_var_I[_266].VertexID < param_var_I[_254].VertexID) || ((param_var_I[_266].VertexID == param_var_I[_254].VertexID) && (param_var_I[_267].VertexID < param_var_I[_264].VertexID))) + { + _315 = param_var_I[_267].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _316 = param_var_I[_267].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + _317 = param_var_I[_266].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _318 = param_var_I[_266].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + } + else + { + _315 = param_var_I[_264].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _316 = param_var_I[_264].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + _317 = param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _318 = param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + } + float4 _324 = float4(_318.xyz, 0.0); + float4 _328 = float4(_316.xyz, 0.0); + float4 _336 = float4(param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _344 = float4(param_var_I[_256].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _352 = float4(param_var_I[_257].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + spvUnsafeArray _402 = spvUnsafeArray({ param_var_I[gl_InvocationID].Position, (((((float4(2.0) * param_var_I[gl_InvocationID].Position) + param_var_I[_254].Position) - (float4(dot(param_var_I[_254].Position - param_var_I[gl_InvocationID].Position, _247)) * _247)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_256].Position) + param_var_I[_257].Position) - (float4(dot(param_var_I[_257].Position - 
param_var_I[_256].Position, _344)) * _344)) * float4(0.3333333432674407958984375))) * float4(0.5), (((((float4(2.0) * param_var_I[_254].Position) + param_var_I[gl_InvocationID].Position) - (float4(dot(param_var_I[gl_InvocationID].Position - param_var_I[_254].Position, _336)) * _336)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_257].Position) + param_var_I[_256].Position) - (float4(dot(param_var_I[_256].Position - param_var_I[_257].Position, _352)) * _352)) * float4(0.3333333432674407958984375))) * float4(0.5) }); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_VS_To_DS_VertexID = param_var_I[gl_InvocationID].VertexID; + gl_out[gl_InvocationID].out_var_PN_POSITION = _402; + gl_out[gl_InvocationID].out_var_PN_DisplacementScales = _251; + gl_out[gl_InvocationID].out_var_PN_TessellationMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_WorldDisplacementMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_DominantVertex = float2(0.0); + gl_out[gl_InvocationID].out_var_PN_DominantVertex1 = _279; + gl_out[gl_InvocationID].out_var_PN_DominantVertex2 = param_var_I[9u + gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz; + gl_out[gl_InvocationID].out_var_PN_DominantEdge = float2(0.0); + gl_out[gl_InvocationID].out_var_PN_DominantEdge1 = float2(0.0); + gl_out[gl_InvocationID].out_var_PN_DominantEdge2 = _324; + gl_out[gl_InvocationID].out_var_PN_DominantEdge3 = _328; + gl_out[gl_InvocationID].out_var_PN_DominantEdge4 = _317.xyz; + gl_out[gl_InvocationID].out_var_PN_DominantEdge5 = _315.xyz; + temp_var_hullMainRetVal[gl_InvocationID] = FPNTessellationHSToDS{ 
param_var_I[gl_InvocationID], _402, _251, 1.0, 1.0, FHullShaderConstantDominantVertexData{ float2(0.0), _279, param_var_I[9u + gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz }, FHullShaderConstantDominantEdgeData{ float2(0.0), float2(0.0), _324, _328, _317.xyz, _315.xyz } }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _461 = (((((temp_var_hullMainRetVal[0u].WorldPosition[1] + temp_var_hullMainRetVal[0u].WorldPosition[2]) + temp_var_hullMainRetVal[1u].WorldPosition[1]) + temp_var_hullMainRetVal[1u].WorldPosition[2]) + temp_var_hullMainRetVal[2u].WorldPosition[1]) + temp_var_hullMainRetVal[2u].WorldPosition[2]) * float4(0.16666667163372039794921875); + float4 _474; + _474.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _474.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _474.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _474.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _600; + for (;;) + { + float4 _500 = View.View_ViewToClip * float4(0.0); + float4 _505 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[0u].WorldPosition[0].xyz, 1.0); + float3 _506 = _505.xyz; + float3 _507 = _500.xyz; + float _509 = _505.w; + float _510 = _500.w; + float4 _527 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[1u].WorldPosition[0].xyz, 1.0); + float3 _528 = _527.xyz; + float _530 = _527.w; + float4 _548 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); + float3 _549 = _548.xyz; + float _551 = _548.w; + if (any((((int3((_506 - _507) < 
float3(_509 + _510)) + (int3(2) * int3((_506 + _507) > float3((-_509) - _510)))) | (int3((_528 - _507) < float3(_530 + _510)) + (int3(2) * int3((_528 + _507) > float3((-_530) - _510))))) | (int3((_549 - _507) < float3(_551 + _510)) + (int3(2) * int3((_549 + _507) > float3((-_551) - _510))))) != int3(3))) + { + _600 = float4(0.0); + break; + } + float3 _569 = temp_var_hullMainRetVal[0u].WorldPosition[0].xyz - temp_var_hullMainRetVal[1u].WorldPosition[0].xyz; + float3 _570 = temp_var_hullMainRetVal[1u].WorldPosition[0].xyz - temp_var_hullMainRetVal[2u].WorldPosition[0].xyz; + float3 _571 = temp_var_hullMainRetVal[2u].WorldPosition[0].xyz - temp_var_hullMainRetVal[0u].WorldPosition[0].xyz; + float3 _574 = (float3(0.5) * (temp_var_hullMainRetVal[0u].WorldPosition[0].xyz + temp_var_hullMainRetVal[1u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _577 = (float3(0.5) * (temp_var_hullMainRetVal[1u].WorldPosition[0].xyz + temp_var_hullMainRetVal[2u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _580 = (float3(0.5) * (temp_var_hullMainRetVal[2u].WorldPosition[0].xyz + temp_var_hullMainRetVal[0u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float _584 = sqrt(dot(_570, _570) / dot(_577, _577)); + float _588 = sqrt(dot(_571, _571) / dot(_580, _580)); + float _592 = sqrt(dot(_569, _569) / dot(_574, _574)); + float4 _593 = float4(_584, _588, _592, 1.0); + _593.w = 0.333000004291534423828125 * ((_584 + _588) + _592); + _600 = float4(View.View_AdaptiveTessellationFactor) * _593; + break; + } + float4 _602 = fast::clamp(_474 * _600, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_602.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_602.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_602.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_602.w); + patchOut.out_var_PN_POSITION9 = 
_461 + ((_461 - (((temp_var_hullMainRetVal[2u].WorldPosition[0] + temp_var_hullMainRetVal[1u].WorldPosition[0]) + temp_var_hullMainRetVal[0u].WorldPosition[0]) * float4(0.3333333432674407958984375))) * float4(0.5)); + } +} + diff --git a/reference/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc b/reference/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc new file mode 100644 index 00000000000..5d4e320bd04 --- /dev/null +++ b/reference/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc @@ -0,0 +1,408 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; + float4 Color; + spvUnsafeArray TexCoords; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FHitProxyVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + float4 Position; +}; + +struct FPNTessellationHSToDS +{ + FHitProxyVSToDS PassSpecificData; + spvUnsafeArray WorldPosition; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; +}; + +struct type_View +{ + 
float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 
View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 
View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 
View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_DrawsVelocity; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + uint Primitive_LightingChannelMask; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightmapDataIndex; + packed_float3 Primitive_PreSkinnedLocalBounds; + int Primitive_SingleCaptureIndex; + uint Primitive_OutputVelocity; + uint PrePadding_Primitive_420; + uint PrePadding_Primitive_424; + uint PrePadding_Primitive_428; + float4 Primitive_CustomPrimitiveData[4]; +}; + +constant float4 _127 = {}; + +struct main0_out +{ + float4 out_var_COLOR0; + float3 out_var_PN_DisplacementScales; + spvUnsafeArray 
out_var_PN_POSITION; + float out_var_PN_TessellationMultiplier; + float out_var_PN_WorldDisplacementMultiplier; + spvUnsafeArray out_var_TEXCOORD0; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_VS_To_DS_Position; +}; + +struct main0_patchOut +{ + float4 out_var_PN_POSITION9; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_COLOR0 [[attribute(2)]]; + float2 in_var_TEXCOORD0_0 [[attribute(3)]]; + float2 in_var_TEXCOORD0_1 [[attribute(4)]]; + float4 in_var_VS_To_DS_Position [[attribute(5)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _129 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, gl_in[2].in_var_TEXCOORD10_centroid, gl_in[3].in_var_TEXCOORD10_centroid, gl_in[4].in_var_TEXCOORD10_centroid, gl_in[5].in_var_TEXCOORD10_centroid, gl_in[6].in_var_TEXCOORD10_centroid, gl_in[7].in_var_TEXCOORD10_centroid, gl_in[8].in_var_TEXCOORD10_centroid, gl_in[9].in_var_TEXCOORD10_centroid, gl_in[10].in_var_TEXCOORD10_centroid, gl_in[11].in_var_TEXCOORD10_centroid }); + 
spvUnsafeArray _130 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, gl_in[2].in_var_TEXCOORD11_centroid, gl_in[3].in_var_TEXCOORD11_centroid, gl_in[4].in_var_TEXCOORD11_centroid, gl_in[5].in_var_TEXCOORD11_centroid, gl_in[6].in_var_TEXCOORD11_centroid, gl_in[7].in_var_TEXCOORD11_centroid, gl_in[8].in_var_TEXCOORD11_centroid, gl_in[9].in_var_TEXCOORD11_centroid, gl_in[10].in_var_TEXCOORD11_centroid, gl_in[11].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _131 = spvUnsafeArray({ gl_in[0].in_var_COLOR0, gl_in[1].in_var_COLOR0, gl_in[2].in_var_COLOR0, gl_in[3].in_var_COLOR0, gl_in[4].in_var_COLOR0, gl_in[5].in_var_COLOR0, gl_in[6].in_var_COLOR0, gl_in[7].in_var_COLOR0, gl_in[8].in_var_COLOR0, gl_in[9].in_var_COLOR0, gl_in[10].in_var_COLOR0, gl_in[11].in_var_COLOR0 }); + spvUnsafeArray, 12> _132 = spvUnsafeArray, 12>({ spvUnsafeArray({ gl_in[0].in_var_TEXCOORD0_0, gl_in[0].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[1].in_var_TEXCOORD0_0, gl_in[1].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[2].in_var_TEXCOORD0_0, gl_in[2].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[3].in_var_TEXCOORD0_0, gl_in[3].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[4].in_var_TEXCOORD0_0, gl_in[4].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[5].in_var_TEXCOORD0_0, gl_in[5].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[6].in_var_TEXCOORD0_0, gl_in[6].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[7].in_var_TEXCOORD0_0, gl_in[7].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[8].in_var_TEXCOORD0_0, gl_in[8].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[9].in_var_TEXCOORD0_0, gl_in[9].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[10].in_var_TEXCOORD0_0, gl_in[10].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[11].in_var_TEXCOORD0_0, gl_in[11].in_var_TEXCOORD0_1 }) }); + spvUnsafeArray _205 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position, gl_in[3].in_var_VS_To_DS_Position, 
gl_in[4].in_var_VS_To_DS_Position, gl_in[5].in_var_VS_To_DS_Position, gl_in[6].in_var_VS_To_DS_Position, gl_in[7].in_var_VS_To_DS_Position, gl_in[8].in_var_VS_To_DS_Position, gl_in[9].in_var_VS_To_DS_Position, gl_in[10].in_var_VS_To_DS_Position, gl_in[11].in_var_VS_To_DS_Position }); + spvUnsafeArray _230 = spvUnsafeArray({ FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[0], _130[0], _131[0], _132[0] } }, _205[0] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[1], _130[1], _131[1], _132[1] } }, _205[1] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[2], _130[2], _131[2], _132[2] } }, _205[2] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[3], _130[3], _131[3], _132[3] } }, _205[3] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[4], _130[4], _131[4], _132[4] } }, _205[4] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[5], _130[5], _131[5], _132[5] } }, _205[5] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[6], _130[6], _131[6], _132[6] } }, _205[6] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[7], _130[7], _131[7], _132[7] } }, _205[7] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[8], _130[8], _131[8], _132[8] } }, _205[8] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[9], _130[9], _131[9], _132[9] } }, _205[9] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[10], _130[10], _131[10], _132[10] } }, _205[10] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[11], _130[11], _131[11], _132[11] } }, _205[11] } }); + spvUnsafeArray 
param_var_I; + param_var_I = _230; + float4 _247 = float4(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float3 _251 = Primitive.Primitive_NonUniformScale.xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + uint _254 = (gl_InvocationID < 2u) ? (gl_InvocationID + 1u) : 0u; + uint _255 = 2u * gl_InvocationID; + uint _256 = 3u + _255; + uint _257 = _255 + 4u; + float4 _269 = float4(param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _277 = float4(param_var_I[_256].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _285 = float4(param_var_I[_257].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + spvUnsafeArray _333 = spvUnsafeArray({ param_var_I[gl_InvocationID].Position, (((((float4(2.0) * param_var_I[gl_InvocationID].Position) + param_var_I[_254].Position) - (float4(dot(param_var_I[_254].Position - param_var_I[gl_InvocationID].Position, _247)) * _247)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_256].Position) + param_var_I[_257].Position) - (float4(dot(param_var_I[_257].Position - param_var_I[_256].Position, _277)) * _277)) * float4(0.3333333432674407958984375))) * float4(0.5), (((((float4(2.0) * param_var_I[_254].Position) + param_var_I[gl_InvocationID].Position) - (float4(dot(param_var_I[gl_InvocationID].Position - param_var_I[_254].Position, _269)) * _269)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_257].Position) + param_var_I[_256].Position) - 
(float4(dot(param_var_I[_256].Position - param_var_I[_257].Position, _285)) * _285)) * float4(0.3333333432674407958984375))) * float4(0.5) }); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_COLOR0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.Color; + gl_out[gl_InvocationID].out_var_TEXCOORD0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TexCoords; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_PN_POSITION = _333; + gl_out[gl_InvocationID].out_var_PN_DisplacementScales = _251; + gl_out[gl_InvocationID].out_var_PN_TessellationMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_WorldDisplacementMultiplier = 1.0; + temp_var_hullMainRetVal[gl_InvocationID] = FPNTessellationHSToDS{ param_var_I[gl_InvocationID], _333, _251, 1.0, 1.0 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _385 = (((((temp_var_hullMainRetVal[0u].WorldPosition[1] + temp_var_hullMainRetVal[0u].WorldPosition[2]) + temp_var_hullMainRetVal[1u].WorldPosition[1]) + temp_var_hullMainRetVal[1u].WorldPosition[2]) + temp_var_hullMainRetVal[2u].WorldPosition[1]) + temp_var_hullMainRetVal[2u].WorldPosition[2]) * float4(0.16666667163372039794921875); + float4 _398; + _398.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _398.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _398.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _398.w = 
0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _524; + for (;;) + { + float4 _424 = View.View_ViewToClip * float4(0.0); + float4 _429 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[0u].WorldPosition[0].xyz, 1.0); + float3 _430 = _429.xyz; + float3 _431 = _424.xyz; + float _433 = _429.w; + float _434 = _424.w; + float4 _451 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[1u].WorldPosition[0].xyz, 1.0); + float3 _452 = _451.xyz; + float _454 = _451.w; + float4 _472 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); + float3 _473 = _472.xyz; + float _475 = _472.w; + if (any((((int3((_430 - _431) < float3(_433 + _434)) + (int3(2) * int3((_430 + _431) > float3((-_433) - _434)))) | (int3((_452 - _431) < float3(_454 + _434)) + (int3(2) * int3((_452 + _431) > float3((-_454) - _434))))) | (int3((_473 - _431) < float3(_475 + _434)) + (int3(2) * int3((_473 + _431) > float3((-_475) - _434))))) != int3(3))) + { + _524 = float4(0.0); + break; + } + float3 _493 = temp_var_hullMainRetVal[0u].WorldPosition[0].xyz - temp_var_hullMainRetVal[1u].WorldPosition[0].xyz; + float3 _494 = temp_var_hullMainRetVal[1u].WorldPosition[0].xyz - temp_var_hullMainRetVal[2u].WorldPosition[0].xyz; + float3 _495 = temp_var_hullMainRetVal[2u].WorldPosition[0].xyz - temp_var_hullMainRetVal[0u].WorldPosition[0].xyz; + float3 _498 = (float3(0.5) * (temp_var_hullMainRetVal[0u].WorldPosition[0].xyz + temp_var_hullMainRetVal[1u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _501 = (float3(0.5) * (temp_var_hullMainRetVal[1u].WorldPosition[0].xyz + temp_var_hullMainRetVal[2u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _504 = (float3(0.5) * (temp_var_hullMainRetVal[2u].WorldPosition[0].xyz + 
temp_var_hullMainRetVal[0u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float _508 = sqrt(dot(_494, _494) / dot(_501, _501)); + float _512 = sqrt(dot(_495, _495) / dot(_504, _504)); + float _516 = sqrt(dot(_493, _493) / dot(_498, _498)); + float4 _517 = float4(_508, _512, _516, 1.0); + _517.w = 0.333000004291534423828125 * ((_508 + _512) + _516); + _524 = float4(View.View_AdaptiveTessellationFactor) * _517; + break; + } + float4 _526 = fast::clamp(_398 * _524, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_526.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_526.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_526.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_526.w); + patchOut.out_var_PN_POSITION9 = _385 + ((_385 - (((temp_var_hullMainRetVal[2u].WorldPosition[0] + temp_var_hullMainRetVal[1u].WorldPosition[0]) + temp_var_hullMainRetVal[0u].WorldPosition[0]) * float4(0.3333333432674407958984375))) * float4(0.5)); + } +} + diff --git a/reference/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc b/reference/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc new file mode 100644 index 00000000000..9ae81e40615 --- /dev/null +++ b/reference/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc @@ -0,0 +1,175 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FSharedBasePassInterpolants +{ +}; +struct FBasePassInterpolantsVSToDS +{ + FSharedBasePassInterpolants _m0; +}; + +struct FBasePassVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + FBasePassInterpolantsVSToDS BasePassInterpolants; + float4 Position; +}; + +struct FFlatTessellationHSToDS +{ + FBasePassVSToDS PassSpecificData; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_DrawsVelocity; + float4 
Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + uint Primitive_LightingChannelMask; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightmapDataIndex; + packed_float3 Primitive_PreSkinnedLocalBounds; + int Primitive_SingleCaptureIndex; + uint Primitive_OutputVelocity; + uint PrePadding_Primitive_420; + uint PrePadding_Primitive_424; + uint PrePadding_Primitive_428; + float4 Primitive_CustomPrimitiveData[4]; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[3]; + float4 Material_ScalarExpressions[1]; +}; + +constant float4 _88 = {}; + +struct main0_out +{ + float3 out_var_Flat_DisplacementScales; + float out_var_Flat_TessellationMultiplier; + float out_var_Flat_WorldDisplacementMultiplier; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_VS_To_DS_Position; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_VS_To_DS_Position [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_Primitive& Primitive [[buffer(0)]], constant type_Material& Material [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FFlatTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _90 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, 
gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _91 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _104 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position }); + spvUnsafeArray _111 = spvUnsafeArray({ FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _90[0], _91[0] } }, FBasePassInterpolantsVSToDS{ { } }, _104[0] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _90[1], _91[1] } }, FBasePassInterpolantsVSToDS{ { } }, _104[1] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _90[2], _91[2] } }, FBasePassInterpolantsVSToDS{ { } }, _104[2] } }); + spvUnsafeArray param_var_I; + param_var_I = _111; + float3 _128 = Primitive.Primitive_NonUniformScale.xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_Flat_DisplacementScales = _128; + gl_out[gl_InvocationID].out_var_Flat_TessellationMultiplier = Material.Material_ScalarExpressions[0].x; + 
gl_out[gl_InvocationID].out_var_Flat_WorldDisplacementMultiplier = 1.0; + temp_var_hullMainRetVal[gl_InvocationID] = FFlatTessellationHSToDS{ param_var_I[gl_InvocationID], _128, Material.Material_ScalarExpressions[0].x, 1.0 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _154; + _154.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _154.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _154.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _154.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _173 = fast::clamp(_154, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_173.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_173.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_173.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_173.w); + } +} + diff --git a/reference/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese b/reference/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese new file mode 100644 index 00000000000..612100604d7 --- /dev/null +++ b/reference/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese @@ -0,0 +1,419 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 
View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 
View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float 
View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_ShadowDepthPass +{ + float PrePadding_ShadowDepthPass_LPV_0; + float PrePadding_ShadowDepthPass_LPV_4; + float PrePadding_ShadowDepthPass_LPV_8; + float PrePadding_ShadowDepthPass_LPV_12; + float PrePadding_ShadowDepthPass_LPV_16; + float PrePadding_ShadowDepthPass_LPV_20; + float PrePadding_ShadowDepthPass_LPV_24; + float PrePadding_ShadowDepthPass_LPV_28; + float PrePadding_ShadowDepthPass_LPV_32; + float PrePadding_ShadowDepthPass_LPV_36; + float PrePadding_ShadowDepthPass_LPV_40; + float PrePadding_ShadowDepthPass_LPV_44; + float 
PrePadding_ShadowDepthPass_LPV_48; + float PrePadding_ShadowDepthPass_LPV_52; + float PrePadding_ShadowDepthPass_LPV_56; + float PrePadding_ShadowDepthPass_LPV_60; + float PrePadding_ShadowDepthPass_LPV_64; + float PrePadding_ShadowDepthPass_LPV_68; + float PrePadding_ShadowDepthPass_LPV_72; + float PrePadding_ShadowDepthPass_LPV_76; + float PrePadding_ShadowDepthPass_LPV_80; + float PrePadding_ShadowDepthPass_LPV_84; + float PrePadding_ShadowDepthPass_LPV_88; + float PrePadding_ShadowDepthPass_LPV_92; + float PrePadding_ShadowDepthPass_LPV_96; + float PrePadding_ShadowDepthPass_LPV_100; + float PrePadding_ShadowDepthPass_LPV_104; + float PrePadding_ShadowDepthPass_LPV_108; + float PrePadding_ShadowDepthPass_LPV_112; + float PrePadding_ShadowDepthPass_LPV_116; + float PrePadding_ShadowDepthPass_LPV_120; + float PrePadding_ShadowDepthPass_LPV_124; + float PrePadding_ShadowDepthPass_LPV_128; + float PrePadding_ShadowDepthPass_LPV_132; + float PrePadding_ShadowDepthPass_LPV_136; + float PrePadding_ShadowDepthPass_LPV_140; + float PrePadding_ShadowDepthPass_LPV_144; + float PrePadding_ShadowDepthPass_LPV_148; + float PrePadding_ShadowDepthPass_LPV_152; + float PrePadding_ShadowDepthPass_LPV_156; + float PrePadding_ShadowDepthPass_LPV_160; + float PrePadding_ShadowDepthPass_LPV_164; + float PrePadding_ShadowDepthPass_LPV_168; + float PrePadding_ShadowDepthPass_LPV_172; + float PrePadding_ShadowDepthPass_LPV_176; + float PrePadding_ShadowDepthPass_LPV_180; + float PrePadding_ShadowDepthPass_LPV_184; + float PrePadding_ShadowDepthPass_LPV_188; + float PrePadding_ShadowDepthPass_LPV_192; + float PrePadding_ShadowDepthPass_LPV_196; + float PrePadding_ShadowDepthPass_LPV_200; + float PrePadding_ShadowDepthPass_LPV_204; + float PrePadding_ShadowDepthPass_LPV_208; + float PrePadding_ShadowDepthPass_LPV_212; + float PrePadding_ShadowDepthPass_LPV_216; + float PrePadding_ShadowDepthPass_LPV_220; + float PrePadding_ShadowDepthPass_LPV_224; + float 
PrePadding_ShadowDepthPass_LPV_228; + float PrePadding_ShadowDepthPass_LPV_232; + float PrePadding_ShadowDepthPass_LPV_236; + float PrePadding_ShadowDepthPass_LPV_240; + float PrePadding_ShadowDepthPass_LPV_244; + float PrePadding_ShadowDepthPass_LPV_248; + float PrePadding_ShadowDepthPass_LPV_252; + float PrePadding_ShadowDepthPass_LPV_256; + float PrePadding_ShadowDepthPass_LPV_260; + float PrePadding_ShadowDepthPass_LPV_264; + float PrePadding_ShadowDepthPass_LPV_268; + float4x4 ShadowDepthPass_LPV_mRsmToWorld; + float4 ShadowDepthPass_LPV_mLightColour; + float4 ShadowDepthPass_LPV_GeometryVolumeCaptureLightDirection; + float4 ShadowDepthPass_LPV_mEyePos; + packed_int3 ShadowDepthPass_LPV_mOldGridOffset; + int PrePadding_ShadowDepthPass_LPV_396; + packed_int3 ShadowDepthPass_LPV_mLpvGridOffset; + float ShadowDepthPass_LPV_ClearMultiplier; + float ShadowDepthPass_LPV_LpvScale; + float ShadowDepthPass_LPV_OneOverLpvScale; + float ShadowDepthPass_LPV_DirectionalOcclusionIntensity; + float ShadowDepthPass_LPV_DirectionalOcclusionRadius; + float ShadowDepthPass_LPV_RsmAreaIntensityMultiplier; + float ShadowDepthPass_LPV_RsmPixelToTexcoordMultiplier; + float ShadowDepthPass_LPV_SecondaryOcclusionStrength; + float ShadowDepthPass_LPV_SecondaryBounceStrength; + float ShadowDepthPass_LPV_VplInjectionBias; + float ShadowDepthPass_LPV_GeometryVolumeInjectionBias; + float ShadowDepthPass_LPV_EmissiveInjectionMultiplier; + int ShadowDepthPass_LPV_PropagationIndex; + float4x4 ShadowDepthPass_ProjectionMatrix; + float4x4 ShadowDepthPass_ViewMatrix; + float4 ShadowDepthPass_ShadowParams; + float ShadowDepthPass_bClampToNearPlane; + float PrePadding_ShadowDepthPass_612; + float PrePadding_ShadowDepthPass_616; + float PrePadding_ShadowDepthPass_620; + float4x4 ShadowDepthPass_ShadowViewProjectionMatrices[6]; + float4x4 ShadowDepthPass_ShadowViewMatrices[6]; +}; + +constant float4 _113 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 
out_var_TEXCOORD11_centroid [[user(locn1)]]; + float4 out_var_COLOR0 [[user(locn2)]]; + float4 out_var_TEXCOORD0_0 [[user(locn3)]]; + uint out_var_PRIMITIVE_ID [[user(locn4)]]; + float out_var_TEXCOORD6 [[user(locn5)]]; + float out_var_TEXCOORD8 [[user(locn6)]]; + float3 out_var_TEXCOORD7 [[user(locn7)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in_var_COLOR0 [[attribute(0)]]; + float4 in_var_PN_POSITION_0 [[attribute(2)]]; + float4 in_var_PN_POSITION_1 [[attribute(3)]]; + float4 in_var_PN_POSITION_2 [[attribute(4)]]; + float in_var_PN_WorldDisplacementMultiplier [[attribute(7)]]; + uint in_var_PRIMITIVE_ID [[attribute(8)]]; + float4 in_var_TEXCOORD0_0 [[attribute(9)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(10)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(11)]]; +}; + +struct main0_patchIn +{ + float4 in_var_PN_POSITION9 [[attribute(5)]]; + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_View& View [[buffer(0)]], constant type_ShadowDepthPass& ShadowDepthPass [[buffer(1)]], texture2d Material_Texture2D_3 [[texture(0)]], sampler Material_Texture2D_3Sampler [[sampler(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray out_var_TEXCOORD0 = {}; + spvUnsafeArray _117 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _118 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _119 = spvUnsafeArray({ patchIn.gl_in[0].in_var_COLOR0, patchIn.gl_in[1].in_var_COLOR0, patchIn.gl_in[2].in_var_COLOR0 }); + spvUnsafeArray, 3> _120 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD0_0 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_TEXCOORD0_0 }), 
spvUnsafeArray({ patchIn.gl_in[2].in_var_TEXCOORD0_0 }) }); + spvUnsafeArray, 3> _135 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_POSITION_0, patchIn.gl_in[0].in_var_PN_POSITION_1, patchIn.gl_in[0].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_PN_POSITION_0, patchIn.gl_in[1].in_var_PN_POSITION_1, patchIn.gl_in[1].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_PN_POSITION_0, patchIn.gl_in[2].in_var_PN_POSITION_1, patchIn.gl_in[2].in_var_PN_POSITION_2 }) }); + spvUnsafeArray _136 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[1].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[2].in_var_PN_WorldDisplacementMultiplier }); + float _157 = gl_TessCoord.x * gl_TessCoord.x; + float _158 = gl_TessCoord.y * gl_TessCoord.y; + float _159 = gl_TessCoord.z * gl_TessCoord.z; + float4 _165 = float4(gl_TessCoord.x); + float4 _169 = float4(gl_TessCoord.y); + float4 _174 = float4(gl_TessCoord.z); + float4 _177 = float4(_157 * 3.0); + float4 _181 = float4(_158 * 3.0); + float4 _188 = float4(_159 * 3.0); + float4 _202 = ((((((((((_135[0][0] * float4(_157)) * _165) + ((_135[1][0] * float4(_158)) * _169)) + ((_135[2][0] * float4(_159)) * _174)) + ((_135[0][1] * _177) * _169)) + ((_135[0][2] * _181) * _165)) + ((_135[1][1] * _181) * _174)) + ((_135[1][2] * _188) * _169)) + ((_135[2][1] * _188) * _165)) + ((_135[2][2] * _177) * _174)) + ((((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _174) * _165) * _169); + float3 _226 = ((_117[0].xyz * float3(gl_TessCoord.x)) + (_117[1].xyz * float3(gl_TessCoord.y))).xyz + (_117[2].xyz * float3(gl_TessCoord.z)); + float4 _229 = ((_118[0] * _165) + (_118[1] * _169)) + (_118[2] * _174); + float4 _231 = ((_119[0] * _165) + (_119[1] * _169)) + (_119[2] * _174); + float4 _233 = ((_120[0][0] * _165) + (_120[1][0] * _169)) + (_120[2][0] * _174); + spvUnsafeArray _234 = spvUnsafeArray({ _233 }); + float3 _236 = _229.xyz; + float3 _264 = _202.xyz + 
(((float3((Material_Texture2D_3.sample(Material_Texture2D_3Sampler, (float2(View.View_GameTime * 0.20000000298023223876953125, View.View_GameTime * (-0.699999988079071044921875)) + (_233.zw * float2(1.0, 2.0))), level(-1.0)).x * 10.0) * (1.0 - _231.x)) * _236) * float3(0.5)) * float3(((_136[0] * gl_TessCoord.x) + (_136[1] * gl_TessCoord.y)) + (_136[2] * gl_TessCoord.z))); + float4x4 _116 = ShadowDepthPass.ShadowDepthPass_ViewMatrix; + float4 _270 = ShadowDepthPass.ShadowDepthPass_ProjectionMatrix * float4(_264.x, _264.y, _264.z, _202.w); + float4 _281; + if ((ShadowDepthPass.ShadowDepthPass_bClampToNearPlane > 0.0) && (_270.z < 0.0)) + { + float4 _279 = _270; + _279.z = 9.9999999747524270787835121154785e-07; + _279.w = 1.0; + _281 = _279; + } + else + { + _281 = _270; + } + float _290 = abs(dot(float3(_116[0u].z, _116[1u].z, _116[2u].z), _236)); + out.out_var_TEXCOORD10_centroid = float4(_226.x, _226.y, _226.z, _113.w); + out.out_var_TEXCOORD11_centroid = _229; + out.out_var_COLOR0 = _231; + out_var_TEXCOORD0 = _234; + out.out_var_PRIMITIVE_ID = patchIn.gl_in[0u].in_var_PRIMITIVE_ID; + out.out_var_TEXCOORD6 = _281.z; + out.out_var_TEXCOORD8 = (ShadowDepthPass.ShadowDepthPass_ShadowParams.y * fast::clamp((abs(_290) > 0.0) ? 
(sqrt(fast::clamp(1.0 - (_290 * _290), 0.0, 1.0)) / _290) : ShadowDepthPass.ShadowDepthPass_ShadowParams.z, 0.0, ShadowDepthPass.ShadowDepthPass_ShadowParams.z)) + ShadowDepthPass.ShadowDepthPass_ShadowParams.x; + out.out_var_TEXCOORD7 = _264.xyz; + out.gl_Position = _281; + out.out_var_TEXCOORD0_0 = out_var_TEXCOORD0[0]; + return out; +} + diff --git a/reference/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese b/reference/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese new file mode 100644 index 00000000000..f1b74aacbbc --- /dev/null +++ b/reference/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese @@ -0,0 +1,416 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 
View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; 
+ float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float 
View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 
View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; + float PrePadding_View_3048; + float PrePadding_View_3052; + float4x4 View_WorldToVirtualTexture; + float4 View_VirtualTextureParams; + float4 View_XRPassthroughCameraUVs[2]; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[5]; + float4 Material_ScalarExpressions[2]; +}; + +constant float4 _118 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD6 [[user(locn0)]]; + float4 out_var_TEXCOORD7 [[user(locn1)]]; + float4 out_var_TEXCOORD10_centroid [[user(locn2)]]; + float4 out_var_TEXCOORD11_centroid [[user(locn3)]]; + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [1]; + float gl_ClipDistance_0 [[user(clip0)]]; +}; + +struct main0_in +{ + float4 in_var_PN_DominantEdge2 [[attribute(3)]]; + float4 in_var_PN_DominantEdge3 [[attribute(4)]]; + float3 in_var_PN_DominantEdge4 [[attribute(5)]]; + float3 in_var_PN_DominantEdge5 [[attribute(6)]]; + float4 in_var_PN_DominantVertex1 [[attribute(8)]]; + float3 in_var_PN_DominantVertex2 [[attribute(9)]]; + float4 in_var_PN_POSITION_0 [[attribute(10)]]; + float4 in_var_PN_POSITION_1 [[attribute(11)]]; + float4 in_var_PN_POSITION_2 [[attribute(12)]]; + float in_var_PN_WorldDisplacementMultiplier [[attribute(15)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(16)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(17)]]; + float4 in_var_TEXCOORD6 [[attribute(18)]]; + float4 in_var_TEXCOORD8 [[attribute(19)]]; +}; + +struct main0_patchIn +{ + float4 in_var_PN_POSITION9 [[attribute(13)]]; + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Material& Material [[buffer(1)]], texture3d View_GlobalDistanceFieldTexture0 
[[texture(0)]], texture3d View_GlobalDistanceFieldTexture1 [[texture(1)]], texture3d View_GlobalDistanceFieldTexture2 [[texture(2)]], texture3d View_GlobalDistanceFieldTexture3 [[texture(3)]], sampler View_GlobalDistanceFieldSampler0 [[sampler(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray _120 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD6, patchIn.gl_in[1].in_var_TEXCOORD6, patchIn.gl_in[2].in_var_TEXCOORD6 }); + spvUnsafeArray _121 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD8, patchIn.gl_in[1].in_var_TEXCOORD8, patchIn.gl_in[2].in_var_TEXCOORD8 }); + spvUnsafeArray _128 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _129 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray, 3> _136 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_POSITION_0, patchIn.gl_in[0].in_var_PN_POSITION_1, patchIn.gl_in[0].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_PN_POSITION_0, patchIn.gl_in[1].in_var_PN_POSITION_1, patchIn.gl_in[1].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_PN_POSITION_0, patchIn.gl_in[2].in_var_PN_POSITION_1, patchIn.gl_in[2].in_var_PN_POSITION_2 }) }); + spvUnsafeArray _137 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[1].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[2].in_var_PN_WorldDisplacementMultiplier }); + spvUnsafeArray _138 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantVertex1, patchIn.gl_in[1].in_var_PN_DominantVertex1, patchIn.gl_in[2].in_var_PN_DominantVertex1 }); + spvUnsafeArray _139 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantVertex2, patchIn.gl_in[1].in_var_PN_DominantVertex2, patchIn.gl_in[2].in_var_PN_DominantVertex2 
}); + spvUnsafeArray _146 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge2, patchIn.gl_in[1].in_var_PN_DominantEdge2, patchIn.gl_in[2].in_var_PN_DominantEdge2 }); + spvUnsafeArray _147 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge3, patchIn.gl_in[1].in_var_PN_DominantEdge3, patchIn.gl_in[2].in_var_PN_DominantEdge3 }); + spvUnsafeArray _148 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge4, patchIn.gl_in[1].in_var_PN_DominantEdge4, patchIn.gl_in[2].in_var_PN_DominantEdge4 }); + spvUnsafeArray _149 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge5, patchIn.gl_in[1].in_var_PN_DominantEdge5, patchIn.gl_in[2].in_var_PN_DominantEdge5 }); + float _190 = gl_TessCoord.x * gl_TessCoord.x; + float _191 = gl_TessCoord.y * gl_TessCoord.y; + float _192 = gl_TessCoord.z * gl_TessCoord.z; + float4 _198 = float4(gl_TessCoord.x); + float4 _202 = float4(gl_TessCoord.y); + float4 _207 = float4(gl_TessCoord.z); + float4 _210 = float4(_190 * 3.0); + float4 _214 = float4(_191 * 3.0); + float4 _221 = float4(_192 * 3.0); + float4 _235 = ((((((((((_136[0][0] * float4(_190)) * _198) + ((_136[1][0] * float4(_191)) * _202)) + ((_136[2][0] * float4(_192)) * _207)) + ((_136[0][1] * _210) * _202)) + ((_136[0][2] * _214) * _198)) + ((_136[1][1] * _214) * _207)) + ((_136[1][2] * _221) * _202)) + ((_136[2][1] * _221) * _198)) + ((_136[2][2] * _210) * _207)) + ((((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _207) * _198) * _202); + float3 _237 = float3(gl_TessCoord.x); + float3 _240 = float3(gl_TessCoord.y); + float3 _254 = float3(gl_TessCoord.z); + float3 _256 = ((_128[0].xyz * _237) + (_128[1].xyz * _240)).xyz + (_128[2].xyz * _254); + float4 _259 = ((_129[0] * _198) + (_129[1] * _202)) + (_129[2] * _207); + float3 _264 = _235.xyz; + float3 _265 = _256.xyz; + float3 _266 = _259.xyz; + float3 _272 = _264 + float3(View.View_WorldCameraOrigin); + float _279 = float(int(gl_TessCoord.x == 0.0)); + float _282 = float(int(gl_TessCoord.y == 0.0)); + float _285 = 
float(int(gl_TessCoord.z == 0.0)); + float _286 = _279 + _282; + float _287 = _286 + _285; + float4 _387; + float3 _388; + if (float(int(_287 == 2.0)) == 1.0) + { + float _363 = float(int((_282 + _285) == 2.0)); + float _367 = float(int((_285 + _279) == 2.0)); + float _370 = float(int(_286 == 2.0)); + _387 = ((float4(_363) * _138[0]) + (float4(_367) * _138[1])) + (float4(_370) * _138[2]); + _388 = ((float3(_363) * _139[0]) + (float3(_367) * _139[1])) + (float3(_370) * _139[2]); + } + else + { + float4 _358; + float3 _359; + if (float(int(_287 == 1.0)) != 0.0) + { + float4 _304 = float4(_279); + float4 _306 = float4(_282); + float4 _309 = float4(_285); + float4 _311 = ((_304 * _146[0]) + (_306 * _146[1])) + (_309 * _146[2]); + float4 _316 = ((_304 * _147[0]) + (_306 * _147[1])) + (_309 * _147[2]); + float3 _331 = float3(_279); + float3 _333 = float3(_282); + float3 _336 = float3(_285); + float3 _338 = ((_331 * _148[0]) + (_333 * _148[1])) + (_336 * _148[2]); + float3 _343 = ((_331 * _149[0]) + (_333 * _149[1])) + (_336 * _149[2]); + _358 = ((_304 * ((_202 * _311) + (_207 * _316))) + (_306 * ((_207 * _311) + (_198 * _316)))) + (_309 * ((_198 * _311) + (_202 * _316))); + _359 = ((_331 * ((_240 * _338) + (_254 * _343))) + (_333 * ((_254 * _338) + (_237 * _343)))) + (_336 * ((_237 * _338) + (_240 * _343))); + } + else + { + _358 = float4(_259.xyz, 0.0); + _359 = _265; + } + _387 = _358; + _388 = _359; + } + float3x3 _398; + if (float(int(_287 == 0.0)) == 0.0) + { + _398 = float3x3(_388, cross(_387.xyz, _388) * float3(_387.w), _387.xyz); + } + else + { + _398 = float3x3(_265, cross(_266, _265) * float3(_259.w), _266); + } + float3 _411 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[0].xyz) + View.View_GlobalVolumeCenterAndExtent[0].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[0].xyz + View.View_GlobalVolumeCenterAndExtent[0].www) - _272, float3(0.0))); + float _547; + if (fast::min(_411.x, fast::min(_411.y, _411.z)) > 
(View.View_GlobalVolumeCenterAndExtent[0].w * View.View_GlobalVolumeTexelSize)) + { + _547 = View_GlobalDistanceFieldTexture0.sample(View_GlobalDistanceFieldSampler0, ((_272 * View.View_GlobalVolumeWorldToUVAddAndMul[0u].www) + View.View_GlobalVolumeWorldToUVAddAndMul[0u].xyz), level(0.0)).x; + } + else + { + float3 _436 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[1].xyz) + View.View_GlobalVolumeCenterAndExtent[1].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[1].xyz + View.View_GlobalVolumeCenterAndExtent[1].www) - _272, float3(0.0))); + float _535; + if (fast::min(_436.x, fast::min(_436.y, _436.z)) > (View.View_GlobalVolumeCenterAndExtent[1].w * View.View_GlobalVolumeTexelSize)) + { + _535 = View_GlobalDistanceFieldTexture1.sample(View_GlobalDistanceFieldSampler0, ((_272 * View.View_GlobalVolumeWorldToUVAddAndMul[1u].www) + View.View_GlobalVolumeWorldToUVAddAndMul[1u].xyz), level(0.0)).x; + } + else + { + float3 _459 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[2].xyz) + View.View_GlobalVolumeCenterAndExtent[2].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[2].xyz + View.View_GlobalVolumeCenterAndExtent[2].www) - _272, float3(0.0))); + float3 _475 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[3].xyz) + View.View_GlobalVolumeCenterAndExtent[3].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[3].xyz + View.View_GlobalVolumeCenterAndExtent[3].www) - _272, float3(0.0))); + float _480 = fast::min(_475.x, fast::min(_475.y, _475.z)); + float _523; + if (fast::min(_459.x, fast::min(_459.y, _459.z)) > (View.View_GlobalVolumeCenterAndExtent[2].w * View.View_GlobalVolumeTexelSize)) + { + _523 = View_GlobalDistanceFieldTexture2.sample(View_GlobalDistanceFieldSampler0, ((_272 * View.View_GlobalVolumeWorldToUVAddAndMul[2u].www) + View.View_GlobalVolumeWorldToUVAddAndMul[2u].xyz), level(0.0)).x; + } + else + { + float _511; + if (_480 > 
(View.View_GlobalVolumeCenterAndExtent[3].w * View.View_GlobalVolumeTexelSize)) + { + _511 = mix(View.View_MaxGlobalDistance, View_GlobalDistanceFieldTexture3.sample(View_GlobalDistanceFieldSampler0, ((_272 * View.View_GlobalVolumeWorldToUVAddAndMul[3u].www) + View.View_GlobalVolumeWorldToUVAddAndMul[3u].xyz), level(0.0)).x, fast::clamp((_480 * 10.0) * View.View_GlobalVolumeWorldToUVAddAndMul[3].w, 0.0, 1.0)); + } + else + { + _511 = View.View_MaxGlobalDistance; + } + _523 = _511; + } + _535 = _523; + } + _547 = _535; + } + float3 _565 = _264 + ((_398[2] * float3(fast::min(_547 + Material.Material_ScalarExpressions[0].z, 0.0) * Material.Material_ScalarExpressions[0].w)) * float3(((_137[0] * gl_TessCoord.x) + (_137[1] * gl_TessCoord.y)) + (_137[2] * gl_TessCoord.z))); + float4 _574 = View.View_TranslatedWorldToClip * float4(_565.x, _565.y, _565.z, _235.w); + _574.z = _574.z + (0.001000000047497451305389404296875 * _574.w); + out.gl_Position = _574; + out.out_var_TEXCOORD6 = ((_120[0] * _198) + (_120[1] * _202)) + (_120[2] * _207); + out.out_var_TEXCOORD7 = ((_121[0] * _198) + (_121[1] * _202)) + (_121[2] * _207); + out.out_var_TEXCOORD10_centroid = float4(_256.x, _256.y, _256.z, _118.w); + out.out_var_TEXCOORD11_centroid = _259; + out.gl_ClipDistance[0u] = dot(View.View_GlobalClippingPlane, float4(_565.xyz - float3(View.View_PreViewTranslation), 1.0)); + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + return out; +} + diff --git a/reference/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese b/reference/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese new file mode 100644 index 00000000000..26b2e4b0ff0 --- /dev/null +++ b/reference/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese @@ -0,0 +1,216 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_ShadowDepthPass +{ + float PrePadding_ShadowDepthPass_LPV_0; + float PrePadding_ShadowDepthPass_LPV_4; + float PrePadding_ShadowDepthPass_LPV_8; + float PrePadding_ShadowDepthPass_LPV_12; + float PrePadding_ShadowDepthPass_LPV_16; + float PrePadding_ShadowDepthPass_LPV_20; + float PrePadding_ShadowDepthPass_LPV_24; + float PrePadding_ShadowDepthPass_LPV_28; + float PrePadding_ShadowDepthPass_LPV_32; + float PrePadding_ShadowDepthPass_LPV_36; + float PrePadding_ShadowDepthPass_LPV_40; + float PrePadding_ShadowDepthPass_LPV_44; + float PrePadding_ShadowDepthPass_LPV_48; + float PrePadding_ShadowDepthPass_LPV_52; + float PrePadding_ShadowDepthPass_LPV_56; + float PrePadding_ShadowDepthPass_LPV_60; + float PrePadding_ShadowDepthPass_LPV_64; + float PrePadding_ShadowDepthPass_LPV_68; + float PrePadding_ShadowDepthPass_LPV_72; + float PrePadding_ShadowDepthPass_LPV_76; + float PrePadding_ShadowDepthPass_LPV_80; + float PrePadding_ShadowDepthPass_LPV_84; + float PrePadding_ShadowDepthPass_LPV_88; + float PrePadding_ShadowDepthPass_LPV_92; + float PrePadding_ShadowDepthPass_LPV_96; + float PrePadding_ShadowDepthPass_LPV_100; + float PrePadding_ShadowDepthPass_LPV_104; + float PrePadding_ShadowDepthPass_LPV_108; + float PrePadding_ShadowDepthPass_LPV_112; + float PrePadding_ShadowDepthPass_LPV_116; + float 
PrePadding_ShadowDepthPass_LPV_120; + float PrePadding_ShadowDepthPass_LPV_124; + float PrePadding_ShadowDepthPass_LPV_128; + float PrePadding_ShadowDepthPass_LPV_132; + float PrePadding_ShadowDepthPass_LPV_136; + float PrePadding_ShadowDepthPass_LPV_140; + float PrePadding_ShadowDepthPass_LPV_144; + float PrePadding_ShadowDepthPass_LPV_148; + float PrePadding_ShadowDepthPass_LPV_152; + float PrePadding_ShadowDepthPass_LPV_156; + float PrePadding_ShadowDepthPass_LPV_160; + float PrePadding_ShadowDepthPass_LPV_164; + float PrePadding_ShadowDepthPass_LPV_168; + float PrePadding_ShadowDepthPass_LPV_172; + float PrePadding_ShadowDepthPass_LPV_176; + float PrePadding_ShadowDepthPass_LPV_180; + float PrePadding_ShadowDepthPass_LPV_184; + float PrePadding_ShadowDepthPass_LPV_188; + float PrePadding_ShadowDepthPass_LPV_192; + float PrePadding_ShadowDepthPass_LPV_196; + float PrePadding_ShadowDepthPass_LPV_200; + float PrePadding_ShadowDepthPass_LPV_204; + float PrePadding_ShadowDepthPass_LPV_208; + float PrePadding_ShadowDepthPass_LPV_212; + float PrePadding_ShadowDepthPass_LPV_216; + float PrePadding_ShadowDepthPass_LPV_220; + float PrePadding_ShadowDepthPass_LPV_224; + float PrePadding_ShadowDepthPass_LPV_228; + float PrePadding_ShadowDepthPass_LPV_232; + float PrePadding_ShadowDepthPass_LPV_236; + float PrePadding_ShadowDepthPass_LPV_240; + float PrePadding_ShadowDepthPass_LPV_244; + float PrePadding_ShadowDepthPass_LPV_248; + float PrePadding_ShadowDepthPass_LPV_252; + float PrePadding_ShadowDepthPass_LPV_256; + float PrePadding_ShadowDepthPass_LPV_260; + float PrePadding_ShadowDepthPass_LPV_264; + float PrePadding_ShadowDepthPass_LPV_268; + float4x4 ShadowDepthPass_LPV_mRsmToWorld; + float4 ShadowDepthPass_LPV_mLightColour; + float4 ShadowDepthPass_LPV_GeometryVolumeCaptureLightDirection; + float4 ShadowDepthPass_LPV_mEyePos; + packed_int3 ShadowDepthPass_LPV_mOldGridOffset; + int PrePadding_ShadowDepthPass_LPV_396; + packed_int3 ShadowDepthPass_LPV_mLpvGridOffset; + 
float ShadowDepthPass_LPV_ClearMultiplier; + float ShadowDepthPass_LPV_LpvScale; + float ShadowDepthPass_LPV_OneOverLpvScale; + float ShadowDepthPass_LPV_DirectionalOcclusionIntensity; + float ShadowDepthPass_LPV_DirectionalOcclusionRadius; + float ShadowDepthPass_LPV_RsmAreaIntensityMultiplier; + float ShadowDepthPass_LPV_RsmPixelToTexcoordMultiplier; + float ShadowDepthPass_LPV_SecondaryOcclusionStrength; + float ShadowDepthPass_LPV_SecondaryBounceStrength; + float ShadowDepthPass_LPV_VplInjectionBias; + float ShadowDepthPass_LPV_GeometryVolumeInjectionBias; + float ShadowDepthPass_LPV_EmissiveInjectionMultiplier; + int ShadowDepthPass_LPV_PropagationIndex; + float4x4 ShadowDepthPass_ProjectionMatrix; + float4x4 ShadowDepthPass_ViewMatrix; + float4 ShadowDepthPass_ShadowParams; + float ShadowDepthPass_bClampToNearPlane; + float PrePadding_ShadowDepthPass_612; + float PrePadding_ShadowDepthPass_616; + float PrePadding_ShadowDepthPass_620; + float4x4 ShadowDepthPass_ShadowViewProjectionMatrices[6]; + float4x4 ShadowDepthPass_ShadowViewMatrices[6]; +}; + +constant float4 _90 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 out_var_TEXCOORD11_centroid [[user(locn1)]]; + float out_var_TEXCOORD6 [[user(locn2)]]; + float3 out_var_TEXCOORD7 [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in_var_PN_POSITION_0 [[attribute(10)]]; + float4 in_var_PN_POSITION_1 [[attribute(11)]]; + float4 in_var_PN_POSITION_2 [[attribute(12)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(16)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(17)]]; +}; + +struct main0_patchIn +{ + float4 in_var_PN_POSITION9 [[attribute(13)]]; + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_ShadowDepthPass& ShadowDepthPass [[buffer(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray _93 = 
spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _94 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray, 3> _101 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_POSITION_0, patchIn.gl_in[0].in_var_PN_POSITION_1, patchIn.gl_in[0].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_PN_POSITION_0, patchIn.gl_in[1].in_var_PN_POSITION_1, patchIn.gl_in[1].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_PN_POSITION_0, patchIn.gl_in[2].in_var_PN_POSITION_1, patchIn.gl_in[2].in_var_PN_POSITION_2 }) }); + float _119 = gl_TessCoord.x * gl_TessCoord.x; + float _120 = gl_TessCoord.y * gl_TessCoord.y; + float _121 = gl_TessCoord.z * gl_TessCoord.z; + float4 _127 = float4(gl_TessCoord.x); + float4 _131 = float4(gl_TessCoord.y); + float4 _136 = float4(gl_TessCoord.z); + float4 _139 = float4(_119 * 3.0); + float4 _143 = float4(_120 * 3.0); + float4 _150 = float4(_121 * 3.0); + float4 _164 = ((((((((((_101[0][0] * float4(_119)) * _127) + ((_101[1][0] * float4(_120)) * _131)) + ((_101[2][0] * float4(_121)) * _136)) + ((_101[0][1] * _139) * _131)) + ((_101[0][2] * _143) * _127)) + ((_101[1][1] * _143) * _136)) + ((_101[1][2] * _150) * _131)) + ((_101[2][1] * _150) * _127)) + ((_101[2][2] * _139) * _136)) + ((((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _136) * _127) * _131); + float3 _179 = ((_93[0].xyz * float3(gl_TessCoord.x)) + (_93[1].xyz * float3(gl_TessCoord.y))).xyz + (_93[2].xyz * float3(gl_TessCoord.z)); + float4 _182 = ((_94[0] * _127) + (_94[1] * _131)) + (_94[2] * _136); + float4x4 _92 = ShadowDepthPass.ShadowDepthPass_ViewMatrix; + float4 _189 = ShadowDepthPass.ShadowDepthPass_ProjectionMatrix * float4(_164.x, _164.y, _164.z, _164.w); + float4 _200; + if 
((ShadowDepthPass.ShadowDepthPass_bClampToNearPlane > 0.0) && (_189.z < 0.0)) + { + float4 _198 = _189; + _198.z = 9.9999999747524270787835121154785e-07; + _198.w = 1.0; + _200 = _198; + } + else + { + _200 = _189; + } + float _209 = abs(dot(float3(_92[0u].z, _92[1u].z, _92[2u].z), _182.xyz)); + float4 _234 = _200; + _234.z = ((_200.z * ShadowDepthPass.ShadowDepthPass_ShadowParams.w) + ((ShadowDepthPass.ShadowDepthPass_ShadowParams.y * fast::clamp((abs(_209) > 0.0) ? (sqrt(fast::clamp(1.0 - (_209 * _209), 0.0, 1.0)) / _209) : ShadowDepthPass.ShadowDepthPass_ShadowParams.z, 0.0, ShadowDepthPass.ShadowDepthPass_ShadowParams.z)) + ShadowDepthPass.ShadowDepthPass_ShadowParams.x)) * _200.w; + out.out_var_TEXCOORD10_centroid = float4(_179.x, _179.y, _179.z, _90.w); + out.out_var_TEXCOORD11_centroid = _182; + out.out_var_TEXCOORD6 = 0.0; + out.out_var_TEXCOORD7 = _164.xyz; + out.gl_Position = _234; + return out; +} + diff --git a/reference/shaders-ue4/asm/vert/array-missing-copies.asm.vert b/reference/shaders-ue4/asm/vert/array-missing-copies.asm.vert new file mode 100644 index 00000000000..67097c57715 --- /dev/null +++ b/reference/shaders-ue4/asm/vert/array-missing-copies.asm.vert @@ -0,0 +1,467 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 
View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float 
PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float 
PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_MobileBasePass +{ + float4 MobileBasePass_Fog_ExponentialFogParameters; + float4 MobileBasePass_Fog_ExponentialFogParameters2; + float4 MobileBasePass_Fog_ExponentialFogColorParameter; + float4 MobileBasePass_Fog_ExponentialFogParameters3; + float4 MobileBasePass_Fog_InscatteringLightDirection; + float4 MobileBasePass_Fog_DirectionalInscatteringColor; + float2 MobileBasePass_Fog_SinCosInscatteringColorCubemapRotation; + float PrePadding_MobileBasePass_Fog_104; + float PrePadding_MobileBasePass_Fog_108; + packed_float3 MobileBasePass_Fog_FogInscatteringTextureParameters; + float MobileBasePass_Fog_ApplyVolumetricFog; + float PrePadding_MobileBasePass_PlanarReflection_128; + float PrePadding_MobileBasePass_PlanarReflection_132; + float PrePadding_MobileBasePass_PlanarReflection_136; + float PrePadding_MobileBasePass_PlanarReflection_140; + float 
PrePadding_MobileBasePass_PlanarReflection_144; + float PrePadding_MobileBasePass_PlanarReflection_148; + float PrePadding_MobileBasePass_PlanarReflection_152; + float PrePadding_MobileBasePass_PlanarReflection_156; + float4 MobileBasePass_PlanarReflection_ReflectionPlane; + float4 MobileBasePass_PlanarReflection_PlanarReflectionOrigin; + float4 MobileBasePass_PlanarReflection_PlanarReflectionXAxis; + float4 MobileBasePass_PlanarReflection_PlanarReflectionYAxis; + float3x4 MobileBasePass_PlanarReflection_InverseTransposeMirrorMatrix; + packed_float3 MobileBasePass_PlanarReflection_PlanarReflectionParameters; + float PrePadding_MobileBasePass_PlanarReflection_284; + float2 MobileBasePass_PlanarReflection_PlanarReflectionParameters2; + float PrePadding_MobileBasePass_PlanarReflection_296; + float PrePadding_MobileBasePass_PlanarReflection_300; + float4x4 MobileBasePass_PlanarReflection_ProjectionWithExtraFOV[2]; + float4 MobileBasePass_PlanarReflection_PlanarReflectionScreenScaleBias[2]; + float2 MobileBasePass_PlanarReflection_PlanarReflectionScreenBound; + uint MobileBasePass_PlanarReflection_bIsStereo; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_UseEditorDepthTest; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + float PrePadding_Primitive_380; + packed_float3 Primitive_LocalObjectBoundsMax; + uint 
Primitive_LightingChannelMask; + uint Primitive_LightmapDataIndex; + int Primitive_SingleCaptureIndex; +}; + +struct type_LandscapeParameters +{ + float4 LandscapeParameters_HeightmapUVScaleBias; + float4 LandscapeParameters_WeightmapUVScaleBias; + float4 LandscapeParameters_LandscapeLightmapScaleBias; + float4 LandscapeParameters_SubsectionSizeVertsLayerUVPan; + float4 LandscapeParameters_SubsectionOffsetParams; + float4 LandscapeParameters_LightmapSubsectionOffsetParams; + float4x4 LandscapeParameters_LocalToWorldNoScaling; +}; + +struct type_Globals +{ + float4 LodBias; + float4 LodValues; + float4 SectionLods; + float4 NeighborSectionLod[4]; +}; + +struct main0_out +{ + float2 out_var_TEXCOORD0 [[user(locn0)]]; + float2 out_var_TEXCOORD1 [[user(locn1)]]; + float4 out_var_TEXCOORD2 [[user(locn2)]]; + float4 out_var_TEXCOORD3 [[user(locn3)]]; + float4 out_var_TEXCOORD8 [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in_var_ATTRIBUTE0 [[attribute(0)]]; + float4 in_var_ATTRIBUTE1_0 [[attribute(1)]]; + float4 in_var_ATTRIBUTE1_1 [[attribute(2)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_MobileBasePass& MobileBasePass [[buffer(1)]], constant type_Primitive& Primitive [[buffer(2)]], constant type_LandscapeParameters& LandscapeParameters [[buffer(3)]], constant type_Globals& _Globals [[buffer(4)]]) +{ + main0_out out = {}; + spvUnsafeArray in_var_ATTRIBUTE1 = {}; + in_var_ATTRIBUTE1[0] = in.in_var_ATTRIBUTE1_0; + in_var_ATTRIBUTE1[1] = in.in_var_ATTRIBUTE1_1; + spvUnsafeArray _97; + for (int _107 = 0; _107 < 1; ) + { + _97[_107] = float4(0.0); + _107++; + continue; + } + float4 _115 = in.in_var_ATTRIBUTE0 * float4(255.0); + float2 _116 = _115.zw; + float2 _119 = fract(_116 * float2(0.5)) * float2(2.0); + float2 _121 = (_116 - _119) * float2(0.0039215688593685626983642578125); + float2 _122 = _115.xy; + float2 _126 = _122 * float2(_Globals.LodValues.w); + float 
_127 = _126.y; + float _128 = _126.x; + float4 _132 = float4(_127, _128, 1.0 - _128, 1.0 - _127) * float4(2.0); + float4 _186; + if (_119.y > 0.5) + { + float4 _161; + if (_119.x > 0.5) + { + _161 = (_132 * float4(_Globals.SectionLods.w)) + ((float4(1.0) - _132) * _Globals.NeighborSectionLod[3]); + } + else + { + _161 = (_132 * float4(_Globals.SectionLods.z)) + ((float4(1.0) - _132) * _Globals.NeighborSectionLod[2]); + } + _186 = _161; + } + else + { + float4 _185; + if (_119.x > 0.5) + { + _185 = (_132 * float4(_Globals.SectionLods.y)) + ((float4(1.0) - _132) * _Globals.NeighborSectionLod[1]); + } + else + { + _185 = (_132 * float4(_Globals.SectionLods.x)) + ((float4(1.0) - _132) * _Globals.NeighborSectionLod[0]); + } + _186 = _185; + } + float _206; + if ((_128 + _127) > 1.0) + { + float _198; + if (_128 < _127) + { + _198 = _186.w; + } + else + { + _198 = _186.z; + } + _206 = _198; + } + else + { + float _205; + if (_128 < _127) + { + _205 = _186.y; + } + else + { + _205 = _186.x; + } + _206 = _205; + } + float _207 = floor(_206); + float _220 = _121.x; + float3 _235 = select(select(select(select(select(float3(0.03125, _121.yy), float3(0.0625, _220, _121.y), bool3(_207 < 5.0)), float3(0.125, in_var_ATTRIBUTE1[1].w, _220), bool3(_207 < 4.0)), float3(0.25, in_var_ATTRIBUTE1[1].zw), bool3(_207 < 3.0)), float3(0.5, in_var_ATTRIBUTE1[1].yz), bool3(_207 < 2.0)), float3(1.0, in_var_ATTRIBUTE1[1].xy), bool3(_207 < 1.0)); + float _236 = _235.x; + float _245 = (((in_var_ATTRIBUTE1[0].x * 65280.0) + (in_var_ATTRIBUTE1[0].y * 255.0)) - 32768.0) * 0.0078125; + float _252 = (((in_var_ATTRIBUTE1[0].z * 65280.0) + (in_var_ATTRIBUTE1[0].w * 255.0)) - 32768.0) * 0.0078125; + float2 _257 = floor(_122 * float2(_236)); + float2 _271 = float2((LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.x * _236) - 1.0, fast::max((LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.x * 0.5) * _236, 2.0) - 1.0) * 
float2(LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.y); + float3 _287 = mix(float3(_257 / float2(_271.x), mix(_245, _252, _235.y)), float3(floor(_257 * float2(0.5)) / float2(_271.y), mix(_245, _252, _235.z)), float3(_206 - _207)); + float2 _288 = _119.xy; + float2 _292 = _288 * LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.ww; + float3 _296 = _287 + float3(_292, 0.0); + float4 _322 = float4((((Primitive.Primitive_LocalToWorld[0u].xyz * _296.xxx) + (Primitive.Primitive_LocalToWorld[1u].xyz * _296.yyy)) + (Primitive.Primitive_LocalToWorld[2u].xyz * _296.zzz)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0); + float2 _323 = _287.xy; + float4 _338 = float4(_322.x, _322.y, _322.z, _322.w); + float4 _339 = View.View_TranslatedWorldToClip * _338; + float3 _341 = _322.xyz - float3(View.View_TranslatedWorldCameraOrigin); + float _345 = dot(_341, _341); + float _346 = rsqrt(_345); + float _347 = _345 * _346; + float _354 = _341.z; + float _357 = fast::max(0.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.w); + float _393; + float _394; + float _395; + float _396; + if (_357 > 0.0) + { + float _361 = _357 * _346; + float _362 = _361 * _354; + float _365 = View.View_WorldCameraOrigin[2] + _362; + _393 = (1.0 - _361) * _347; + _394 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.z * exp2(-fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.y * (_365 - MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.w))); + _395 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.x * exp2(-fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.y * (_365 - MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.y))); + _396 = _354 - _362; + } + else + { + _393 = _347; + _394 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.x; + _395 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.x; + 
_396 = _354; + } + float _400 = fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.y * _396); + float _405 = log(2.0); + float _407 = 0.5 * (_405 * _405); + float _417 = fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.y * _396); + float _428 = (_395 * ((abs(_400) > 0.00999999977648258209228515625) ? ((1.0 - exp2(-_400)) / _400) : (_405 - (_407 * _400)))) + (_394 * ((abs(_417) > 0.00999999977648258209228515625) ? ((1.0 - exp2(-_417)) / _417) : (_405 - (_407 * _417)))); + float3 _459; + if (MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.w >= 0.0) + { + _459 = (MobileBasePass.MobileBasePass_Fog_DirectionalInscatteringColor.xyz * float3(pow(fast::clamp(dot(_341 * float3(_346), MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.xyz), 0.0, 1.0), MobileBasePass.MobileBasePass_Fog_DirectionalInscatteringColor.w))) * float3(1.0 - fast::clamp(exp2(-(_428 * fast::max(_393 - MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.w, 0.0))), 0.0, 1.0)); + } + else + { + _459 = float3(0.0); + } + bool _468 = (MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w > 0.0) && (_347 > MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w); + float _471 = _468 ? 
1.0 : fast::max(fast::clamp(exp2(-(_428 * _393)), 0.0, 1.0), MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.w); + float3 _475 = (MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.xyz * float3(1.0 - _471)) + select(_459, float3(0.0), bool3(_468)); + _97[0] = float4(_475, _471); + float4 _482 = _338; + _482.w = _339.w; + out.out_var_TEXCOORD0 = ((_323 + LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.zw) + _292).xy; + out.out_var_TEXCOORD1 = ((_323 * LandscapeParameters.LandscapeParameters_WeightmapUVScaleBias.xy) + LandscapeParameters.LandscapeParameters_WeightmapUVScaleBias.zw) + (_288 * LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.zz); + out.out_var_TEXCOORD2 = float4(float4(0.0).x, float4(0.0).y, _97[0].x, _97[0].y); + out.out_var_TEXCOORD3 = float4(float4(0.0).x, float4(0.0).y, _97[0].z, _97[0].w); + out.out_var_TEXCOORD8 = _482; + out.gl_Position = _339; + return out; +} + diff --git a/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert b/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert new file mode 100644 index 00000000000..5398fec390c --- /dev/null +++ b/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert @@ -0,0 +1,387 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +// Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) +uint2 spvTexelBufferCoord(uint tc) +{ + return uint2(tc % 4096, tc / 4096); +} + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + 
float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 
View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float 
View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Primitive +{ + float4x4 
Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_UseEditorDepthTest; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + float PrePadding_Primitive_380; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightingChannelMask; + uint Primitive_LightmapDataIndex; + int Primitive_SingleCaptureIndex; +}; + +struct type_MobileShadowDepthPass +{ + float PrePadding_MobileShadowDepthPass_0; + float PrePadding_MobileShadowDepthPass_4; + float PrePadding_MobileShadowDepthPass_8; + float PrePadding_MobileShadowDepthPass_12; + float PrePadding_MobileShadowDepthPass_16; + float PrePadding_MobileShadowDepthPass_20; + float PrePadding_MobileShadowDepthPass_24; + float PrePadding_MobileShadowDepthPass_28; + float PrePadding_MobileShadowDepthPass_32; + float PrePadding_MobileShadowDepthPass_36; + float PrePadding_MobileShadowDepthPass_40; + float PrePadding_MobileShadowDepthPass_44; + float PrePadding_MobileShadowDepthPass_48; + float PrePadding_MobileShadowDepthPass_52; + float PrePadding_MobileShadowDepthPass_56; + float PrePadding_MobileShadowDepthPass_60; + float PrePadding_MobileShadowDepthPass_64; + float PrePadding_MobileShadowDepthPass_68; + float PrePadding_MobileShadowDepthPass_72; + float PrePadding_MobileShadowDepthPass_76; + float4x4 MobileShadowDepthPass_ProjectionMatrix; + float2 MobileShadowDepthPass_ShadowParams; + float 
MobileShadowDepthPass_bClampToNearPlane; + float PrePadding_MobileShadowDepthPass_156; + float4x4 MobileShadowDepthPass_ShadowViewProjectionMatrices[6]; +}; + +struct type_EmitterDynamicUniforms +{ + float2 EmitterDynamicUniforms_LocalToWorldScale; + float EmitterDynamicUniforms_EmitterInstRandom; + float PrePadding_EmitterDynamicUniforms_12; + float4 EmitterDynamicUniforms_AxisLockRight; + float4 EmitterDynamicUniforms_AxisLockUp; + float4 EmitterDynamicUniforms_DynamicColor; + float4 EmitterDynamicUniforms_MacroUVParameters; +}; + +struct type_EmitterUniforms +{ + float4 EmitterUniforms_ColorCurve; + float4 EmitterUniforms_ColorScale; + float4 EmitterUniforms_ColorBias; + float4 EmitterUniforms_MiscCurve; + float4 EmitterUniforms_MiscScale; + float4 EmitterUniforms_MiscBias; + float4 EmitterUniforms_SizeBySpeed; + float4 EmitterUniforms_SubImageSize; + float4 EmitterUniforms_TangentSelector; + packed_float3 EmitterUniforms_CameraFacingBlend; + float EmitterUniforms_RemoveHMDRoll; + float EmitterUniforms_RotationRateScale; + float EmitterUniforms_RotationBias; + float EmitterUniforms_CameraMotionBlurAmount; + float PrePadding_EmitterUniforms_172; + float2 EmitterUniforms_PivotOffset; +}; + +struct type_Globals +{ + uint ParticleIndicesOffset; +}; + +struct main0_out +{ + float out_var_TEXCOORD6 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float2 in_var_ATTRIBUTE0 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], constant type_MobileShadowDepthPass& MobileShadowDepthPass [[buffer(2)]], constant type_EmitterDynamicUniforms& EmitterDynamicUniforms [[buffer(3)]], constant type_EmitterUniforms& EmitterUniforms [[buffer(4)]], constant type_Globals& _Globals [[buffer(5)]], texture2d ParticleIndices [[texture(0)]], texture2d PositionTexture [[texture(1)]], texture2d VelocityTexture [[texture(2)]], texture2d AttributesTexture 
[[texture(3)]], texture2d CurveTexture [[texture(4)]], sampler PositionTextureSampler [[sampler(0)]], sampler VelocityTextureSampler [[sampler(1)]], sampler AttributesTextureSampler [[sampler(2)]], sampler CurveTextureSampler [[sampler(3)]], uint gl_VertexIndex [[vertex_id]], uint gl_InstanceIndex [[instance_id]]) +{ + main0_out out = {}; + float2 _133 = ParticleIndices.read(spvTexelBufferCoord((_Globals.ParticleIndicesOffset + ((gl_InstanceIndex * 16u) + (gl_VertexIndex / 4u))))).xy; + float4 _137 = PositionTexture.sample(PositionTextureSampler, _133, level(0.0)); + float4 _145 = AttributesTexture.sample(AttributesTextureSampler, _133, level(0.0)); + float _146 = _137.w; + float3 _158 = float3x3(Primitive.Primitive_LocalToWorld[0].xyz, Primitive.Primitive_LocalToWorld[1].xyz, Primitive.Primitive_LocalToWorld[2].xyz) * VelocityTexture.sample(VelocityTextureSampler, _133, level(0.0)).xyz; + float3 _160 = fast::normalize(_158 + float3(0.0, 0.0, 9.9999997473787516355514526367188e-05)); + float2 _204 = ((((_145.xy + float2((_145.x < 0.5) ? 0.0 : (-0.5), (_145.y < 0.5) ? 
0.0 : (-0.5))) * float2(2.0)) * (((CurveTexture.sample(CurveTextureSampler, (EmitterUniforms.EmitterUniforms_MiscCurve.xy + (EmitterUniforms.EmitterUniforms_MiscCurve.zw * float2(_146))), level(0.0)) * EmitterUniforms.EmitterUniforms_MiscScale) + EmitterUniforms.EmitterUniforms_MiscBias).xy * EmitterDynamicUniforms.EmitterDynamicUniforms_LocalToWorldScale)) * fast::min(fast::max(EmitterUniforms.EmitterUniforms_SizeBySpeed.xy * float2(length(_158)), float2(1.0)), EmitterUniforms.EmitterUniforms_SizeBySpeed.zw)) * float2(step(_146, 1.0)); + float3 _239 = float4((((Primitive.Primitive_LocalToWorld[0u].xyz * _137.xxx) + (Primitive.Primitive_LocalToWorld[1u].xyz * _137.yyy)) + (Primitive.Primitive_LocalToWorld[2u].xyz * _137.zzz)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0).xyz; + float3 _242 = float3(EmitterUniforms.EmitterUniforms_RemoveHMDRoll); + float3 _251 = mix(mix(float3(View.View_ViewRight), float3(View.View_HMDViewNoRollRight), _242), EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz, float3(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.w)); + float3 _259 = mix(-mix(float3(View.View_ViewUp), float3(View.View_HMDViewNoRollUp), _242), EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockUp.xyz, float3(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockUp.w)); + float3 _260 = float3(View.View_TranslatedWorldCameraOrigin) - _239; + float _261 = dot(_260, _260); + float3 _265 = _260 / float3(sqrt(fast::max(_261, 0.00999999977648258209228515625))); + float3 _335; + float3 _336; + if (EmitterUniforms.EmitterUniforms_CameraFacingBlend[0] > 0.0) + { + float3 _279 = cross(_265, float3(0.0, 0.0, 1.0)); + float3 _284 = _279 / float3(sqrt(fast::max(dot(_279, _279), 0.00999999977648258209228515625))); + float3 _286 = float3(fast::clamp((_261 * EmitterUniforms.EmitterUniforms_CameraFacingBlend[1]) - EmitterUniforms.EmitterUniforms_CameraFacingBlend[2], 0.0, 1.0)); + _335 = fast::normalize(mix(_251, 
_284, _286)); + _336 = fast::normalize(mix(_259, cross(_265, _284), _286)); + } + else + { + float3 _333; + float3 _334; + if (EmitterUniforms.EmitterUniforms_TangentSelector.y > 0.0) + { + float3 _297 = cross(_265, _160); + _333 = _297 / float3(sqrt(fast::max(dot(_297, _297), 0.00999999977648258209228515625))); + _334 = -_160; + } + else + { + float3 _331; + float3 _332; + if (EmitterUniforms.EmitterUniforms_TangentSelector.z > 0.0) + { + float3 _310 = cross(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz, _265); + _331 = EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz; + _332 = -(_310 / float3(sqrt(fast::max(dot(_310, _310), 0.00999999977648258209228515625)))); + } + else + { + float3 _329; + float3 _330; + if (EmitterUniforms.EmitterUniforms_TangentSelector.w > 0.0) + { + float3 _322 = cross(_265, float3(0.0, 0.0, 1.0)); + float3 _327 = _322 / float3(sqrt(fast::max(dot(_322, _322), 0.00999999977648258209228515625))); + _329 = _327; + _330 = cross(_265, _327); + } + else + { + _329 = _251; + _330 = _259; + } + _331 = _329; + _332 = _330; + } + _333 = _331; + _334 = _332; + } + _335 = _333; + _336 = _334; + } + float _339 = ((_145.z + ((_145.w * EmitterUniforms.EmitterUniforms_RotationRateScale) * _146)) * 6.283185482025146484375) + EmitterUniforms.EmitterUniforms_RotationBias; + float3 _342 = float3(sin(_339)); + float3 _344 = float3(cos(_339)); + float3 _367 = _239 + ((float3(_204.x * (in.in_var_ATTRIBUTE0.x + EmitterUniforms.EmitterUniforms_PivotOffset.x)) * ((_342 * _336) + (_344 * _335))) + (float3(_204.y * (in.in_var_ATTRIBUTE0.y + EmitterUniforms.EmitterUniforms_PivotOffset.y)) * ((_344 * _336) - (_342 * _335)))); + float4 _371 = float4(_367, 1.0); + float4 _375 = MobileShadowDepthPass.MobileShadowDepthPass_ProjectionMatrix * float4(_371.x, _371.y, _371.z, _371.w); + float4 _386; + if ((MobileShadowDepthPass.MobileShadowDepthPass_bClampToNearPlane > 0.0) && (_375.z < 0.0)) + { + float4 _384 = _375; + _384.z = 
9.9999999747524270787835121154785e-07; + _384.w = 1.0; + _386 = _384; + } + else + { + _386 = _375; + } + float4 _396 = _386; + _396.z = ((_386.z * MobileShadowDepthPass.MobileShadowDepthPass_ShadowParams.y) + MobileShadowDepthPass.MobileShadowDepthPass_ShadowParams.x) * _386.w; + out.out_var_TEXCOORD6 = 0.0; + out.gl_Position = _396; + return out; +} + diff --git a/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk b/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk deleted file mode 100644 index 4aaf397a0fa..00000000000 --- a/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk +++ /dev/null @@ -1,11 +0,0 @@ -#version 450 -#extension GL_AMD_shader_fragment_mask : require - -layout(input_attachment_index = 0, set = 0, binding = 0) uniform subpassInputMS t; - -void main() -{ - vec4 test2 = fragmentFetchAMD(t, 4u); - uint testi2 = fragmentMaskFetchAMD(t); -} - diff --git a/reference/shaders/amd/gcn_shader.comp b/reference/shaders/amd/gcn_shader.comp index 1c0c5ae38bf..380fb9dfa7b 100644 --- a/reference/shaders/amd/gcn_shader.comp +++ b/reference/shaders/amd/gcn_shader.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. 
+#endif #extension GL_AMD_gcn_shader : require layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; diff --git a/reference/shaders/amd/shader_ballot.comp b/reference/shaders/amd/shader_ballot.comp index 1fade727c6b..04d363457a0 100644 --- a/reference/shaders/amd/shader_ballot.comp +++ b/reference/shaders/amd/shader_ballot.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. +#endif #extension GL_ARB_shader_ballot : require #extension GL_AMD_shader_ballot : require layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; diff --git a/reference/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp b/reference/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp deleted file mode 100644 index a14343ae127..00000000000 --- a/reference/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp +++ /dev/null @@ -1,11 +0,0 @@ -#version 450 -#extension GL_AMD_shader_ballot : require -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -void main() -{ - float addInvocations = addInvocationsNonUniformAMD(0.0); - int minInvocations = minInvocationsNonUniformAMD(1); - uint maxInvocations = uint(maxInvocationsNonUniformAMD(4)); -} - diff --git a/reference/shaders/asm/comp/bitcast_icmp.asm.comp b/reference/shaders/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..8d59fcc856a --- /dev/null +++ b/reference/shaders/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) restrict buffer _3_5 +{ + ivec4 _m0; + uvec4 _m1; +} _5; + +layout(binding = 1, std430) restrict buffer _4_6 +{ + uvec4 _m0; + ivec4 _m1; +} _6; + +void main() +{ + _6._m0 = uvec4(lessThan(ivec4(_5._m1), _5._m0)); + _6._m0 = 
uvec4(lessThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(lessThan(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(lessThanEqual(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(greaterThan(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(greaterThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(greaterThan(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(greaterThanEqual(_5._m1, uvec4(_5._m0))); +} + diff --git a/reference/shaders/asm/comp/bitcast_iequal.asm.comp b/reference/shaders/asm/comp/bitcast_iequal.asm.comp index bdb3eeb9afd..8a552dba068 100644 --- a/reference/shaders/asm/comp/bitcast_iequal.asm.comp +++ b/reference/shaders/asm/comp/bitcast_iequal.asm.comp @@ -21,13 +21,13 @@ void main() bvec4 _35 = equal(_30, ivec4(_31)); bvec4 _36 = equal(_31, _31); bvec4 _37 = equal(_30, _30); - _6._m0 = mix(uvec4(0u), uvec4(1u), _34); - _6._m0 = mix(uvec4(0u), uvec4(1u), _35); - _6._m0 = mix(uvec4(0u), uvec4(1u), _36); - _6._m0 = mix(uvec4(0u), uvec4(1u), _37); - _6._m1 = mix(ivec4(0), ivec4(1), _34); - _6._m1 = mix(ivec4(0), ivec4(1), _35); - _6._m1 = mix(ivec4(0), ivec4(1), _36); - _6._m1 = mix(ivec4(0), ivec4(1), _37); + _6._m0 = uvec4(_34); + _6._m0 = uvec4(_35); + _6._m0 = uvec4(_36); + _6._m0 = uvec4(_37); + _6._m1 = ivec4(_34); + _6._m1 = ivec4(_35); + _6._m1 = ivec4(_36); + _6._m1 = ivec4(_37); } diff --git a/reference/shaders/asm/comp/block-name-alias-global.asm.comp b/reference/shaders/asm/comp/block-name-alias-global.asm.comp index 08fccbcde6b..86ba0a3b9f9 100644 --- a/reference/shaders/asm/comp/block-name-alias-global.asm.comp +++ b/reference/shaders/asm/comp/block-name-alias-global.asm.comp @@ -7,12 +7,6 @@ struct A int b; }; -struct A_1 -{ - int a; - int b; -}; - layout(binding = 1, std430) buffer C1 { A Data[]; @@ -20,7 +14,7 @@ layout(binding = 1, std430) buffer C1 layout(binding = 2, std140) uniform C2 { - A_1 Data[1024]; + A Data[1024]; } C2_1; layout(binding = 0, std430) buffer B @@ -30,7 +24,7 @@ layout(binding = 0, std430) buffer B layout(binding = 3, std140) uniform B 
{ - A_1 Data[1024]; + A Data[1024]; } C4; void main() diff --git a/reference/shaders/asm/comp/control-flow-hints.asm.comp b/reference/shaders/asm/comp/control-flow-hints.asm.comp new file mode 100644 index 00000000000..a1e0a082252 --- /dev/null +++ b/reference/shaders/asm/comp/control-flow-hints.asm.comp @@ -0,0 +1,58 @@ +#version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH [[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer bar +{ + vec4 _data[]; +} bar_1; + +layout(binding = 1, std430) buffer foo +{ + vec4 _data[]; +} foo_1; + +void _main() +{ + SPIRV_CROSS_UNROLL + for (int i = 0; i < 16; i++) + { + bar_1._data[i] = foo_1._data[i]; + } + SPIRV_CROSS_LOOP + for (int i_1 = 0; i_1 < 16; i_1++) + { + bar_1._data[15 - i_1] = foo_1._data[i_1]; + } + float v = bar_1._data[10].x; + float w = foo_1._data[10].x; + SPIRV_CROSS_BRANCH + if (v > 10.0) + { + foo_1._data[20] = vec4(5.0); + } + float value = 20.0; + SPIRV_CROSS_FLATTEN + if (w > 40.0) + { + value = 20.0; + } + foo_1._data[20] = vec4(value); +} + +void main() +{ + _main(); +} + diff --git a/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000000..449a87d0dd3 --- /dev/null +++ b/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,39 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float a1; + vec2 a2; + vec3 a3; + vec4 a4; + float b1; + vec2 b2; + vec3 b3; + vec4 b4; + float c1; + vec2 c2; + vec3 c3; + vec4 c4; 
+} _4; + +void main() +{ + _4.a1 = min(_4.b1, _4.c1); + _4.a2 = min(_4.b2, _4.c2); + _4.a3 = min(_4.b3, _4.c3); + _4.a4 = min(_4.b4, _4.c4); + _4.a1 = max(_4.b1, _4.c1); + _4.a2 = max(_4.b2, _4.c2); + _4.a3 = max(_4.b3, _4.c3); + _4.a4 = max(_4.b4, _4.c4); + _4.a1 = clamp(_4.a1, _4.b1, _4.c1); + _4.a2 = clamp(_4.a2, _4.b2, _4.c2); + _4.a3 = clamp(_4.a3, _4.b3, _4.c3); + _4.a4 = clamp(_4.a4, _4.b4, _4.c4); + for (int i = 0; i < 2; i++, _4.a1 = clamp(_4.a1, _4.b2.x, _4.b2.y)) + { + _4.a2 = min(_4.b2, _4.c2); + } +} + diff --git a/reference/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp b/reference/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp deleted file mode 100644 index 482cfd8a069..00000000000 --- a/reference/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp +++ /dev/null @@ -1,28 +0,0 @@ -#version 450 - -#ifndef SPIRV_CROSS_CONSTANT_ID_0 -#define SPIRV_CROSS_CONSTANT_ID_0 1u -#endif -#ifndef SPIRV_CROSS_CONSTANT_ID_2 -#define SPIRV_CROSS_CONSTANT_ID_2 3u -#endif - -layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_0, local_size_y = 2, local_size_z = SPIRV_CROSS_CONSTANT_ID_2) in; - -layout(binding = 0, std430) buffer _6_8 -{ - float _m0[]; -} _8; - -layout(binding = 1, std430) buffer _6_9 -{ - float _m0[]; -} _9; - -uvec3 _22 = gl_WorkGroupSize; - -void main() -{ - _8._m0[gl_WorkGroupID.x] = _9._m0[gl_WorkGroupID.x] + _8._m0[gl_WorkGroupID.x]; -} - diff --git a/reference/opt/shaders/asm/comp/switch-break-ladder.asm.comp b/reference/shaders/asm/comp/switch-break-ladder.asm.invalid.comp similarity index 83% rename from reference/opt/shaders/asm/comp/switch-break-ladder.asm.comp rename to reference/shaders/asm/comp/switch-break-ladder.asm.invalid.comp index 4cf3f126d6a..f326869cef5 100644 --- a/reference/opt/shaders/asm/comp/switch-break-ladder.asm.comp +++ b/reference/shaders/asm/comp/switch-break-ladder.asm.invalid.comp @@ -10,13 +10,12 @@ layout(binding = 0, std430) buffer BUF void main() { - int _44; - _44 = o.a; - int _48; + int c = o.a; + 
int a; for (;;) { bool _22_ladder_break = false; - switch (_44) + switch (c) { case 5: { @@ -28,6 +27,7 @@ void main() case 10: case 20: { + c += c; _30_ladder_break = true; break; } @@ -41,29 +41,24 @@ void main() break; } } - _48 = _44 + _44; break; } case 1: case 2: case 3: { + a = c; _22_ladder_break = true; break; } - default: - { - _48 = _44; - break; - } } if (_22_ladder_break) { break; } - _44 = _48 + 1; + c++; continue; } - o.b = _44; + o.b = a; } diff --git a/reference/shaders/asm/comp/undefined-constant-composite.asm.comp b/reference/shaders/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 00000000000..dae558dc84a --- /dev/null +++ b/reference/shaders/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,33 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct _20 +{ + int _m0; + int _m1; +}; + +int _28; + +layout(binding = 1, std430) buffer _5_6 +{ + int _m0[10]; +} _6; + +layout(binding = 0, std430) buffer _7_8 +{ + int _m0[10]; +} _8; + +int _39(int _41, _20 _42) +{ + return _41 + _42._m1; +} + +void main() +{ + int _32 = _8._m0[gl_GlobalInvocationID.x]; + _20 _33 = _20(_28, 200); + _6._m0[gl_GlobalInvocationID.x] = _39(_32, _33); +} + diff --git a/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp b/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp deleted file mode 100644 index 7755593f573..00000000000 --- a/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp +++ /dev/null @@ -1,18 +0,0 @@ -#version 430 -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -layout(binding = 0, std430) buffer _8_9 -{ - float _m0[]; -} _9; - -layout(binding = 1, std430) buffer _8_10 -{ - float _m0[]; -} _10; - -void main() -{ - _10._m0[gl_GlobalInvocationID.x] = -_9._m0[gl_GlobalInvocationID.x]; -} - diff --git a/reference/shaders/asm/frag/for-loop-phi-only-continue.asm.frag b/reference/shaders/asm/frag/for-loop-phi-only-continue.asm.frag index 
feb45db4405..31011429448 100644 --- a/reference/shaders/asm/frag/for-loop-phi-only-continue.asm.frag +++ b/reference/shaders/asm/frag/for-loop-phi-only-continue.asm.frag @@ -12,7 +12,6 @@ void main() { _20 = _19 + 1.0; _23 = _22 + 1; - continue; } FragColor = vec4(_19); } diff --git a/reference/shaders/asm/frag/hlsl-sample-cmp-level-zero-cube.asm.frag b/reference/shaders/asm/frag/hlsl-sample-cmp-level-zero-cube.asm.frag index 3585285eb66..ec89d44eef9 100644 --- a/reference/shaders/asm/frag/hlsl-sample-cmp-level-zero-cube.asm.frag +++ b/reference/shaders/asm/frag/hlsl-sample-cmp-level-zero-cube.asm.frag @@ -6,7 +6,7 @@ layout(location = 0) out float _entryPointOutput; float _main() { - vec4 _33 = vec4(vec3(0.100000001490116119384765625), 0.5); + vec4 _33 = vec4(0.100000001490116119384765625, 0.100000001490116119384765625, 0.100000001490116119384765625, 0.5); return textureGrad(SPIRV_Cross_CombinedpointLightShadowMapshadowSamplerPCF, vec4(_33.xyz, _33.w), vec3(0.0), vec3(0.0)); } diff --git a/reference/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag b/reference/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag new file mode 100644 index 00000000000..60bb78aa5c0 --- /dev/null +++ b/reference/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag @@ -0,0 +1,38 @@ +#version 450 + +uniform sampler2D SPIRV_Cross_CombinedparamSPIRV_Cross_DummySampler; +uniform sampler2D SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler; +uniform sampler2D SPIRV_Cross_CombinedparamSampler; +uniform sampler2D SPIRV_Cross_CombinedSampledImageSampler; + +layout(location = 0) out vec4 _entryPointOutput; + +vec4 sample_fetch(ivec3 UV, sampler2D SPIRV_Cross_CombinedtexSPIRV_Cross_DummySampler) +{ + return texelFetch(SPIRV_Cross_CombinedtexSPIRV_Cross_DummySampler, UV.xy, UV.z); +} + +vec4 sample_sampler(vec2 UV, sampler2D SPIRV_Cross_CombinedtexSampler) +{ + return texture(SPIRV_Cross_CombinedtexSampler, UV); +} + +vec4 _main(vec4 xIn) +{ + 
ivec3 coord = ivec3(int(xIn.x * 1280.0), int(xIn.y * 720.0), 0); + ivec3 param = coord; + vec4 value = sample_fetch(param, SPIRV_Cross_CombinedparamSPIRV_Cross_DummySampler); + value += texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, coord.xy, coord.z); + vec2 param_1 = xIn.xy; + value += sample_sampler(param_1, SPIRV_Cross_CombinedparamSampler); + value += texture(SPIRV_Cross_CombinedSampledImageSampler, xIn.xy); + return value; +} + +void main() +{ + vec4 xIn = gl_FragCoord; + vec4 param = xIn; + _entryPointOutput = _main(param); +} + diff --git a/reference/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag.vk b/reference/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag.vk new file mode 100644 index 00000000000..e4d9fc4543e --- /dev/null +++ b/reference/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag.vk @@ -0,0 +1,37 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler Sampler; +layout(set = 0, binding = 0) uniform texture2D SampledImage; +layout(set = 0, binding = 0) uniform sampler SPIRV_Cross_DummySampler; + +layout(location = 0) out vec4 _entryPointOutput; + +vec4 sample_fetch(texture2D tex, ivec3 UV) +{ + return texelFetch(sampler2D(tex, SPIRV_Cross_DummySampler), UV.xy, UV.z); +} + +vec4 sample_sampler(texture2D tex, vec2 UV) +{ + return texture(sampler2D(tex, Sampler), UV); +} + +vec4 _main(vec4 xIn) +{ + ivec3 coord = ivec3(int(xIn.x * 1280.0), int(xIn.y * 720.0), 0); + ivec3 param = coord; + vec4 value = sample_fetch(SampledImage, param); + value += texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), coord.xy, coord.z); + vec2 param_1 = xIn.xy; + value += sample_sampler(SampledImage, param_1); + value += texture(sampler2D(SampledImage, Sampler), xIn.xy); + return value; +} + +void main() +{ + vec4 xIn = gl_FragCoord; + vec4 param = xIn; + _entryPointOutput = _main(param); +} + diff --git 
a/reference/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag b/reference/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag new file mode 100644 index 00000000000..2040dd1afb7 --- /dev/null +++ b/reference/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag @@ -0,0 +1,13 @@ +#version 450 + +uniform sampler2D SPIRV_Cross_CombineduSampler2DSPIRV_Cross_DummySampler; +uniform sampler2DMS SPIRV_Cross_CombineduSampler2DMSSPIRV_Cross_DummySampler; + +void main() +{ + ivec2 b = textureSize(SPIRV_Cross_CombineduSampler2DSPIRV_Cross_DummySampler, 0); + ivec2 c = textureSize(SPIRV_Cross_CombineduSampler2DMSSPIRV_Cross_DummySampler); + int l1 = textureQueryLevels(SPIRV_Cross_CombineduSampler2DSPIRV_Cross_DummySampler); + int s0 = textureSamples(SPIRV_Cross_CombineduSampler2DMSSPIRV_Cross_DummySampler); +} + diff --git a/reference/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag.vk b/reference/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag.vk new file mode 100644 index 00000000000..828d2a87271 --- /dev/null +++ b/reference/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag.vk @@ -0,0 +1,14 @@ +#version 450 + +layout(set = 0, binding = 0) uniform texture2D uSampler2D; +layout(set = 0, binding = 0) uniform texture2DMS uSampler2DMS; +layout(set = 0, binding = 0) uniform sampler SPIRV_Cross_DummySampler; + +void main() +{ + ivec2 b = textureSize(sampler2D(uSampler2D, SPIRV_Cross_DummySampler), 0); + ivec2 c = textureSize(sampler2DMS(uSampler2DMS, SPIRV_Cross_DummySampler)); + int l1 = textureQueryLevels(sampler2D(uSampler2D, SPIRV_Cross_DummySampler)); + int s0 = textureSamples(sampler2DMS(uSampler2DMS, SPIRV_Cross_DummySampler)); +} + diff --git a/reference/shaders/asm/frag/inf-nan-constant-double.asm.frag b/reference/shaders/asm/frag/inf-nan-constant-double.asm.frag index d8e29aa4041..e53b282f879 100644 --- a/reference/shaders/asm/frag/inf-nan-constant-double.asm.frag +++ 
b/reference/shaders/asm/frag/inf-nan-constant-double.asm.frag @@ -1,11 +1,17 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. +#endif layout(location = 0) out vec3 FragColor; layout(location = 0) flat in double vTmp; void main() { - FragColor = vec3(dvec3(uint64BitsToDouble(0x7ff0000000000000ul), uint64BitsToDouble(0xfff0000000000000ul), uint64BitsToDouble(0x7ff8000000000000ul)) + dvec3(vTmp)); + FragColor = vec3(dvec3(uint64BitsToDouble(0x7ff0000000000000ul /* inf */), uint64BitsToDouble(0xfff0000000000000ul /* -inf */), uint64BitsToDouble(0x7ff8000000000000ul /* nan */)) + dvec3(vTmp)); } diff --git a/reference/shaders/asm/frag/inf-nan-constant.asm.frag b/reference/shaders/asm/frag/inf-nan-constant.asm.frag index dd4284c9b11..b5e0c6e968b 100644 --- a/reference/shaders/asm/frag/inf-nan-constant.asm.frag +++ b/reference/shaders/asm/frag/inf-nan-constant.asm.frag @@ -6,6 +6,6 @@ layout(location = 0) out highp vec3 FragColor; void main() { - FragColor = vec3(uintBitsToFloat(0x7f800000u), uintBitsToFloat(0xff800000u), uintBitsToFloat(0x7fc00000u)); + FragColor = vec3(uintBitsToFloat(0x7f800000u /* inf */), uintBitsToFloat(0xff800000u /* -inf */), uintBitsToFloat(0x7fc00000u /* nan */)); } diff --git a/reference/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag b/reference/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag index e1edccff69e..3ee68e321d9 100644 --- a/reference/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag +++ b/reference/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag @@ -1,4 +1,16 @@ #version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH [[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] 
+#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif layout(binding = 0, std140) uniform Foo { @@ -10,6 +22,8 @@ layout(binding = 0, std140) uniform Foo layout(location = 0) in vec3 fragWorld; layout(location = 0) out int _entryPointOutput; +mat4 spvWorkaroundRowMajor(mat4 wrap) { return wrap; } + mat4 GetClip2TexMatrix() { if (_11.test == 0) @@ -21,9 +35,10 @@ mat4 GetClip2TexMatrix() int GetCascade(vec3 fragWorldPosition) { + SPIRV_CROSS_UNROLL for (uint cascadeIndex = 0u; cascadeIndex < _11.shadowCascadesNum; cascadeIndex++) { - mat4 worldToShadowMap = GetClip2TexMatrix() * _11.lightVP[cascadeIndex]; + mat4 worldToShadowMap = GetClip2TexMatrix() * spvWorkaroundRowMajor(_11.lightVP[cascadeIndex]); vec4 fragShadowMapPos = worldToShadowMap * vec4(fragWorldPosition, 1.0); if ((((fragShadowMapPos.z >= 0.0) && (fragShadowMapPos.z <= 1.0)) && (max(fragShadowMapPos.x, fragShadowMapPos.y) <= 1.0)) && (min(fragShadowMapPos.x, fragShadowMapPos.y) >= 0.0)) { diff --git a/reference/shaders/asm/frag/loop-header-to-continue.asm.frag b/reference/shaders/asm/frag/loop-header-to-continue.asm.frag index a99322d67e1..8a3b664bc44 100644 --- a/reference/shaders/asm/frag/loop-header-to-continue.asm.frag +++ b/reference/shaders/asm/frag/loop-header-to-continue.asm.frag @@ -22,8 +22,10 @@ layout(location = 0) out vec4 _entryPointOutput; void main() { + vec2 _45 = vec2(0.0, _8.CB1.TextureSize.w); vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv); float _50 = _49.y; + float _53 = clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375); float _55; float _58; _55 = 0.0; @@ -31,8 +33,8 @@ void main() for (int _60 = -3; _60 <= 3; ) { float _64 = float(_60); - vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64)); - float _78 = 
exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375)); + vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64)); + float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < _53); _55 += (_72.x * _78); _58 += _78; _60++; diff --git a/reference/shaders/asm/frag/op-phi-swap-continue-block.asm.frag b/reference/shaders/asm/frag/op-phi-swap-continue-block.asm.frag index 3dae3e161c2..d62b63a0e0a 100644 --- a/reference/shaders/asm/frag/op-phi-swap-continue-block.asm.frag +++ b/reference/shaders/asm/frag/op-phi-swap-continue-block.asm.frag @@ -18,7 +18,6 @@ void main() _24 = _5.uJ; for (int _26 = 0; _26 < _5.uCount; _23_copy = _23, _23 = _24, _24 = _23_copy, _26++) { - continue; } FragColor = float(_24 - _23) * float(_5.uJ * _5.uK); } diff --git a/reference/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag b/reference/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag new file mode 100644 index 00000000000..eddb3829b70 --- /dev/null +++ b/reference/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag @@ -0,0 +1,15 @@ +#version 320 es +precision mediump float; +precision highp int; + +const uint _15 = 3u; + +void main() +{ + vec3 v = vec3(0.0); + if (false) + { + v[0] = 99.0; + } +} + diff --git a/reference/shaders/asm/frag/out-of-bounds-access.asm.frag b/reference/shaders/asm/frag/out-of-bounds-access.asm.frag new file mode 100644 index 00000000000..080283d4120 --- /dev/null +++ b/reference/shaders/asm/frag/out-of-bounds-access.asm.frag @@ -0,0 +1,14 @@ +#version 320 es +precision mediump float; +precision highp int; + +void main() +{ + vec3 v = vec3(0.0); + if (false) + { + v.x = 99.0; + v.x = 88.0; + } +} + diff --git a/reference/shaders/asm/frag/pack-and-unpack-uint2.asm.frag 
b/reference/shaders/asm/frag/pack-and-unpack-uint2.asm.frag new file mode 100644 index 00000000000..039c2b598d5 --- /dev/null +++ b/reference/shaders/asm/frag/pack-and-unpack-uint2.asm.frag @@ -0,0 +1,18 @@ +#version 450 +#if defined(GL_ARB_gpu_shader_int64) +#extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. +#endif + +layout(location = 0) out vec4 FragColor; + +void main() +{ + uint64_t _packed = packUint2x32(uvec2(18u, 52u)); + uvec2 unpacked = unpackUint2x32(_packed); + FragColor = vec4(float(unpacked.x), float(unpacked.y), 1.0, 1.0); +} + diff --git a/reference/shaders/asm/frag/storage-class-output-initializer.asm.frag b/reference/shaders/asm/frag/storage-class-output-initializer.asm.frag index 229358757aa..a5faaefb309 100644 --- a/reference/shaders/asm/frag/storage-class-output-initializer.asm.frag +++ b/reference/shaders/asm/frag/storage-class-output-initializer.asm.frag @@ -2,10 +2,12 @@ layout(location = 0) out vec4 FragColors[2]; layout(location = 2) out vec4 FragColor; +const vec4 _3_init[2] = vec4[](vec4(1.0, 2.0, 3.0, 4.0), vec4(10.0)); +const vec4 _4_init = vec4(5.0); void main() { - FragColors = vec4[](vec4(1.0, 2.0, 3.0, 4.0), vec4(10.0)); - FragColor = vec4(5.0); + FragColors = _3_init; + FragColor = _4_init; } diff --git a/reference/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag b/reference/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag index b2473f4d037..7930ca3b4a0 100644 --- a/reference/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag +++ b/reference/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag @@ -8,12 +8,12 @@ struct Foo float var2; }; +Foo _22; + layout(binding = 0) uniform mediump sampler2D uSampler; layout(location = 0) out vec4 FragColor; -Foo _22; - void main() { FragColor = texture(uSampler, vec2(_22.var1, _22.var2)); diff --git 
a/reference/shaders/asm/frag/switch-preserve-sign-extension.asm.frag b/reference/shaders/asm/frag/switch-preserve-sign-extension.asm.frag new file mode 100644 index 00000000000..08921e1e923 --- /dev/null +++ b/reference/shaders/asm/frag/switch-preserve-sign-extension.asm.frag @@ -0,0 +1,27 @@ +#version 330 +#ifdef GL_ARB_shading_language_420pack +#extension GL_ARB_shading_language_420pack : require +#endif + +void main() +{ + int sw = 42; + int result = 0; + switch (sw) + { + case -42: + { + result = 42; + } + case 420: + { + result = 420; + } + case -1234: + { + result = 420; + break; + } + } +} + diff --git a/reference/shaders/asm/frag/undef-variable-store.asm.frag b/reference/shaders/asm/frag/undef-variable-store.asm.frag index 26ad568ad05..a3b1290de6b 100644 --- a/reference/shaders/asm/frag/undef-variable-store.asm.frag +++ b/reference/shaders/asm/frag/undef-variable-store.asm.frag @@ -1,10 +1,10 @@ #version 450 -layout(location = 0) out vec4 _entryPointOutput; - vec4 _38; vec4 _47; +layout(location = 0) out vec4 _entryPointOutput; + void main() { vec4 _27; diff --git a/reference/shaders/asm/frag/unreachable.asm.frag b/reference/shaders/asm/frag/unreachable.asm.frag index 8bc88b9f0ad..beb8708e160 100644 --- a/reference/shaders/asm/frag/unreachable.asm.frag +++ b/reference/shaders/asm/frag/unreachable.asm.frag @@ -1,10 +1,10 @@ #version 450 +vec4 _21; + layout(location = 0) flat in int counter; layout(location = 0) out vec4 FragColor; -vec4 _21; - void main() { vec4 _24; diff --git a/reference/shaders/asm/frag/vector-shuffle-oom.asm.frag b/reference/shaders/asm/frag/vector-shuffle-oom.asm.frag index cdaf78727ee..97c9a2eb9cf 100644 --- a/reference/shaders/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/shaders/asm/frag/vector-shuffle-oom.asm.frag @@ -1,10 +1,24 @@ #version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH [[dont_flatten]] 
+#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif struct _28 { vec4 _m0; }; +_28 _74; + layout(binding = 0, std140) uniform _6_7 { vec4 _m0; @@ -90,11 +104,9 @@ uniform sampler2D SPIRV_Cross_Combined_2; layout(location = 0) out vec4 _5; -_28 _74; - void main() { - _28 _77 = _74; + _28 _77; _77._m0 = vec4(0.0); vec2 _82 = gl_FragCoord.xy * _19._m23.xy; vec4 _88 = _7._m2 * _7._m0.xyxy; @@ -102,6 +114,7 @@ void main() vec3 _109 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _97, 0.0).w * _7._m1, 0.0, 1.0); vec4 _113 = textureLod(SPIRV_Cross_Combined_1, _97, 0.0); vec3 _129; + SPIRV_CROSS_BRANCH if (_113.y > 0.0) { _129 = _109 + (textureLod(SPIRV_Cross_Combined_2, _97, 0.0).xyz * clamp(_113.y * _113.z, 0.0, 1.0)); @@ -110,15 +123,14 @@ void main() { _129 = _109; } - vec3 _130 = _129 * 0.5; - vec3 _133 = vec4(0.0).xyz + _130; + vec3 _133 = vec4(0.0).xyz + (_129 * 0.5); vec4 _134 = vec4(_133.x, _133.y, _133.z, vec4(0.0).w); - _28 _135 = _77; - _135._m0 = _134; + _77._m0 = _134; vec2 _144 = clamp(_82 + (vec3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _156 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _144, 0.0).w * _7._m1, 0.0, 1.0); vec4 _160 = textureLod(SPIRV_Cross_Combined_1, _144, 0.0); vec3 _176; + SPIRV_CROSS_BRANCH if (_160.y > 0.0) { _176 = _156 + (textureLod(SPIRV_Cross_Combined_2, _144, 0.0).xyz * clamp(_160.y * _160.z, 0.0, 1.0)); @@ -127,15 +139,14 @@ void main() { _176 = _156; } - vec3 _177 = _176 * 0.5; - vec3 _180 = _134.xyz + _177; + vec3 _180 = _134.xyz + (_176 * 0.5); vec4 _181 = vec4(_180.x, _180.y, _180.z, _134.w); - _28 _182 = _135; - _182._m0 = _181; + _77._m0 = _181; vec2 _191 = clamp(_82 + (vec3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _203 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _191, 0.0).w * _7._m1, 0.0, 1.0); vec4 _207 = 
textureLod(SPIRV_Cross_Combined_1, _191, 0.0); vec3 _223; + SPIRV_CROSS_BRANCH if (_207.y > 0.0) { _223 = _203 + (textureLod(SPIRV_Cross_Combined_2, _191, 0.0).xyz * clamp(_207.y * _207.z, 0.0, 1.0)); @@ -144,15 +155,14 @@ void main() { _223 = _203; } - vec3 _224 = _223 * 0.75; - vec3 _227 = _181.xyz + _224; + vec3 _227 = _181.xyz + (_223 * 0.75); vec4 _228 = vec4(_227.x, _227.y, _227.z, _181.w); - _28 _229 = _182; - _229._m0 = _228; + _77._m0 = _228; vec2 _238 = clamp(_82 + (vec3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _250 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _238, 0.0).w * _7._m1, 0.0, 1.0); vec4 _254 = textureLod(SPIRV_Cross_Combined_1, _238, 0.0); vec3 _270; + SPIRV_CROSS_BRANCH if (_254.y > 0.0) { _270 = _250 + (textureLod(SPIRV_Cross_Combined_2, _238, 0.0).xyz * clamp(_254.y * _254.z, 0.0, 1.0)); @@ -161,15 +171,14 @@ void main() { _270 = _250; } - vec3 _271 = _270 * 0.5; - vec3 _274 = _228.xyz + _271; + vec3 _274 = _228.xyz + (_270 * 0.5); vec4 _275 = vec4(_274.x, _274.y, _274.z, _228.w); - _28 _276 = _229; - _276._m0 = _275; + _77._m0 = _275; vec2 _285 = clamp(_82 + (vec3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _297 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _285, 0.0).w * _7._m1, 0.0, 1.0); vec4 _301 = textureLod(SPIRV_Cross_Combined_1, _285, 0.0); vec3 _317; + SPIRV_CROSS_BRANCH if (_301.y > 0.0) { _317 = _297 + (textureLod(SPIRV_Cross_Combined_2, _285, 0.0).xyz * clamp(_301.y * _301.z, 0.0, 1.0)); @@ -178,15 +187,14 @@ void main() { _317 = _297; } - vec3 _318 = _317 * 0.5; - vec3 _321 = _275.xyz + _318; + vec3 _321 = _275.xyz + (_317 * 0.5); vec4 _322 = vec4(_321.x, _321.y, _321.z, _275.w); - _28 _323 = _276; - _323._m0 = _322; + _77._m0 = _322; vec2 _332 = clamp(_82 + (vec3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _344 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _332, 0.0).w * _7._m1, 0.0, 1.0); vec4 _348 = textureLod(SPIRV_Cross_Combined_1, _332, 0.0); vec3 _364; + 
SPIRV_CROSS_BRANCH if (_348.y > 0.0) { _364 = _344 + (textureLod(SPIRV_Cross_Combined_2, _332, 0.0).xyz * clamp(_348.y * _348.z, 0.0, 1.0)); @@ -195,15 +203,14 @@ void main() { _364 = _344; } - vec3 _365 = _364 * 0.75; - vec3 _368 = _322.xyz + _365; + vec3 _368 = _322.xyz + (_364 * 0.75); vec4 _369 = vec4(_368.x, _368.y, _368.z, _322.w); - _28 _370 = _323; - _370._m0 = _369; + _77._m0 = _369; vec2 _379 = clamp(_82 + (vec3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _391 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _379, 0.0).w * _7._m1, 0.0, 1.0); vec4 _395 = textureLod(SPIRV_Cross_Combined_1, _379, 0.0); vec3 _411; + SPIRV_CROSS_BRANCH if (_395.y > 0.0) { _411 = _391 + (textureLod(SPIRV_Cross_Combined_2, _379, 0.0).xyz * clamp(_395.y * _395.z, 0.0, 1.0)); @@ -212,15 +219,14 @@ void main() { _411 = _391; } - vec3 _412 = _411 * 1.0; - vec3 _415 = _369.xyz + _412; + vec3 _415 = _369.xyz + (_411 * 1.0); vec4 _416 = vec4(_415.x, _415.y, _415.z, _369.w); - _28 _417 = _370; - _417._m0 = _416; + _77._m0 = _416; vec2 _426 = clamp(_82 + (vec3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _438 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _426, 0.0).w * _7._m1, 0.0, 1.0); vec4 _442 = textureLod(SPIRV_Cross_Combined_1, _426, 0.0); vec3 _458; + SPIRV_CROSS_BRANCH if (_442.y > 0.0) { _458 = _438 + (textureLod(SPIRV_Cross_Combined_2, _426, 0.0).xyz * clamp(_442.y * _442.z, 0.0, 1.0)); @@ -229,15 +235,14 @@ void main() { _458 = _438; } - vec3 _459 = _458 * 0.75; - vec3 _462 = _416.xyz + _459; + vec3 _462 = _416.xyz + (_458 * 0.75); vec4 _463 = vec4(_462.x, _462.y, _462.z, _416.w); - _28 _464 = _417; - _464._m0 = _463; + _77._m0 = _463; vec2 _473 = clamp(_82 + (vec3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _485 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _473, 0.0).w * _7._m1, 0.0, 1.0); vec4 _489 = textureLod(SPIRV_Cross_Combined_1, _473, 0.0); vec3 _505; + SPIRV_CROSS_BRANCH if (_489.y > 0.0) { _505 = _485 + 
(textureLod(SPIRV_Cross_Combined_2, _473, 0.0).xyz * clamp(_489.y * _489.z, 0.0, 1.0)); @@ -246,15 +251,14 @@ void main() { _505 = _485; } - vec3 _506 = _505 * 0.5; - vec3 _509 = _463.xyz + _506; + vec3 _509 = _463.xyz + (_505 * 0.5); vec4 _510 = vec4(_509.x, _509.y, _509.z, _463.w); - _28 _511 = _464; - _511._m0 = _510; + _77._m0 = _510; vec2 _520 = clamp(_82 + (vec3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _532 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _520, 0.0).w * _7._m1, 0.0, 1.0); vec4 _536 = textureLod(SPIRV_Cross_Combined_1, _520, 0.0); vec3 _552; + SPIRV_CROSS_BRANCH if (_536.y > 0.0) { _552 = _532 + (textureLod(SPIRV_Cross_Combined_2, _520, 0.0).xyz * clamp(_536.y * _536.z, 0.0, 1.0)); @@ -263,15 +267,14 @@ void main() { _552 = _532; } - vec3 _553 = _552 * 0.5; - vec3 _556 = _510.xyz + _553; + vec3 _556 = _510.xyz + (_552 * 0.5); vec4 _557 = vec4(_556.x, _556.y, _556.z, _510.w); - _28 _558 = _511; - _558._m0 = _557; + _77._m0 = _557; vec2 _567 = clamp(_82 + (vec3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _579 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _567, 0.0).w * _7._m1, 0.0, 1.0); vec4 _583 = textureLod(SPIRV_Cross_Combined_1, _567, 0.0); vec3 _599; + SPIRV_CROSS_BRANCH if (_583.y > 0.0) { _599 = _579 + (textureLod(SPIRV_Cross_Combined_2, _567, 0.0).xyz * clamp(_583.y * _583.z, 0.0, 1.0)); @@ -280,15 +283,14 @@ void main() { _599 = _579; } - vec3 _600 = _599 * 0.75; - vec3 _603 = _557.xyz + _600; + vec3 _603 = _557.xyz + (_599 * 0.75); vec4 _604 = vec4(_603.x, _603.y, _603.z, _557.w); - _28 _605 = _558; - _605._m0 = _604; + _77._m0 = _604; vec2 _614 = clamp(_82 + (vec3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _626 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _614, 0.0).w * _7._m1, 0.0, 1.0); vec4 _630 = textureLod(SPIRV_Cross_Combined_1, _614, 0.0); vec3 _646; + SPIRV_CROSS_BRANCH if (_630.y > 0.0) { _646 = _626 + (textureLod(SPIRV_Cross_Combined_2, _614, 0.0).xyz * clamp(_630.y * _630.z, 
0.0, 1.0)); @@ -297,15 +299,14 @@ void main() { _646 = _626; } - vec3 _647 = _646 * 0.5; - vec3 _650 = _604.xyz + _647; + vec3 _650 = _604.xyz + (_646 * 0.5); vec4 _651 = vec4(_650.x, _650.y, _650.z, _604.w); - _28 _652 = _605; - _652._m0 = _651; + _77._m0 = _651; vec2 _661 = clamp(_82 + (vec3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _673 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _661, 0.0).w * _7._m1, 0.0, 1.0); vec4 _677 = textureLod(SPIRV_Cross_Combined_1, _661, 0.0); vec3 _693; + SPIRV_CROSS_BRANCH if (_677.y > 0.0) { _693 = _673 + (textureLod(SPIRV_Cross_Combined_2, _661, 0.0).xyz * clamp(_677.y * _677.z, 0.0, 1.0)); @@ -316,13 +317,10 @@ void main() } vec3 _697 = _651.xyz + (_693 * 0.5); vec4 _698 = vec4(_697.x, _697.y, _697.z, _651.w); - _28 _699 = _652; - _699._m0 = _698; + _77._m0 = _698; vec3 _702 = _698.xyz / vec3(((((((((((((0.0 + 0.5) + 0.5) + 0.75) + 0.5) + 0.5) + 0.75) + 1.0) + 0.75) + 0.5) + 0.5) + 0.75) + 0.5) + 0.5); - _28 _704 = _699; - _704._m0 = vec4(_702.x, _702.y, _702.z, _698.w); - _28 _705 = _704; - _705._m0.w = 1.0; - _5 = _705._m0; + _77._m0 = vec4(_702.x, _702.y, _702.z, _698.w); + _77._m0.w = 1.0; + _5 = _77._m0; } diff --git a/reference/shaders/asm/geom/store-uint-layer.invalid.asm.geom b/reference/shaders/asm/geom/store-uint-layer.invalid.asm.geom deleted file mode 100644 index c768d5da863..00000000000 --- a/reference/shaders/asm/geom/store-uint-layer.invalid.asm.geom +++ /dev/null @@ -1,41 +0,0 @@ -#version 450 -layout(triangles) in; -layout(max_vertices = 3, triangle_strip) out; - -struct VertexOutput -{ - vec4 pos; -}; - -struct GeometryOutput -{ - vec4 pos; - uint layer; -}; - -void _main(VertexOutput _input[3], GeometryOutput stream) -{ - GeometryOutput _output; - _output.layer = 1u; - for (int v = 0; v < 3; v++) - { - _output.pos = _input[v].pos; - gl_Position = _output.pos; - gl_Layer = int(_output.layer); - EmitVertex(); - } - EndPrimitive(); -} - -void main() -{ - VertexOutput _input[3]; - _input[0].pos 
= gl_in[0].gl_Position; - _input[1].pos = gl_in[1].gl_Position; - _input[2].pos = gl_in[2].gl_Position; - VertexOutput param[3] = _input; - GeometryOutput param_1; - _main(param, param_1); - GeometryOutput stream = param_1; -} - diff --git a/reference/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc deleted file mode 100644 index 8cb7a4e64c3..00000000000 --- a/reference/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc +++ /dev/null @@ -1,79 +0,0 @@ -#version 450 -layout(vertices = 3) out; - -struct VertexOutput -{ - vec4 pos; - vec2 uv; -}; - -struct HSOut -{ - vec4 pos; - vec2 uv; -}; - -struct HSConstantOut -{ - float EdgeTess[3]; - float InsideTess; -}; - -struct VertexOutput_1 -{ - vec2 uv; -}; - -struct HSOut_1 -{ - vec2 uv; -}; - -layout(location = 0) in VertexOutput_1 p[]; -layout(location = 0) out HSOut_1 _entryPointOutput[3]; - -HSOut _hs_main(VertexOutput p_1[3], uint i) -{ - HSOut _output; - _output.pos = p_1[i].pos; - _output.uv = p_1[i].uv; - return _output; -} - -HSConstantOut PatchHS(VertexOutput _patch[3]) -{ - HSConstantOut _output; - _output.EdgeTess[0] = (vec2(1.0) + _patch[0].uv).x; - _output.EdgeTess[1] = (vec2(1.0) + _patch[0].uv).x; - _output.EdgeTess[2] = (vec2(1.0) + _patch[0].uv).x; - _output.InsideTess = (vec2(1.0) + _patch[0].uv).x; - return _output; -} - -void main() -{ - VertexOutput p_1[3]; - p_1[0].pos = gl_in[0].gl_Position; - p_1[0].uv = p[0].uv; - p_1[1].pos = gl_in[1].gl_Position; - p_1[1].uv = p[1].uv; - p_1[2].pos = gl_in[2].gl_Position; - p_1[2].uv = p[2].uv; - uint i = gl_InvocationID; - VertexOutput param[3] = p_1; - uint param_1 = i; - HSOut flattenTemp = _hs_main(param, param_1); - gl_out[gl_InvocationID].gl_Position = flattenTemp.pos; - _entryPointOutput[gl_InvocationID].uv = flattenTemp.uv; - barrier(); - if (int(gl_InvocationID) == 0) - { - VertexOutput param_2[3] = p_1; - HSConstantOut 
_patchConstantResult = PatchHS(param_2); - gl_TessLevelOuter[0] = _patchConstantResult.EdgeTess[0]; - gl_TessLevelOuter[1] = _patchConstantResult.EdgeTess[1]; - gl_TessLevelOuter[2] = _patchConstantResult.EdgeTess[2]; - gl_TessLevelInner[0] = _patchConstantResult.InsideTess; - } -} - diff --git a/reference/shaders/asm/vert/empty-io.asm.vert b/reference/shaders/asm/vert/empty-io.asm.vert index cc432cb8907..91e65d6d7a5 100644 --- a/reference/shaders/asm/vert/empty-io.asm.vert +++ b/reference/shaders/asm/vert/empty-io.asm.vert @@ -16,6 +16,7 @@ struct VSOutput_1 }; layout(location = 0) in vec4 position; +layout(location = 0) out VSOutput_1 _entryPointOutput; VSOutput _main(VSInput _input) { diff --git a/reference/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert b/reference/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert index 835456f5bd2..40eef2d09df 100644 --- a/reference/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert +++ b/reference/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert @@ -1,4 +1,7 @@ #version 450 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif struct V2F { @@ -18,7 +21,11 @@ layout(binding = 0, std430) readonly buffer gInstanceData } gInstanceData_1; layout(location = 0) in vec3 PosL; +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif layout(location = 0) out vec4 _entryPointOutput_Color; V2F _VS(vec3 PosL_1, uint instanceID) diff --git a/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert b/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert index fdba2a26046..daf0071ae7b 100644 --- a/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert +++ b/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert @@ -10,9 +10,10 @@ const int _20 = (_7 + 2); #endif const uint _8 = SPIRV_CROSS_CONSTANT_ID_202; const 
uint _25 = (_8 % 5u); -const ivec4 _30 = ivec4(20, 30, _20, _20); -const ivec2 _32 = ivec2(_30.y, _30.x); -const int _33 = _30.y; +const int _30 = _7 - (-3) * (_7 / (-3)); +const ivec4 _32 = ivec4(20, 30, _20, _30); +const ivec2 _34 = ivec2(_32.y, _32.x); +const int _35 = _32.y; #ifndef SPIRV_CROSS_CONSTANT_ID_200 #define SPIRV_CROSS_CONSTANT_ID_200 3.141590118408203125 #endif @@ -25,10 +26,10 @@ void main() vec4 pos = vec4(0.0); pos.y += float(_20); pos.z += float(_25); - pos += vec4(_30); - vec2 _56 = pos.xy + vec2(_32); - pos = vec4(_56.x, _56.y, pos.z, pos.w); + pos += vec4(_32); + vec2 _58 = pos.xy + vec2(_34); + pos = vec4(_58.x, _58.y, pos.z, pos.w); gl_Position = pos; - _4 = _33; + _4 = _35; } diff --git a/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk b/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk index 02c3e312575..4cddf82138c 100644 --- a/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk +++ b/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk @@ -4,9 +4,10 @@ layout(constant_id = 201) const int _7 = -10; const int _20 = (_7 + 2); layout(constant_id = 202) const uint _8 = 100u; const uint _25 = (_8 % 5u); -const ivec4 _30 = ivec4(20, 30, _20, _20); -const ivec2 _32 = ivec2(_30.y, _30.x); -const int _33 = _30.y; +const int _30 = _7 - (-3) * (_7 / (-3)); +const ivec4 _32 = ivec4(20, 30, _20, _30); +const ivec2 _34 = ivec2(_32.y, _32.x); +const int _35 = _32.y; layout(constant_id = 200) const float _9 = 3.141590118408203125; layout(location = 0) flat out int _4; @@ -14,12 +15,16 @@ layout(location = 0) flat out int _4; void main() { vec4 pos = vec4(0.0); - pos.y += float(_20); - pos.z += float(_25); - pos += vec4(_30); - vec2 _56 = pos.xy + vec2(_32); - pos = vec4(_56.x, _56.y, pos.z, pos.w); + float _42 = float(_20); + pos.y += _42; + float _47 = float(_25); + pos.z += _47; + vec4 _52 = vec4(_32); + pos += _52; + vec2 _55 = vec2(_34); + vec2 _58 = pos.xy + _55; + pos = 
vec4(_58.x, _58.y, pos.z, pos.w); gl_Position = pos; - _4 = _33; + _4 = _35; } diff --git a/reference/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert b/reference/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert index 31f13bd777f..d79c08f8a3c 100644 --- a/reference/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert +++ b/reference/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert @@ -1,6 +1,13 @@ #version 450 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif vec4 _main(uint vid, uint iid) { diff --git a/reference/shaders/comp/bake_gradient.comp b/reference/shaders/comp/bake_gradient.comp index 7b0bb34c64f..49fa9532445 100644 --- a/reference/shaders/comp/bake_gradient.comp +++ b/reference/shaders/comp/bake_gradient.comp @@ -29,8 +29,8 @@ void main() vec2 displacement = textureLod(uDisplacement, uv.zw, 0.0).xy * 1.2000000476837158203125; vec2 dDdx = (textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(1, 0)).xy - textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(-1, 0)).xy) * 0.60000002384185791015625; vec2 dDdy = (textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, 1)).xy - textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, -1)).xy) * 0.60000002384185791015625; - vec2 param = dDdx * _46.uScale.z; - vec2 param_1 = dDdy * _46.uScale.z; + mediump vec2 param = dDdx * _46.uScale.z; + mediump vec2 param_1 = dDdy * _46.uScale.z; float j = jacobian(param, param_1); displacement = vec2(0.0); imageStore(iHeightDisplacement, ivec2(gl_GlobalInvocationID.xy), vec4(h, displacement, 0.0)); diff --git a/reference/shaders/comp/barriers.comp b/reference/shaders/comp/barriers.comp index a1b975de830..1102c91e28c 100644 --- a/reference/shaders/comp/barriers.comp +++ b/reference/shaders/comp/barriers.comp @@ -28,41 +28,35 @@ void group_barrier() void barrier_shared_exec() { - 
memoryBarrierShared(); barrier(); } void full_barrier_exec() { memoryBarrier(); - memoryBarrierShared(); barrier(); } void image_barrier_exec() { memoryBarrierImage(); - memoryBarrierShared(); barrier(); } void buffer_barrier_exec() { memoryBarrierBuffer(); - memoryBarrierShared(); barrier(); } void group_barrier_exec() { groupMemoryBarrier(); - memoryBarrierShared(); barrier(); } void exec_barrier() { - memoryBarrierShared(); barrier(); } diff --git a/reference/shaders/comp/bitcast-16bit-2.invalid.comp b/reference/shaders/comp/bitcast-16bit-2.invalid.comp deleted file mode 100644 index bddc16d62bc..00000000000 --- a/reference/shaders/comp/bitcast-16bit-2.invalid.comp +++ /dev/null @@ -1,39 +0,0 @@ -#version 450 -#if defined(GL_AMD_gpu_shader_int16) -#extension GL_AMD_gpu_shader_int16 : require -#else -#error No extension available for Int16. -#endif -#if defined(GL_AMD_gpu_shader_half_float) -#extension GL_AMD_gpu_shader_half_float : require -#elif defined(GL_NV_gpu_shader5) -#extension GL_NV_gpu_shader5 : require -#else -#error No extension available for FP16. 
-#endif -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -layout(binding = 1, std430) buffer SSBO1 -{ - i16vec4 outputs[]; -} _21; - -layout(binding = 0, std430) buffer SSBO0 -{ - ivec4 inputs[]; -} _29; - -layout(binding = 2, std140) uniform UBO -{ - f16vec4 const0; -} _40; - -void main() -{ - uint ident = gl_GlobalInvocationID.x; - i16vec2 _47 = unpackInt2x16(_29.inputs[ident].x) + float16BitsToInt16(_40.const0.xy); - _21.outputs[ident] = i16vec4(_47.x, _47.y, _21.outputs[ident].z, _21.outputs[ident].w); - i16vec2 _66 = i16vec2(unpackUint2x16(uint(_29.inputs[ident].y)) - float16BitsToUint16(_40.const0.zw)); - _21.outputs[ident] = i16vec4(_21.outputs[ident].x, _21.outputs[ident].y, _66.x, _66.y); -} - diff --git a/reference/shaders/comp/casts.comp b/reference/shaders/comp/casts.comp index 973668676af..b98890a4ddf 100644 --- a/reference/shaders/comp/casts.comp +++ b/reference/shaders/comp/casts.comp @@ -14,6 +14,6 @@ layout(binding = 0, std430) buffer SSBO0 void main() { uint ident = gl_GlobalInvocationID.x; - _21.outputs[ident] = mix(ivec4(0), ivec4(1), notEqual((_27.inputs[ident] & ivec4(3)), ivec4(uvec4(0u)))); + _21.outputs[ident] = ivec4(notEqual((_27.inputs[ident] & ivec4(3)), ivec4(uvec4(0u)))); } diff --git a/reference/shaders/comp/generate_height.comp b/reference/shaders/comp/generate_height.comp index fe733e2893d..e482bf0ca5a 100644 --- a/reference/shaders/comp/generate_height.comp +++ b/reference/shaders/comp/generate_height.comp @@ -27,7 +27,6 @@ uvec2 workaround_mix(uvec2 a, uvec2 b, bvec2 sel) { _86 = a.x; } - uint _94 = _86; uint _97; if (sel.y) { @@ -37,7 +36,7 @@ uvec2 workaround_mix(uvec2 a, uvec2 b, bvec2 sel) { _97 = a.y; } - return uvec2(_94, _97); + return uvec2(_86, _97); } vec2 alias(vec2 i, vec2 N) diff --git a/reference/shaders/comp/inout-struct.invalid.comp b/reference/shaders/comp/inout-struct.invalid.comp deleted file mode 100644 index 640e25bb952..00000000000 --- a/reference/shaders/comp/inout-struct.invalid.comp +++ 
/dev/null @@ -1,65 +0,0 @@ -#version 310 es -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -struct Foo -{ - vec4 a; - vec4 b; - vec4 c; - vec4 d; -}; - -layout(binding = 1, std430) readonly buffer SSBO2 -{ - vec4 data[]; -} indata; - -layout(binding = 0, std430) writeonly buffer SSBO -{ - vec4 data[]; -} outdata; - -layout(binding = 2, std430) readonly buffer SSBO3 -{ - Foo foos[]; -} foobar; - -void baz(inout Foo foo) -{ - uint ident = gl_GlobalInvocationID.x; - foo.a = indata.data[(4u * ident) + 0u]; - foo.b = indata.data[(4u * ident) + 1u]; - foo.c = indata.data[(4u * ident) + 2u]; - foo.d = indata.data[(4u * ident) + 3u]; -} - -void meow(inout Foo foo) -{ - foo.a += vec4(10.0); - foo.b += vec4(20.0); - foo.c += vec4(30.0); - foo.d += vec4(40.0); -} - -vec4 bar(Foo foo) -{ - return ((foo.a + foo.b) + foo.c) + foo.d; -} - -void main() -{ - Foo param; - baz(param); - Foo foo = param; - Foo param_1 = foo; - meow(param_1); - foo = param_1; - Foo param_2 = foo; - Foo param_3; - param_3.a = foobar.foos[gl_GlobalInvocationID.x].a; - param_3.b = foobar.foos[gl_GlobalInvocationID.x].b; - param_3.c = foobar.foos[gl_GlobalInvocationID.x].c; - param_3.d = foobar.foos[gl_GlobalInvocationID.x].d; - outdata.data[gl_GlobalInvocationID.x] = bar(param_2) + bar(param_3); -} - diff --git a/reference/shaders/comp/shared.comp b/reference/shaders/comp/shared.comp index d0987a65287..545ef22e617 100644 --- a/reference/shaders/comp/shared.comp +++ b/reference/shaders/comp/shared.comp @@ -18,7 +18,6 @@ void main() uint ident = gl_GlobalInvocationID.x; float idata = _22.in_data[ident]; sShared[gl_LocalInvocationIndex] = idata; - memoryBarrierShared(); barrier(); _44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u]; } diff --git a/reference/shaders/comp/struct-packing.comp b/reference/shaders/comp/struct-packing.comp index cd1eda1b32b..f4b58342d48 100644 --- a/reference/shaders/comp/struct-packing.comp +++ b/reference/shaders/comp/struct-packing.comp @@ 
-43,48 +43,6 @@ struct Content S4 m3s[8]; }; -struct S0_1 -{ - vec2 a[1]; - float b; -}; - -struct S1_1 -{ - vec3 a; - float b; -}; - -struct S2_1 -{ - vec3 a[1]; - float b; -}; - -struct S3_1 -{ - vec2 a; - float b; -}; - -struct S4_1 -{ - vec2 c; -}; - -struct Content_1 -{ - S0_1 m0s[1]; - S1_1 m1s[1]; - S2_1 m2s[1]; - S0_1 m0; - S1_1 m1; - S2_1 m2; - S3_1 m3; - float m4; - S4_1 m3s[8]; -}; - layout(binding = 1, std430) restrict buffer SSBO1 { Content content; @@ -103,9 +61,9 @@ layout(binding = 1, std430) restrict buffer SSBO1 layout(binding = 0, std140) restrict buffer SSBO0 { - Content_1 content; - Content_1 content1[2]; - Content_1 content2; + Content content; + Content content1[2]; + Content content2; mat2 m0; mat2 m1; mat2x3 m2[4]; diff --git a/reference/shaders/desktop-only/comp/enhanced-layouts.comp b/reference/shaders/desktop-only/comp/enhanced-layouts.comp index 45b25064b6b..ba37ca237b8 100644 --- a/reference/shaders/desktop-only/comp/enhanced-layouts.comp +++ b/reference/shaders/desktop-only/comp/enhanced-layouts.comp @@ -8,13 +8,6 @@ struct Foo int c; }; -struct Foo_1 -{ - int a; - int b; - int c; -}; - layout(binding = 1, std140) buffer SSBO1 { layout(offset = 4) int a; @@ -27,7 +20,7 @@ layout(binding = 2, std430) buffer SSBO2 { layout(offset = 4) int a; layout(offset = 8) int b; - layout(offset = 16) Foo_1 foo; + layout(offset = 16) Foo foo; layout(offset = 48) int c[8]; } ssbo2; diff --git a/reference/shaders/desktop-only/comp/int64.desktop.comp b/reference/shaders/desktop-only/comp/int64.desktop.comp index 702456b303f..28afc2fbd7d 100644 --- a/reference/shaders/desktop-only/comp/int64.desktop.comp +++ b/reference/shaders/desktop-only/comp/int64.desktop.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. 
+#endif layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; struct M0 diff --git a/reference/shaders/desktop-only/frag/fp16.invalid.desktop.frag b/reference/shaders/desktop-only/frag/fp16.invalid.desktop.frag deleted file mode 100644 index faf79b2b44d..00000000000 --- a/reference/shaders/desktop-only/frag/fp16.invalid.desktop.frag +++ /dev/null @@ -1,159 +0,0 @@ -#version 450 -#if defined(GL_AMD_gpu_shader_half_float) -#extension GL_AMD_gpu_shader_half_float : require -#elif defined(GL_NV_gpu_shader5) -#extension GL_NV_gpu_shader5 : require -#else -#error No extension available for FP16. -#endif - -struct ResType -{ - f16vec4 _m0; - ivec4 _m1; -}; - -layout(location = 3) in f16vec4 v4; -layout(location = 2) in f16vec3 v3; -layout(location = 0) in float16_t v1; -layout(location = 1) in f16vec2 v2; - -f16mat2 test_mat2(f16vec2 a, f16vec2 b, f16vec2 c, f16vec2 d) -{ - return f16mat2(f16vec2(a), f16vec2(b)) * f16mat2(f16vec2(c), f16vec2(d)); -} - -f16mat3 test_mat3(f16vec3 a, f16vec3 b, f16vec3 c, f16vec3 d, f16vec3 e, f16vec3 f) -{ - return f16mat3(f16vec3(a), f16vec3(b), f16vec3(c)) * f16mat3(f16vec3(d), f16vec3(e), f16vec3(f)); -} - -void test_constants() -{ - float16_t a = float16_t(1.0); - float16_t b = float16_t(1.5); - float16_t c = float16_t(-1.5); - float16_t d = float16_t(0.0 / 0.0); - float16_t e = float16_t(1.0 / 0.0); - float16_t f = float16_t(-1.0 / 0.0); - float16_t g = float16_t(1014.0); - float16_t h = float16_t(9.5367431640625e-07); -} - -float16_t test_result() -{ - return float16_t(1.0); -} - -void test_conversions() -{ - float16_t one = test_result(); - int a = int(one); - uint b = uint(one); - bool c = one != float16_t(0.0); - float d = float(one); - double e = double(one); - float16_t a2 = float16_t(a); - float16_t b2 = float16_t(b); - float16_t c2 = float16_t(c); - float16_t d2 = float16_t(d); - float16_t e2 = float16_t(e); -} - -void test_builtins() -{ - f16vec4 res = radians(v4); - res = degrees(v4); - res = sin(v4); - res = 
cos(v4); - res = tan(v4); - res = asin(v4); - res = atan(v4, v3.xyzz); - res = atan(v4); - res = sinh(v4); - res = cosh(v4); - res = tanh(v4); - res = asinh(v4); - res = acosh(v4); - res = atanh(v4); - res = pow(v4, v4); - res = exp(v4); - res = log(v4); - res = exp2(v4); - res = log2(v4); - res = sqrt(v4); - res = inversesqrt(v4); - res = abs(v4); - res = sign(v4); - res = floor(v4); - res = trunc(v4); - res = round(v4); - res = roundEven(v4); - res = ceil(v4); - res = fract(v4); - res = mod(v4, v4); - f16vec4 tmp; - f16vec4 _231 = modf(v4, tmp); - res = _231; - res = min(v4, v4); - res = max(v4, v4); - res = clamp(v4, v4, v4); - res = mix(v4, v4, v4); - res = mix(v4, v4, lessThan(v4, v4)); - res = step(v4, v4); - res = smoothstep(v4, v4, v4); - bvec4 btmp = isnan(v4); - btmp = isinf(v4); - res = fma(v4, v4, v4); - ResType _275; - _275._m0 = frexp(v4, _275._m1); - ivec4 itmp = _275._m1; - res = _275._m0; - res = ldexp(res, itmp); - uint pack0 = packFloat2x16(v4.xy); - uint pack1 = packFloat2x16(v4.zw); - res = f16vec4(unpackFloat2x16(pack0), unpackFloat2x16(pack1)); - float16_t t0 = length(v4); - t0 = distance(v4, v4); - t0 = dot(v4, v4); - f16vec3 res3 = cross(v3, v3); - res = normalize(v4); - res = faceforward(v4, v4, v4); - res = reflect(v4, v4); - res = refract(v4, v4, v1); - btmp = lessThan(v4, v4); - btmp = lessThanEqual(v4, v4); - btmp = greaterThan(v4, v4); - btmp = greaterThanEqual(v4, v4); - btmp = equal(v4, v4); - btmp = notEqual(v4, v4); - res = dFdx(v4); - res = dFdy(v4); - res = dFdxFine(v4); - res = dFdyFine(v4); - res = dFdxCoarse(v4); - res = dFdyCoarse(v4); - res = fwidth(v4); - res = fwidthFine(v4); - res = fwidthCoarse(v4); -} - -void main() -{ - f16vec2 param = v2; - f16vec2 param_1 = v2; - f16vec2 param_2 = v3.xy; - f16vec2 param_3 = v3.xy; - f16mat2 m0 = test_mat2(param, param_1, param_2, param_3); - f16vec3 param_4 = v3; - f16vec3 param_5 = v3; - f16vec3 param_6 = v3; - f16vec3 param_7 = v4.xyz; - f16vec3 param_8 = v4.xyz; - f16vec3 param_9 
= v4.yzw; - f16mat3 m1 = test_mat3(param_4, param_5, param_6, param_7, param_8, param_9); - test_constants(); - test_conversions(); - test_builtins(); -} - diff --git a/reference/shaders/desktop-only/frag/image-size.frag b/reference/shaders/desktop-only/frag/image-size.frag new file mode 100644 index 00000000000..5bb060398ed --- /dev/null +++ b/reference/shaders/desktop-only/frag/image-size.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(binding = 0, r32f) uniform readonly writeonly image2D uImage1; +layout(binding = 1, r32f) uniform readonly writeonly image2D uImage2; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(vec2(imageSize(uImage1)), vec2(imageSize(uImage2))); +} + diff --git a/reference/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag b/reference/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag new file mode 100644 index 00000000000..1d9062064a8 --- /dev/null +++ b/reference/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(binding = 0, r32f) uniform image2D uImage1; +layout(binding = 1, r32f) uniform image2D uImage2; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(vec2(imageSize(uImage1)), vec2(imageSize(uImage2))); +} + diff --git a/reference/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag b/reference/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag index d5e45bda431..60c45908fa7 100644 --- a/reference/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag +++ b/reference/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag @@ -13,12 +13,14 @@ layout(location = 0) in vec3 vClip3; void main() { - vec4 _20 = vClip4; - _20.y = vClip4.w; - FragColor = textureProj(uShadow1D, vec4(_20.x, 0.0, vClip4.z, _20.y)); - vec4 _30 = vClip4; - _30.z = vClip4.w; - FragColor = textureProj(uShadow2D, vec4(_30.xy, vClip4.z, _30.z)); + vec4 _17 = vClip4; + vec4 _20 = _17; + _20.y = _17.w; + 
FragColor = textureProj(uShadow1D, vec4(_20.x, 0.0, _17.z, _20.y)); + vec4 _27 = vClip4; + vec4 _30 = _27; + _30.z = _27.w; + FragColor = textureProj(uShadow2D, vec4(_30.xy, _27.z, _30.z)); FragColor = textureProj(uSampler1D, vClip2).x; FragColor = textureProj(uSampler2D, vClip3).x; FragColor = textureProj(uSampler3D, vClip4).x; diff --git a/reference/shaders/desktop-only/tesc/basic.desktop.sso.tesc b/reference/shaders/desktop-only/tesc/basic.desktop.sso.tesc index 5e958256af5..c51699db6db 100644 --- a/reference/shaders/desktop-only/tesc/basic.desktop.sso.tesc +++ b/reference/shaders/desktop-only/tesc/basic.desktop.sso.tesc @@ -4,7 +4,7 @@ layout(vertices = 1) out; in gl_PerVertex { vec4 gl_Position; -} gl_in[gl_MaxPatchVertices]; +} gl_in[]; out gl_PerVertex { diff --git a/reference/shaders/desktop-only/tese/triangle.desktop.sso.tese b/reference/shaders/desktop-only/tese/triangle.desktop.sso.tese index 31027dae80f..c9bacd464e4 100644 --- a/reference/shaders/desktop-only/tese/triangle.desktop.sso.tese +++ b/reference/shaders/desktop-only/tese/triangle.desktop.sso.tese @@ -4,7 +4,7 @@ layout(triangles, cw, fractional_even_spacing) in; in gl_PerVertex { vec4 gl_Position; -} gl_in[gl_MaxPatchVertices]; +} gl_in[]; out gl_PerVertex { diff --git a/reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert b/reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert new file mode 100644 index 00000000000..2b3c5ce0516 --- /dev/null +++ b/reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert @@ -0,0 +1,24 @@ +#version 450 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif + +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseVertex gl_BaseVertexARB +#else +uniform int SPIRV_Cross_BaseVertex; +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else +uniform int SPIRV_Cross_BaseInstance; +#endif +#ifndef 
GL_ARB_shader_draw_parameters +#error GL_ARB_shader_draw_parameters is not supported. +#endif + +void main() +{ + gl_Position = vec4(float(SPIRV_Cross_BaseVertex), float(SPIRV_Cross_BaseInstance), float(gl_DrawIDARB), 1.0); +} + diff --git a/reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vert b/reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert.vk similarity index 100% rename from reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vert rename to reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert.vk diff --git a/reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert b/reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert new file mode 100644 index 00000000000..bc16d0431aa --- /dev/null +++ b/reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert @@ -0,0 +1,24 @@ +#version 460 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif + +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseVertex gl_BaseVertexARB +#else +uniform int SPIRV_Cross_BaseVertex; +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else +uniform int SPIRV_Cross_BaseInstance; +#endif +#ifndef GL_ARB_shader_draw_parameters +#error GL_ARB_shader_draw_parameters is not supported. 
+#endif + +void main() +{ + gl_Position = vec4(float(SPIRV_Cross_BaseVertex), float(SPIRV_Cross_BaseInstance), float(gl_DrawIDARB), 1.0); +} + diff --git a/reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vert b/reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert.vk similarity index 100% rename from reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vert rename to reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert.vk diff --git a/reference/shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag b/reference/shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag deleted file mode 100644 index ef6bb526ab9..00000000000 --- a/reference/shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag +++ /dev/null @@ -1,24 +0,0 @@ -#version 450 - -layout(binding = 0) uniform sampler2D uTextures[2 * 3 * 1]; - -layout(location = 1) in vec2 vUV; -layout(location = 0) out vec4 FragColor; -layout(location = 0) flat in int vIndex; - -void main() -{ - vec4 values3[2 * 3 * 1]; - for (int z = 0; z < 2; z++) - { - for (int y = 0; y < 3; y++) - { - for (int x = 0; x < 1; x++) - { - values3[z * 3 * 1 + y * 1 + x] = texture(uTextures[z * 3 * 1 + y * 1 + x], vUV); - } - } - } - FragColor = (values3[1 * 3 * 1 + 2 * 1 + 0] + values3[0 * 3 * 1 + 2 * 1 + 0]) + values3[(vIndex + 1) * 3 * 1 + 2 * 1 + vIndex]; -} - diff --git a/reference/shaders/frag/avoid-expression-lowering-to-loop.frag b/reference/shaders/frag/avoid-expression-lowering-to-loop.frag new file mode 100644 index 00000000000..6313d896e27 --- /dev/null +++ b/reference/shaders/frag/avoid-expression-lowering-to-loop.frag @@ -0,0 +1,26 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 1, std140) uniform Count +{ + float count; +} _44; + +layout(binding = 0) uniform mediump sampler2D tex; + +layout(location = 0) in highp vec4 vertex; +layout(location = 0) out vec4 fragColor; + +void main() 
+{ + highp float size = 1.0 / float(textureSize(tex, 0).x); + float r = 0.0; + float d = dFdx(vertex.x); + for (float i = 0.0; i < _44.count; i += 1.0) + { + r += (size * d); + } + fragColor = vec4(r); +} + diff --git a/reference/shaders/frag/barycentric-khr.frag b/reference/shaders/frag/barycentric-khr.frag new file mode 100644 index 00000000000..71a44c38575 --- /dev/null +++ b/reference/shaders/frag/barycentric-khr.frag @@ -0,0 +1,13 @@ +#version 450 +#extension GL_EXT_fragment_shader_barycentric : require + +layout(location = 0) out vec2 value; +layout(location = 0) pervertexEXT in vec2 vUV[3]; +layout(location = 3) pervertexEXT in vec2 vUV2[3]; + +void main() +{ + value = ((vUV[0] * gl_BaryCoordEXT.x) + (vUV[1] * gl_BaryCoordEXT.y)) + (vUV[2] * gl_BaryCoordEXT.z); + value += (((vUV2[0] * gl_BaryCoordNoPerspEXT.x) + (vUV2[1] * gl_BaryCoordNoPerspEXT.y)) + (vUV2[2] * gl_BaryCoordNoPerspEXT.z)); +} + diff --git a/reference/shaders/frag/barycentric-nv.frag b/reference/shaders/frag/barycentric-nv.frag index 12d24bb739c..b3b57e2f880 100644 --- a/reference/shaders/frag/barycentric-nv.frag +++ b/reference/shaders/frag/barycentric-nv.frag @@ -1,20 +1,13 @@ #version 450 #extension GL_NV_fragment_shader_barycentric : require -layout(binding = 0, std430) readonly buffer Vertices -{ - vec2 uvs[]; -} _19; - layout(location = 0) out vec2 value; +layout(location = 0) pervertexNV in vec2 vUV[3]; +layout(location = 1) pervertexNV in vec2 vUV2[3]; void main() { - int prim = gl_PrimitiveID; - vec2 uv0 = _19.uvs[(3 * prim) + 0]; - vec2 uv1 = _19.uvs[(3 * prim) + 1]; - vec2 uv2 = _19.uvs[(3 * prim) + 2]; - value = ((uv0 * gl_BaryCoordNV.x) + (uv1 * gl_BaryCoordNV.y)) + (uv2 * gl_BaryCoordNV.z); - value += (((uv0 * gl_BaryCoordNoPerspNV.x) + (uv1 * gl_BaryCoordNoPerspNV.y)) + (uv2 * gl_BaryCoordNoPerspNV.z)); + value = ((vUV[0] * gl_BaryCoordNV.x) + (vUV[1] * gl_BaryCoordNV.y)) + (vUV[2] * gl_BaryCoordNV.z); + value += (((vUV2[0] * gl_BaryCoordNoPerspNV.x) + (vUV2[1] * 
gl_BaryCoordNoPerspNV.y)) + (vUV2[2] * gl_BaryCoordNoPerspNV.z)); } diff --git a/reference/shaders/frag/ground.frag b/reference/shaders/frag/ground.frag index 4d998d56898..c36bb317b65 100644 --- a/reference/shaders/frag/ground.frag +++ b/reference/shaders/frag/ground.frag @@ -38,7 +38,7 @@ void main() { vec3 Normal = (texture(TexNormalmap, TexCoord).xyz * 2.0) - vec3(1.0); Normal = normalize(Normal); - highp float param = length(EyeVec) / 1000.0; + float param = length(EyeVec) / 1000.0; vec2 scatter_uv; scatter_uv.x = saturate(param); vec3 nEye = normalize(EyeVec); @@ -53,10 +53,10 @@ void main() Color = mix(dirt, base, vec3(edge)); Color *= Color; float Roughness = 1.0 - (edge * grass_snow); - highp vec3 param_1 = Color; - highp vec3 param_2 = Normal; - highp float param_3 = Roughness; - highp float param_4 = 0.0; + vec3 param_1 = Color; + vec3 param_2 = Normal; + float param_3 = Roughness; + float param_4 = 0.0; Resolve(param_1, param_2, param_3, param_4); } diff --git a/reference/shaders/frag/modf-pointer-function-analysis.frag b/reference/shaders/frag/modf-pointer-function-analysis.frag new file mode 100644 index 00000000000..2ca0050bad0 --- /dev/null +++ b/reference/shaders/frag/modf-pointer-function-analysis.frag @@ -0,0 +1,32 @@ +#version 450 + +layout(location = 0) in vec4 v; +layout(location = 0) out vec4 vo0; +layout(location = 1) out vec4 vo1; + +vec4 modf_inner(out vec4 tmp) +{ + vec4 _20 = modf(v, tmp); + return _20; +} + +float modf_inner_partial(inout vec4 tmp) +{ + float _30 = modf(v.x, tmp.x); + return _30; +} + +void main() +{ + vec4 param; + vec4 _37 = modf_inner(param); + vec4 tmp = param; + vo0 = _37; + vo1 = tmp; + vec4 param_1 = tmp; + float _43 = modf_inner_partial(param_1); + tmp = param_1; + vo0.x += _43; + vo1.x += tmp.x; +} + diff --git a/reference/shaders/frag/partial-write-preserve.frag b/reference/shaders/frag/partial-write-preserve.frag index cf8a83cf0c4..87e689aff26 100644 --- a/reference/shaders/frag/partial-write-preserve.frag +++ 
b/reference/shaders/frag/partial-write-preserve.frag @@ -80,16 +80,16 @@ void branchy_inout_2(out B b) void main() { vec4 a = vec4(10.0); - highp vec4 param = a; + vec4 param = a; partial_inout(param); a = param; - highp vec4 param_1; + vec4 param_1; complete_inout(param_1); a = param_1; - highp vec4 param_2 = a; + vec4 param_2 = a; branchy_inout(param_2); a = param_2; - highp vec4 param_3; + vec4 param_3; branchy_inout_2(param_3); a = param_3; B b = B(10.0, 20.0); diff --git a/reference/shaders/frag/pixel-interlock-ordered.frag b/reference/shaders/frag/pixel-interlock-ordered.frag new file mode 100644 index 00000000000..915b56511f2 --- /dev/null +++ b/reference/shaders/frag/pixel-interlock-ordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/shaders/frag/pixel-interlock-unordered.frag b/reference/shaders/frag/pixel-interlock-unordered.frag new file mode 100644 index 00000000000..13962daf19d --- /dev/null +++ b/reference/shaders/frag/pixel-interlock-unordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_unordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/shaders/frag/post-depth-coverage-es.frag b/reference/shaders/frag/post-depth-coverage-es.frag new file mode 100644 index 00000000000..d086560e5d2 --- /dev/null +++ b/reference/shaders/frag/post-depth-coverage-es.frag @@ -0,0 +1,14 @@ +#version 310 es +#extension GL_EXT_post_depth_coverage : require +#extension GL_OES_sample_variables : require +precision mediump float; +precision highp int; +layout(early_fragment_tests, post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(float(gl_SampleMaskIn[0])); +} + diff --git a/reference/shaders/frag/post-depth-coverage.frag b/reference/shaders/frag/post-depth-coverage.frag new file mode 100644 index 00000000000..caca9c03cb5 --- /dev/null +++ b/reference/shaders/frag/post-depth-coverage.frag @@ -0,0 +1,15 @@ +#version 450 +#if defined(GL_ARB_post_depth_coverge) +#extension GL_ARB_post_depth_coverage : require +#else +#extension GL_EXT_post_depth_coverage : require +#endif +layout(early_fragment_tests, post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(float(gl_SampleMaskIn[0])); +} + diff --git a/reference/shaders/frag/round-even.frag b/reference/shaders/frag/round-even.frag new file mode 100644 index 00000000000..ab6f37adc14 --- /dev/null +++ b/reference/shaders/frag/round-even.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vA; +layout(location = 1) in float vB; + +void 
main() +{ + FragColor = roundEven(vA); + FragColor *= roundEven(vB); +} + diff --git a/reference/shaders/frag/round.frag b/reference/shaders/frag/round.frag new file mode 100644 index 00000000000..0f1fc0db0f3 --- /dev/null +++ b/reference/shaders/frag/round.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vA; +layout(location = 1) in float vB; + +void main() +{ + FragColor = round(vA); + FragColor *= round(vB); +} + diff --git a/reference/shaders/frag/sample-interlock-ordered.frag b/reference/shaders/frag/sample-interlock-ordered.frag new file mode 100644 index 00000000000..9d5f90e4aaf --- /dev/null +++ b/reference/shaders/frag/sample-interlock-ordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(sample_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0])); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/shaders/frag/sample-interlock-unordered.frag b/reference/shaders/frag/sample-interlock-unordered.frag new file mode 100644 index 00000000000..441198814e0 --- /dev/null +++ b/reference/shaders/frag/sample-interlock-unordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(sample_interlock_unordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/shaders/frag/sampler.frag b/reference/shaders/frag/sampler.frag index 0ec200c7146..d62f0afeff9 100644 --- a/reference/shaders/frag/sampler.frag +++ b/reference/shaders/frag/sampler.frag @@ -15,7 +15,7 @@ vec4 sample_texture(mediump sampler2D tex, vec2 uv) void main() { - highp vec2 param = vTex; + vec2 param = vTex; FragColor = vColor * sample_texture(uTex, param); } diff --git a/reference/shaders/frag/struct-type-unrelated-alias.frag b/reference/shaders/frag/struct-type-unrelated-alias.frag new file mode 100644 index 00000000000..f38d18ff28f --- /dev/null +++ b/reference/shaders/frag/struct-type-unrelated-alias.frag @@ -0,0 +1,18 @@ +#version 450 + +struct T +{ + float a; +}; + +layout(location = 0) out float FragColor; + +void main() +{ + T foo; + foo.a = 10.0; + T bar; + bar.a = 20.0; + FragColor = foo.a + bar.a; +} + diff --git a/reference/shaders/frag/switch-unreachable-break.frag b/reference/shaders/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..d8396d69a90 --- /dev/null +++ b/reference/shaders/frag/switch-unreachable-break.frag @@ -0,0 +1,36 @@ +#version 450 + +layout(binding = 0, std140) uniform UBO +{ + int cond; + int cond2; +} _13; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + bool frog = false; + switch (_13.cond) + { + case 1: + { + if (_13.cond2 < 50) + { + break; + } + else + { + discard; + } + break; // unreachable workaround + } + default: + { + frog = true; + break; + } + } + FragColor = mix(vec4(20.0), vec4(10.0), 
bvec4(frog)); +} + diff --git a/reference/shaders/frag/ubo-load-row-major-workaround.frag b/reference/shaders/frag/ubo-load-row-major-workaround.frag new file mode 100644 index 00000000000..13049b456b7 --- /dev/null +++ b/reference/shaders/frag/ubo-load-row-major-workaround.frag @@ -0,0 +1,48 @@ +#version 450 + +struct RowMajor +{ + mat4 B; +}; + +struct NestedRowMajor +{ + RowMajor rm; +}; + +layout(binding = 2, std140) uniform UBO3 +{ + layout(row_major) NestedRowMajor rm2; +} _17; + +layout(binding = 1, std140) uniform UBO2 +{ + layout(row_major) RowMajor rm; +} _35; + +layout(binding = 0, std140) uniform UBO +{ + layout(row_major) mat4 A; + mat4 C; +} _42; + +layout(binding = 3, std140) uniform UBONoWorkaround +{ + mat4 D; +} _56; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 Clip; + +NestedRowMajor spvWorkaroundRowMajor(NestedRowMajor wrap) { return wrap; } +mat4 spvWorkaroundRowMajor(mat4 wrap) { return wrap; } + +void main() +{ + NestedRowMajor rm2_loaded; + rm2_loaded.rm.B = spvWorkaroundRowMajor(_17.rm2).rm.B; + FragColor = (((rm2_loaded.rm.B * spvWorkaroundRowMajor(_35.rm.B)) * spvWorkaroundRowMajor(_42.A)) * spvWorkaroundRowMajor(_42.C)) * Clip; + FragColor += (_56.D * Clip); + FragColor += (_42.A[1] * Clip); +} + diff --git a/reference/shaders/frag/ubo_layout.frag b/reference/shaders/frag/ubo_layout.frag index 4b66e1396a7..bc0b01c065f 100644 --- a/reference/shaders/frag/ubo_layout.frag +++ b/reference/shaders/frag/ubo_layout.frag @@ -7,11 +7,6 @@ struct Str mat4 foo; }; -struct Str_1 -{ - mat4 foo; -}; - layout(binding = 0, std140) uniform UBO1 { layout(row_major) Str foo; @@ -19,7 +14,7 @@ layout(binding = 0, std140) uniform UBO1 layout(binding = 1, std140) uniform UBO2 { - Str_1 foo; + Str foo; } ubo0; layout(location = 0) out vec4 FragColor; diff --git a/reference/shaders/geom/geometry-passthrough.geom b/reference/shaders/geom/geometry-passthrough.geom new file mode 100644 index 00000000000..d0d8806ad2a --- /dev/null +++ 
b/reference/shaders/geom/geometry-passthrough.geom @@ -0,0 +1,27 @@ +#version 450 +#extension GL_NV_geometry_shader_passthrough : require +layout(triangles) in; + +layout(passthrough) in gl_PerVertex +{ + vec4 gl_Position; +} gl_in[]; + +layout(passthrough, location = 0) in VertexBlock +{ + int a; + int b; +} v1[3]; + +layout(location = 2) in VertexBlock2 +{ + int a; + layout(passthrough) int b; +} v2[3]; + + +void main() +{ + gl_Layer = (gl_InvocationID + v1[0].a) + v2[1].b; +} + diff --git a/reference/shaders/geom/multi-stream.geom b/reference/shaders/geom/multi-stream.geom new file mode 100644 index 00000000000..548164d7804 --- /dev/null +++ b/reference/shaders/geom/multi-stream.geom @@ -0,0 +1,14 @@ +#version 450 +layout(triangles) in; +layout(max_vertices = 2, points) out; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + EmitStreamVertex(0); + EndStreamPrimitive(0); + gl_Position = gl_in[0].gl_Position + vec4(2.0); + EmitStreamVertex(1); + EndStreamPrimitive(1); +} + diff --git a/reference/shaders/geom/transform-feedback-streams.geom b/reference/shaders/geom/transform-feedback-streams.geom new file mode 100644 index 00000000000..4d238b4adff --- /dev/null +++ b/reference/shaders/geom/transform-feedback-streams.geom @@ -0,0 +1,26 @@ +#version 450 +layout(points) in; +layout(max_vertices = 2, points) out; + +layout(xfb_buffer = 1, xfb_stride = 20, stream = 1) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(location = 0, xfb_buffer = 2, xfb_stride = 32, xfb_offset = 16, stream = 1) out vec4 vFoo; +layout(xfb_buffer = 3, xfb_stride = 16, stream = 2) out VertOut +{ + layout(location = 1, xfb_offset = 0) vec4 vBar; +} _23; + + +void main() +{ + gl_Position = vec4(1.0); + vFoo = vec4(3.0); + EmitStreamVertex(1); + _23.vBar = vec4(5.0); + EmitStreamVertex(2); +} + diff --git a/reference/shaders/legacy/fragment/explicit-lod.legacy.vert b/reference/shaders/legacy/fragment/explicit-lod.legacy.vert new file mode 
100644 index 00000000000..b73faa47ab5 --- /dev/null +++ b/reference/shaders/legacy/fragment/explicit-lod.legacy.vert @@ -0,0 +1,11 @@ +#version 100 + +uniform mediump sampler2D tex; + +varying mediump vec4 FragColor; + +void main() +{ + FragColor = texture2DLod(tex, vec2(0.4000000059604644775390625, 0.60000002384185791015625), 3.0); +} + diff --git a/reference/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag b/reference/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag new file mode 100644 index 00000000000..1c811d3089f --- /dev/null +++ b/reference/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag @@ -0,0 +1,39 @@ +#version 100 +precision mediump float; +precision highp int; + +struct Foo +{ + highp vec4 a; + highp vec4 b; +}; + +struct Bar +{ + highp vec4 a; + highp vec4 b; +}; + +struct Baz +{ + Foo foo; + Bar bar; +}; + +varying highp vec4 baz_foo_a; +varying highp vec4 baz_foo_b; +varying highp vec4 baz_bar_a; +varying highp vec4 baz_bar_b; +varying highp vec4 _33_a_a; +varying highp vec4 _33_a_b; +varying highp vec4 _33_b_a; +varying highp vec4 _33_b_b; + +void main() +{ + Baz bazzy = Baz(Foo(baz_foo_a, baz_foo_b), Bar(baz_bar_a, baz_bar_b)); + Foo bazzy_foo = Foo(baz_foo_a, baz_foo_b); + Bar bazzy_bar = Bar(baz_bar_a, baz_bar_b); + gl_FragData[0] = (((_33_a_a + _33_b_b) + bazzy.foo.b) + bazzy_foo.a) + bazzy_bar.b; +} + diff --git a/reference/shaders/legacy/fragment/round.legacy.frag b/reference/shaders/legacy/fragment/round.legacy.frag new file mode 100644 index 00000000000..9033bc3c56c --- /dev/null +++ b/reference/shaders/legacy/fragment/round.legacy.frag @@ -0,0 +1,13 @@ +#version 100 +precision mediump float; +precision highp int; + +varying highp vec4 vA; +varying highp float vB; + +void main() +{ + gl_FragData[0] = floor(vA + vec4(0.5)); + gl_FragData[0] *= floor(vB + float(0.5)); +} + diff --git a/reference/shaders/legacy/fragment/switch.legacy.frag b/reference/shaders/legacy/fragment/switch.legacy.frag new file 
mode 100644 index 00000000000..9155d1cdbcc --- /dev/null +++ b/reference/shaders/legacy/fragment/switch.legacy.frag @@ -0,0 +1,78 @@ +#version 100 +precision mediump float; +precision highp int; + +varying highp float vIndexF; + +void main() +{ + int vIndex = int(vIndexF); + highp vec4 v = vec4(0.0); + for (int spvDummy21 = 0; spvDummy21 < 1; spvDummy21++) + { + if (vIndex == 2) + { + v = vec4(0.0, 2.0, 3.0, 4.0); + break; + } + else if ((vIndex == 4) || (vIndex == 5)) + { + v = vec4(1.0, 2.0, 3.0, 4.0); + break; + } + else if ((vIndex == 8) || (vIndex == 9)) + { + v = vec4(40.0, 20.0, 30.0, 40.0); + break; + } + else if (vIndex == 10) + { + v = vec4(10.0); + highp vec4 _43 = v; + highp vec4 _44 = vec4(1.0); + highp vec4 _45 = _43 + _44; + v = _45; + highp vec4 _46 = v; + highp vec4 _47 = vec4(2.0); + highp vec4 _48 = _46 + _47; + v = _48; + break; + } + else if (vIndex == 11) + { + highp vec4 _43 = v; + highp vec4 _44 = vec4(1.0); + highp vec4 _45 = _43 + _44; + v = _45; + highp vec4 _46 = v; + highp vec4 _47 = vec4(2.0); + highp vec4 _48 = _46 + _47; + v = _48; + break; + } + else if (vIndex == 12) + { + highp vec4 _46 = v; + highp vec4 _47 = vec4(2.0); + highp vec4 _48 = _46 + _47; + v = _48; + break; + } + else + { + v = vec4(10.0, 20.0, 30.0, 40.0); + break; + } + } + highp vec4 w = vec4(20.0); + for (int spvDummy165 = 0; spvDummy165 < 1; spvDummy165++) + { + if ((vIndex == 10) || (vIndex == 20)) + { + w = vec4(40.0); + break; + } + } + gl_FragData[0] = v + w; +} + diff --git a/reference/shaders/legacy/vert/implicit-lod.legacy.vert b/reference/shaders/legacy/vert/implicit-lod.legacy.vert index 6e441074482..2d2050498e0 100644 --- a/reference/shaders/legacy/vert/implicit-lod.legacy.vert +++ b/reference/shaders/legacy/vert/implicit-lod.legacy.vert @@ -4,6 +4,6 @@ uniform mediump sampler2D tex; void main() { - gl_Position = texture2D(tex, vec2(0.4000000059604644775390625, 0.60000002384185791015625)); + gl_Position = texture2DLod(tex, 
vec2(0.4000000059604644775390625, 0.60000002384185791015625), 0.0); } diff --git a/reference/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert b/reference/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert new file mode 100644 index 00000000000..fa1d643bc4f --- /dev/null +++ b/reference/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert @@ -0,0 +1,18 @@ +#version 100 + +struct Foo +{ + float a[4]; +}; + +varying float foo_a[4]; + +void main() +{ + gl_Position = vec4(1.0); + for (int i = 0; i < 4; i++) + { + foo_a[i] = float(i + 2); + } +} + diff --git a/reference/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert b/reference/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert new file mode 100644 index 00000000000..cf807c41f7f --- /dev/null +++ b/reference/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert @@ -0,0 +1,49 @@ +#version 100 + +struct Foo +{ + vec4 a; + vec4 b; +}; + +struct Bar +{ + vec4 a; + vec4 b; +}; + +struct Baz +{ + Foo foo; + Bar bar; +}; + +varying vec4 _12_a_a; +varying vec4 _12_a_b; +varying vec4 _12_b_a; +varying vec4 _12_b_b; +varying vec4 baz_foo_a; +varying vec4 baz_foo_b; +varying vec4 baz_bar_a; +varying vec4 baz_bar_b; + +void main() +{ + _12_a_a = vec4(10.0); + _12_a_b = vec4(20.0); + _12_b_a = vec4(30.0); + _12_b_b = vec4(40.0); + _12_a_a = Foo(vec4(50.0), vec4(60.0)).a; + _12_a_b = Foo(vec4(50.0), vec4(60.0)).b; + _12_b_a = Bar(vec4(50.0), vec4(60.0)).a; + _12_b_b = Bar(vec4(50.0), vec4(60.0)).b; + baz_foo_a = Foo(vec4(100.0), vec4(200.0)).a; + baz_foo_b = Foo(vec4(100.0), vec4(200.0)).b; + baz_bar_a = Bar(vec4(300.0), vec4(400.0)).a; + baz_bar_b = Bar(vec4(300.0), vec4(400.0)).b; + baz_foo_a = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).foo.a; + baz_foo_b = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).foo.b; + baz_bar_a = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).bar.a; + 
baz_bar_b = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).bar.b; +} + diff --git a/reference/shaders/legacy/vert/struct-varying.legacy.vert b/reference/shaders/legacy/vert/struct-varying.legacy.vert index 261e986034f..fcdeb55bb6f 100644 --- a/reference/shaders/legacy/vert/struct-varying.legacy.vert +++ b/reference/shaders/legacy/vert/struct-varying.legacy.vert @@ -12,16 +12,10 @@ varying vec2 vout_b; void main() { Output s = Output(vec4(0.5), vec2(0.25)); - { - Output vout = s; - vout_a = vout.a; - vout_b = vout.b; - } - { - Output vout = s; - vout_a = vout.a; - vout_b = vout.b; - } + vout_a = s.a; + vout_b = s.b; + vout_a = s.a; + vout_b = s.b; Output tmp = Output(vout_a, vout_b); vout_a = tmp.a; vout_b = tmp.b; diff --git a/reference/shaders/legacy/vert/switch-nested.legacy.vert b/reference/shaders/legacy/vert/switch-nested.legacy.vert new file mode 100644 index 00000000000..3ec027b4472 --- /dev/null +++ b/reference/shaders/legacy/vert/switch-nested.legacy.vert @@ -0,0 +1,41 @@ +#version 100 + +struct UBO +{ + int func_arg; + int inner_func_arg; +}; + +uniform UBO _34; + +vec4 test_inner_func(bool b) +{ + if (b) + { + return vec4(1.0); + } + else + { + return vec4(0.0); + } +} + +vec4 test_func(bool b) +{ + if (b) + { + bool param = _34.inner_func_arg != 0; + return test_inner_func(param); + } + else + { + return vec4(0.0); + } +} + +void main() +{ + bool param = _34.func_arg != 0; + gl_Position = test_func(param); +} + diff --git a/reference/shaders/legacy/vert/transpose.legacy.vert b/reference/shaders/legacy/vert/transpose.legacy.vert index c73d1a11d92..ce5cf8b172f 100644 --- a/reference/shaders/legacy/vert/transpose.legacy.vert +++ b/reference/shaders/legacy/vert/transpose.legacy.vert @@ -11,12 +11,28 @@ uniform Buffer _13; attribute vec4 Position; +highp mat4 spvWorkaroundRowMajor(highp mat4 wrap) { return wrap; } +mediump mat4 spvWorkaroundRowMajorMP(mediump mat4 wrap) { return wrap; } + +mat4 spvTranspose(mat4 m) +{ + return 
mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]); +} + void main() { - vec4 c0 = _13.M * (Position * _13.MVPRowMajor); - vec4 c1 = _13.M * (_13.MVPColMajor * Position); - vec4 c2 = _13.M * (_13.MVPRowMajor * Position); - vec4 c3 = _13.M * (Position * _13.MVPColMajor); - gl_Position = ((c0 + c1) + c2) + c3; + vec4 c0 = spvWorkaroundRowMajor(_13.M) * (Position * _13.MVPRowMajor); + vec4 c1 = spvWorkaroundRowMajor(_13.M) * (spvWorkaroundRowMajor(_13.MVPColMajor) * Position); + vec4 c2 = spvWorkaroundRowMajor(_13.M) * (_13.MVPRowMajor * Position); + vec4 c3 = spvWorkaroundRowMajor(_13.M) * (Position * spvWorkaroundRowMajor(_13.MVPColMajor)); + vec4 c4 = _13.MVPRowMajor * Position; + vec4 c5 = Position * spvWorkaroundRowMajor(_13.MVPColMajor); + vec4 c6 = Position * _13.MVPRowMajor; + vec4 c7 = spvWorkaroundRowMajor(_13.MVPColMajor) * Position; + vec4 c8 = (spvTranspose(_13.MVPRowMajor) * 2.0) * Position; + vec4 c9 = (spvTranspose(spvWorkaroundRowMajor(_13.MVPColMajor)) * 2.0) * Position; + vec4 c10 = Position * (spvTranspose(_13.MVPRowMajor) * 2.0); + vec4 c11 = Position * (spvTranspose(spvWorkaroundRowMajor(_13.MVPColMajor)) * 2.0); + gl_Position = ((((((((((c0 + c1) + c2) + c3) + c4) + c5) + c6) + c7) + c8) + c9) + c10) + c11; } diff --git a/reference/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk b/reference/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..20597b986e7 --- /dev/null +++ b/reference/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, lines) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float 
gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(vec3(gl_GlobalInvocationID), 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(vec3(gl_GlobalInvocationID), 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0u, 1u) + uvec2(gl_LocalInvocationIndex); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + diff --git a/reference/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk 
b/reference/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..ecb8285df2b --- /dev/null +++ b/reference/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, points) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(vec3(gl_GlobalInvocationID), 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(vec3(gl_GlobalInvocationID), 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + 
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + diff --git a/reference/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk b/reference/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..e10459d7b9c --- /dev/null +++ b/reference/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, triangles) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(vec3(gl_GlobalInvocationID), 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(vec3(gl_GlobalInvocationID), 2.0); + 
outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0u, 1u, 2u) + uvec3(gl_LocalInvocationIndex); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + diff --git a/reference/shaders/tesc/water_tess.tesc b/reference/shaders/tesc/water_tess.tesc index 8d5a4a30479..6f499380b74 100644 --- a/reference/shaders/tesc/water_tess.tesc +++ b/reference/shaders/tesc/water_tess.tesc @@ -26,9 +26,7 @@ bool frustum_cull(vec2 p0) float radius = 0.5 * length(bb_max - bb_min); vec3 f0 = vec3(dot(_41.uFrustum[0], vec4(center, 1.0)), dot(_41.uFrustum[1], vec4(center, 1.0)), dot(_41.uFrustum[2], vec4(center, 1.0))); vec3 f1 = vec3(dot(_41.uFrustum[3], vec4(center, 1.0)), dot(_41.uFrustum[4], vec4(center, 1.0)), dot(_41.uFrustum[5], vec4(center, 1.0))); - vec3 _199 = f0; - float _200 = radius; - bool _205 = any(lessThanEqual(_199, vec3(-_200))); + bool _205 = any(lessThanEqual(f0, vec3(-radius))); bool _215; if (!_205) { diff --git a/reference/shaders/tese/load-array-of-array.tese b/reference/shaders/tese/load-array-of-array.tese new file mode 100644 index 00000000000..7fab08ef66e --- /dev/null +++ b/reference/shaders/tese/load-array-of-array.tese @@ -0,0 +1,16 @@ +#version 450 
+layout(quads, ccw, equal_spacing) in; + +layout(location = 0) in vec4 vTexCoord[][1]; + +void main() +{ + vec4 _17_unrolled[32][1]; + for (int i = 0; i < int(32); i++) + { + _17_unrolled[i] = vTexCoord[i]; + } + vec4 tmp[32][1] = _17_unrolled; + gl_Position = (tmp[0][0] + tmp[2][0]) + tmp[3][0]; +} + diff --git a/reference/shaders/tese/patch-input-array.tese b/reference/shaders/tese/patch-input-array.tese new file mode 100644 index 00000000000..413d8b391fb --- /dev/null +++ b/reference/shaders/tese/patch-input-array.tese @@ -0,0 +1,10 @@ +#version 450 +layout(quads, ccw, equal_spacing) in; + +layout(location = 0) patch in float P[4]; + +void main() +{ + gl_Position = vec4(P[0], P[1], P[2], P[3]); +} + diff --git a/reference/shaders/tese/water_tess.tese b/reference/shaders/tese/water_tess.tese index e743ed3e9c0..a0cf42d513b 100644 --- a/reference/shaders/tese/water_tess.tese +++ b/reference/shaders/tese/water_tess.tese @@ -52,7 +52,7 @@ void main() vGradNormalTex = vec4(tex + (_31.uInvHeightmapSize * 0.5), tex * _31.uScale.zw); vec2 param_2 = tex; vec2 param_3 = off; - vec2 param_4 = lod; + mediump vec2 param_4 = lod; vec3 height_displacement = sample_height_displacement(param_2, param_3, param_4); pos += height_displacement.yz; vWorld = vec3(pos.x, height_displacement.x, pos.y); diff --git a/reference/shaders/vert/ground.vert b/reference/shaders/vert/ground.vert index 69f92534ccb..73923c447f6 100644 --- a/reference/shaders/vert/ground.vert +++ b/reference/shaders/vert/ground.vert @@ -1,4 +1,7 @@ #version 310 es +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif struct PatchData { @@ -44,7 +47,11 @@ layout(binding = 1) uniform mediump sampler2D TexLOD; layout(binding = 0) uniform mediump sampler2D TexHeightmap; layout(location = 1) in vec4 LODWeights; +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif layout(location = 0) in 
vec2 Position; layout(location = 1) out vec3 EyeVec; layout(location = 0) out vec2 TexCoord; @@ -67,7 +74,6 @@ vec2 warp_position() { _110 = 0u; } - uint _116 = _110; uint _120; if (uPosition.y < 32u) { @@ -77,7 +83,7 @@ vec2 warp_position() { _120 = 0u; } - uvec2 rounding = uvec2(_116, _120); + uvec2 rounding = uvec2(_110, _120); vec4 lower_upper_snapped = vec4((uPosition + rounding).xyxy & (~mask).xxyy); return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, vec2(fract_lod)); } diff --git a/reference/shaders/vert/no-contraction.vert b/reference/shaders/vert/no-contraction.vert new file mode 100644 index 00000000000..83e392c6cc2 --- /dev/null +++ b/reference/shaders/vert/no-contraction.vert @@ -0,0 +1,24 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in vec4 vB; +layout(location = 2) in vec4 vC; + +void main() +{ + precise vec4 _15 = vA * vB; + vec4 mul = _15; + precise vec4 _19 = vA + vB; + vec4 add = _19; + precise vec4 _23 = vA - vB; + vec4 sub = _23; + precise vec4 _27 = vA * vB; + precise vec4 _30 = _27 + vC; + vec4 mad = _30; + precise vec4 _34 = mul + add; + precise vec4 _36 = _34 + sub; + precise vec4 _38 = _36 + mad; + vec4 summed = _38; + gl_Position = summed; +} + diff --git a/reference/shaders/vert/ocean.vert b/reference/shaders/vert/ocean.vert index 720bd7d0de7..60fa80ebdb2 100644 --- a/reference/shaders/vert/ocean.vert +++ b/reference/shaders/vert/ocean.vert @@ -1,4 +1,7 @@ #version 310 es +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif struct PatchData { @@ -45,7 +48,11 @@ layout(binding = 1) uniform mediump sampler2D TexLOD; layout(binding = 0) uniform mediump sampler2D TexDisplacement; layout(location = 1) in vec4 LODWeights; +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif layout(location = 0) in vec4 Position; layout(location = 0) out vec3 EyeVec; layout(location = 1) out 
vec4 TexCoord; diff --git a/reference/shaders/vert/read-from-row-major-array.vert b/reference/shaders/vert/read-from-row-major-array.vert index 1c950f3fa41..8b5ec967388 100644 --- a/reference/shaders/vert/read-from-row-major-array.vert +++ b/reference/shaders/vert/read-from-row-major-array.vert @@ -8,6 +8,9 @@ layout(binding = 0, std140) uniform Block layout(location = 0) in vec4 a_position; layout(location = 0) out mediump float v_vtxResult; +highp mat2x3 spvWorkaroundRowMajor(highp mat2x3 wrap) { return wrap; } +mediump mat2x3 spvWorkaroundRowMajorMP(mediump mat2x3 wrap) { return wrap; } + mediump float compare_float(float a, float b) { return float(abs(a - b) < 0.0500000007450580596923828125); @@ -37,7 +40,7 @@ void main() { gl_Position = a_position; mediump float result = 1.0; - mat2x3 param = _104.var[0][0]; + mat2x3 param = spvWorkaroundRowMajor(_104.var[0][0]); mat2x3 param_1 = mat2x3(vec3(2.0, 6.0, -6.0), vec3(0.0, 5.0, 5.0)); result *= compare_mat2x3(param, param_1); v_vtxResult = result; diff --git a/reference/shaders/vert/row-major-workaround.vert b/reference/shaders/vert/row-major-workaround.vert new file mode 100644 index 00000000000..4fe6885d101 --- /dev/null +++ b/reference/shaders/vert/row-major-workaround.vert @@ -0,0 +1,30 @@ +#version 310 es + +layout(binding = 0, std140) uniform Buffer +{ + layout(row_major) mat4 HP; + layout(row_major) mediump mat4 MP; +} _21; + +layout(binding = 1, std140) uniform Buffer2 +{ + layout(row_major) mediump mat4 MP2; +} _39; + +layout(location = 0) out vec4 H; +layout(location = 0) in vec4 Hin; +layout(location = 1) out mediump vec4 M; +layout(location = 1) in mediump vec4 Min; +layout(location = 2) out mediump vec4 M2; + +highp mat4 spvWorkaroundRowMajor(highp mat4 wrap) { return wrap; } +mediump mat4 spvWorkaroundRowMajorMP(mediump mat4 wrap) { return wrap; } + +void main() +{ + gl_Position = vec4(1.0); + H = spvWorkaroundRowMajor(_21.HP) * Hin; + M = spvWorkaroundRowMajor(_21.MP) * Min; + M2 = 
spvWorkaroundRowMajorMP(_39.MP2) * Min; +} + diff --git a/reference/shaders/vert/texture_buffer.vert b/reference/shaders/vert/texture_buffer.vert index e9442ce1196..217804dfce9 100644 --- a/reference/shaders/vert/texture_buffer.vert +++ b/reference/shaders/vert/texture_buffer.vert @@ -1,5 +1,5 @@ #version 310 es -#extension GL_OES_texture_buffer : require +#extension GL_EXT_texture_buffer : require layout(binding = 4) uniform highp samplerBuffer uSamp; layout(binding = 5, rgba32f) uniform readonly highp imageBuffer uSampo; diff --git a/reference/shaders/vert/transform-feedback-decorations.vert b/reference/shaders/vert/transform-feedback-decorations.vert new file mode 100644 index 00000000000..23e7cf3c19d --- /dev/null +++ b/reference/shaders/vert/transform-feedback-decorations.vert @@ -0,0 +1,22 @@ +#version 450 + +layout(xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(location = 0, xfb_buffer = 2, xfb_stride = 32, xfb_offset = 16) out vec4 vFoo; +layout(xfb_buffer = 3, xfb_stride = 16) out VertOut +{ + layout(location = 1, xfb_offset = 0) vec4 vBar; +} _22; + + +void main() +{ + gl_Position = vec4(1.0); + vFoo = vec4(3.0); + _22.vBar = vec4(5.0); +} + diff --git a/reference/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk index 82ebb960856..771d0496447 100644 --- a/reference/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk +++ b/reference/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk @@ -3,7 +3,7 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer Block; -layout(buffer_reference, std430) buffer Block +layout(buffer_reference, buffer_reference_align = 4, std430) buffer Block { float v; }; diff --git a/reference/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp.vk 
b/reference/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp.vk new file mode 100644 index 00000000000..f5907d3e2c0 --- /dev/null +++ b/reference/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp.vk @@ -0,0 +1,28 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer Bar; +layout(buffer_reference) buffer Foo; +layout(buffer_reference, buffer_reference_align = 8, std430) buffer Bar +{ + uint a; + uint b; + Foo foo; +}; + +layout(buffer_reference, std430) buffer Foo +{ + uint v; +}; + +layout(push_constant, std430) uniform Push +{ + Bar bar; +} _13; + +void main() +{ + uint _24 = atomicAdd(_13.bar.b, 1u); +} + diff --git a/reference/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp.vk new file mode 100644 index 00000000000..9cd3d3e5bbe --- /dev/null +++ b/reference/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp.vk @@ -0,0 +1,29 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer Bar; +layout(buffer_reference) buffer Foo; +layout(buffer_reference, buffer_reference_align = 8, std430) buffer Bar +{ + uint a; + uint b; + Foo foo; +}; + +layout(buffer_reference, std430) buffer Foo +{ + uint v; +}; + +layout(push_constant, std430) uniform Push +{ + Bar bar; +} _15; + +void main() +{ + uint v = _15.bar.b; + uint _31 = atomicAdd(_15.bar.a, v); +} + diff --git a/reference/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp.vk b/reference/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp.vk new file mode 100644 index 00000000000..20a4f1b4239 --- /dev/null +++ 
b/reference/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp.vk @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer PtrInt; +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PtrInt +{ + int value; +}; + +layout(set = 0, binding = 0, std430) buffer Buf +{ + uvec2 ptr; + PtrInt ptrint; +} _13; + +void main() +{ + _13.ptr = uvec2(_13.ptrint); +} + diff --git a/reference/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp.vk new file mode 100644 index 00000000000..5cf6e2df36d --- /dev/null +++ b/reference/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp.vk @@ -0,0 +1,21 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer PtrInt; +layout(buffer_reference, buffer_reference_align = 16, std430) buffer PtrInt +{ + int value; +}; + +layout(set = 0, binding = 0, std430) buffer Buf +{ + uvec2 ptr; +} _10; + +void main() +{ + PtrInt(_10.ptr).value = 10; +} + diff --git a/reference/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk index 5752f81b268..8923d21d780 100644 --- a/reference/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk +++ b/reference/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk @@ -4,12 +4,12 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer PtrUint; layout(buffer_reference) buffer PtrInt; -layout(buffer_reference, std430) buffer PtrUint +layout(buffer_reference, buffer_reference_align = 4, 
std430) buffer PtrUint { uint value; }; -layout(buffer_reference, std430) buffer PtrInt +layout(buffer_reference, buffer_reference_align = 16, std430) buffer PtrInt { int value; }; diff --git a/reference/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp.vk new file mode 100644 index 00000000000..b7e88062a04 --- /dev/null +++ b/reference/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer RO; +layout(buffer_reference) buffer RW; +layout(buffer_reference) buffer WO; +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer RO +{ + vec4 v[]; +}; + +layout(buffer_reference, buffer_reference_align = 16, std430) restrict buffer RW +{ + vec4 v[]; +}; + +layout(buffer_reference, buffer_reference_align = 16, std430) coherent writeonly buffer WO +{ + vec4 v[]; +}; + +layout(push_constant, std430) uniform Registers +{ + RO ro; + RW rw; + WO wo; +} registers; + +void main() +{ + registers.rw.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; + registers.wo.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; +} + diff --git a/reference/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk index e22974114bd..241483ede4c 100644 --- a/reference/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk +++ b/reference/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk @@ -1,10 +1,14 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#else +#error No extension available for 64-bit integers. 
+#endif #extension GL_EXT_buffer_reference : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer Node; -layout(buffer_reference, std430) buffer Node +layout(buffer_reference, buffer_reference_align = 16, std430) buffer Node { layout(offset = 0) int value; layout(offset = 16) Node next; diff --git a/reference/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp.vk new file mode 100644 index 00000000000..db022309be1 --- /dev/null +++ b/reference/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp.vk @@ -0,0 +1,23 @@ +#version 450 +#extension GL_EXT_shader_atomic_float : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std430) buffer SSBO +{ + float v; +} _18; + +layout(set = 0, binding = 0, r32f) uniform image2D uImage; + +shared float shared_v; + +void main() +{ + float _15 = atomicAdd(shared_v, 2.0); + float value = _15; + float _24 = atomicAdd(_18.v, value); + float _39 = imageAtomicAdd(uImage, ivec2(gl_GlobalInvocationID.xy), value); + float _45 = imageAtomicExchange(uImage, ivec2(gl_GlobalInvocationID.xy), value); + value = _45; +} + diff --git a/reference/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp.vk b/reference/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp.vk new file mode 100644 index 00000000000..fbe5e3d9640 --- /dev/null +++ b/reference/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp.vk @@ -0,0 +1,72 @@ +#version 460 +#extension GL_EXT_ray_query : require +#extension GL_EXT_ray_flags_primitive_culling : require +#extension GL_EXT_ray_tracing : require +layout(primitive_culling); +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std140) uniform Params +{ + uint ray_flags; + uint cull_mask; + vec3 origin; + float tmin; + vec3 dir; + float tmax; + float thit; + uvec2 bda; +} _19; + +layout(set = 0, binding = 
0) uniform accelerationStructureEXT AS; + +rayQueryEXT q; +rayQueryEXT q2[2]; + +void main() +{ + rayQueryInitializeEXT(q, AS, _19.ray_flags, _19.cull_mask, _19.origin, _19.tmin, _19.dir, _19.tmax); + rayQueryInitializeEXT(q2[1], accelerationStructureEXT(_19.bda), _19.ray_flags, _19.cull_mask, _19.origin, _19.tmin, _19.dir, _19.tmax); + bool _67 = rayQueryProceedEXT(q); + bool res = _67; + rayQueryTerminateEXT(q2[0]); + rayQueryGenerateIntersectionEXT(q, _19.thit); + rayQueryConfirmIntersectionEXT(q2[1]); + float _75 = rayQueryGetRayTMinEXT(q); + float fval = _75; + uint _79 = rayQueryGetRayFlagsEXT(q2[0]); + uint type = _79; + vec3 _82 = rayQueryGetWorldRayDirectionEXT(q); + vec3 fvals = _82; + vec3 _83 = rayQueryGetWorldRayOriginEXT(q); + fvals = _83; + uint _86 = rayQueryGetIntersectionTypeEXT(q2[1], bool(1)); + type = _86; + bool _88 = rayQueryGetIntersectionCandidateAABBOpaqueEXT(q2[1]); + res = _88; + float _91 = rayQueryGetIntersectionTEXT(q2[1], bool(0)); + fval = _91; + int _94 = rayQueryGetIntersectionInstanceCustomIndexEXT(q, bool(1)); + int ival = _94; + int _96 = rayQueryGetIntersectionInstanceIdEXT(q2[0], bool(0)); + ival = _96; + uint _97 = rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(q, bool(1)); + type = _97; + int _99 = rayQueryGetIntersectionGeometryIndexEXT(q2[1], bool(0)); + ival = _99; + int _100 = rayQueryGetIntersectionPrimitiveIndexEXT(q, bool(1)); + ival = _100; + vec2 _103 = rayQueryGetIntersectionBarycentricsEXT(q2[0], bool(0)); + fvals.x = _103.x; + fvals.y = _103.y; + bool _110 = rayQueryGetIntersectionFrontFaceEXT(q, bool(1)); + res = _110; + vec3 _111 = rayQueryGetIntersectionObjectRayDirectionEXT(q, bool(0)); + fvals = _111; + vec3 _113 = rayQueryGetIntersectionObjectRayOriginEXT(q2[0], bool(1)); + fvals = _113; + mat4x3 _117 = rayQueryGetIntersectionObjectToWorldEXT(q, bool(0)); + mat4x3 matrices = _117; + mat4x3 _119 = rayQueryGetIntersectionWorldToObjectEXT(q2[1], bool(1)); + matrices = _119; +} + diff --git 
a/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag b/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag index af64fb87aa1..716e283baf8 100644 --- a/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag +++ b/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag @@ -9,7 +9,7 @@ layout(location = 0) out float FragColor; float samp2(mediump sampler2DShadow SPIRV_Cross_Combinedts) { - return texture(SPIRV_Cross_Combinedts, vec3(vec3(1.0).xy, vec3(1.0).z)); + return texture(SPIRV_Cross_Combinedts, vec3(vec3(1.0).xy, 1.0)); } float samp3(mediump sampler2D SPIRV_Cross_Combinedts) diff --git a/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag.vk b/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag.vk index f475ae53a9a..5ba3bdf35c2 100644 --- a/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag.vk +++ b/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag.vk @@ -10,7 +10,7 @@ layout(location = 0) out float FragColor; float samp2(mediump texture2D t, mediump samplerShadow s) { - return texture(sampler2DShadow(t, s), vec3(vec3(1.0).xy, vec3(1.0).z)); + return texture(sampler2DShadow(t, s), vec3(vec3(1.0).xy, 1.0)); } float samp3(mediump texture2D t, mediump sampler s) diff --git a/reference/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag.vk b/reference/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag.vk new file mode 100644 index 00000000000..153164920f0 --- /dev/null +++ b/reference/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag.vk @@ -0,0 +1,15 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out vec4 FragColor; + +void main() +{ + bool _15 = helperInvocationEXT(); + demote; + if (!_15) + { + FragColor = vec4(1.0, 0.0, 0.0, 1.0); + } +} + diff --git a/reference/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag.vk 
b/reference/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag.vk new file mode 100644 index 00000000000..302f8354021 --- /dev/null +++ b/reference/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag.vk @@ -0,0 +1,10 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void main() +{ + demote; + bool _9 = helperInvocationEXT(); + bool helper = _9; +} + diff --git a/reference/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk b/reference/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk index 6e98ca0bff1..9a7862a4637 100644 --- a/reference/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk +++ b/reference/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk @@ -1,19 +1,24 @@ #version 450 #extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_samplerless_texture_functions : require layout(set = 0, binding = 2, std140) uniform UBO { vec4 v[64]; } ubos[]; -layout(set = 0, binding = 3, std430) readonly buffer SSBO +layout(set = 0, binding = 3, std430) buffer SSBO { + uint counter; vec4 v[]; } ssbos[]; layout(set = 0, binding = 0) uniform texture2D uSamplers[]; layout(set = 0, binding = 1) uniform sampler uSamps[]; layout(set = 0, binding = 4) uniform sampler2D uCombinedSamplers[]; +layout(set = 0, binding = 0) uniform texture2DMS uSamplersMS[]; +layout(set = 0, binding = 5, r32f) uniform image2D uImages[]; +layout(set = 0, binding = 5, r32ui) uniform uimage2D uImagesU32[]; layout(location = 0) flat in int vIndex; layout(location = 0) out vec4 FragColor; @@ -22,9 +27,48 @@ layout(location = 1) in vec2 vUV; void main() { int i = vIndex; - FragColor = texture(sampler2D(uSamplers[nonuniformEXT(i + 10)], uSamps[nonuniformEXT(i + 40)]), vUV); - FragColor = texture(uCombinedSamplers[nonuniformEXT(i + 10)], vUV); - FragColor += ubos[nonuniformEXT(i + 20)].v[i + 40]; - FragColor += ssbos[nonuniformEXT(i + 50)].v[i + 60]; + FragColor = texture(nonuniformEXT(sampler2D(uSamplers[i + 10], uSamps[i + 
40])), vUV); + int _49 = i + 10; + FragColor = texture(uCombinedSamplers[nonuniformEXT(_49)], vUV); + int _65 = i + 20; + int _69 = i + 40; + FragColor += ubos[nonuniformEXT(_65)].v[_69]; + int _83 = i + 50; + int _88 = i + 60; + FragColor += ssbos[nonuniformEXT(_83)].v[_88]; + int _96 = i + 60; + int _100 = i + 70; + ssbos[nonuniformEXT(_96)].v[_100] = vec4(20.0); + int _106 = i + 10; + FragColor = texelFetch(uSamplers[nonuniformEXT(_106)], ivec2(vUV), 0); + int _116 = i + 100; + uint _122 = atomicAdd(ssbos[_116].counter, 100u); + vec2 queried = textureQueryLod(nonuniformEXT(sampler2D(uSamplers[i + 10], uSamps[i + 40])), vUV); + int _139 = i + 10; + queried += textureQueryLod(uCombinedSamplers[nonuniformEXT(_139)], vUV); + vec4 _147 = FragColor; + vec2 _149 = _147.xy + queried; + FragColor.x = _149.x; + FragColor.y = _149.y; + int _157 = i + 20; + FragColor.x += float(textureQueryLevels(uSamplers[nonuniformEXT(_157)])); + int _172 = i + 20; + FragColor.y += float(textureSamples(uSamplersMS[nonuniformEXT(_172)])); + int _184 = i + 20; + vec4 _189 = FragColor; + vec2 _191 = _189.xy + vec2(textureSize(uSamplers[nonuniformEXT(_184)], 0)); + FragColor.x = _191.x; + FragColor.y = _191.y; + int _202 = i + 50; + FragColor += imageLoad(uImages[nonuniformEXT(_202)], ivec2(vUV)); + int _213 = i + 20; + vec4 _218 = FragColor; + vec2 _220 = _218.xy + vec2(imageSize(uImages[nonuniformEXT(_213)])); + FragColor.x = _220.x; + FragColor.y = _220.y; + int _227 = i + 60; + imageStore(uImages[nonuniformEXT(_227)], ivec2(vUV), vec4(50.0)); + int _240 = i + 70; + uint _248 = imageAtomicAdd(uImagesU32[nonuniformEXT(_240)], ivec2(vUV), 40u); } diff --git a/reference/shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk b/reference/shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk deleted file mode 100644 index 434cb3d3d9f..00000000000 --- a/reference/shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk +++ 
/dev/null @@ -1,24 +0,0 @@ -#version 450 -#extension GL_EXT_scalar_block_layout : require - -layout(set = 0, binding = 0, std430) uniform UBO -{ - float a[1024]; - vec3 b[2]; -} _17; - -layout(set = 0, binding = 1, std430) uniform UBOEnhancedLayout -{ - layout(offset = 0) float c[1024]; - layout(offset = 4096) vec3 d[2]; - layout(offset = 10000) float e; -} _30; - -layout(location = 0) out float FragColor; -layout(location = 0) flat in int vIndex; - -void main() -{ - FragColor = (_17.a[vIndex] + _30.c[vIndex]) + _30.e; -} - diff --git a/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag b/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag index 43393f4e770..8ca3085a10f 100644 --- a/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag +++ b/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag @@ -30,11 +30,11 @@ void main() { vec2 off = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[1], 0)); vec2 off2 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[2], 1)); - highp vec2 param = (vTex + off) + off2; + vec2 param = (vTex + off) + off2; vec4 c0 = sample_func(param, SPIRV_Cross_CombineduTextureuSampler); - highp vec2 param_1 = (vTex + off) + off2; + vec2 param_1 = (vTex + off) + off2; vec4 c1 = sample_func_dual(param_1, SPIRV_Cross_CombineduTextureuSampler[1]); - highp vec2 param_2 = (vTex + off) + off2; + vec2 param_2 = (vTex + off) + off2; vec4 c2 = sample_func_dual_array(param_2, SPIRV_Cross_CombineduTextureuSampler); vec4 c3 = texture(SPIRV_Cross_CombineduTextureArrayuSampler[3], vTex3); vec4 c4 = texture(SPIRV_Cross_CombineduTextureCubeuSampler[1], vTex3); diff --git a/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk b/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk index 495874ecc23..0afa489c588 100644 --- a/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk +++ 
b/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk @@ -31,11 +31,11 @@ void main() { vec2 off = vec2(1.0) / vec2(textureSize(sampler2D(uTexture[1], uSampler), 0)); vec2 off2 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture[2], uSampler), 1)); - highp vec2 param = (vTex + off) + off2; + vec2 param = (vTex + off) + off2; vec4 c0 = sample_func(uSampler, param); - highp vec2 param_1 = (vTex + off) + off2; + vec2 param_1 = (vTex + off) + off2; vec4 c1 = sample_func_dual(uSampler, uTexture[1], param_1); - highp vec2 param_2 = (vTex + off) + off2; + vec2 param_2 = (vTex + off) + off2; vec4 c2 = sample_func_dual_array(uSampler, uTexture, param_2); vec4 c3 = texture(sampler2DArray(uTextureArray[3], uSampler), vTex3); vec4 c4 = texture(samplerCube(uTextureCube[1], uSampler), vTex3); diff --git a/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag b/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag index 78477cfbae1..c2530d5d8e7 100644 --- a/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag +++ b/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag @@ -25,9 +25,9 @@ void main() { vec2 off = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 0)); vec2 off2 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 1)); - highp vec2 param = (vTex + off) + off2; + vec2 param = (vTex + off) + off2; vec4 c0 = sample_func(param, SPIRV_Cross_CombineduTextureuSampler); - highp vec2 param_1 = (vTex + off) + off2; + vec2 param_1 = (vTex + off) + off2; vec4 c1 = sample_func_dual(param_1, SPIRV_Cross_CombineduTextureuSampler); vec4 c2 = texture(SPIRV_Cross_CombineduTextureArrayuSampler, vTex3); vec4 c3 = texture(SPIRV_Cross_CombineduTextureCubeuSampler, vTex3); diff --git a/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk b/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk index cfa2f39616c..105379d770c 100644 --- 
a/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk +++ b/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk @@ -26,9 +26,9 @@ void main() { vec2 off = vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 0)); vec2 off2 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 1)); - highp vec2 param = (vTex + off) + off2; + vec2 param = (vTex + off) + off2; vec4 c0 = sample_func(uSampler, param); - highp vec2 param_1 = (vTex + off) + off2; + vec2 param_1 = (vTex + off) + off2; vec4 c1 = sample_func_dual(uSampler, uTexture, param_1); vec4 c2 = texture(sampler2DArray(uTextureArray, uSampler), vTex3); vec4 c3 = texture(samplerCube(uTextureCube, uSampler), vTex3); diff --git a/reference/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk b/reference/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk index 34bfea02604..04c4062a6c9 100644 --- a/reference/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk +++ b/reference/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk @@ -8,6 +8,7 @@ layout(location = 0) out float FragColor; void main() { - FragColor = float(f); + float _17 = float(f); + FragColor = _17; } diff --git a/reference/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit.vk b/reference/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit.vk new file mode 100644 index 00000000000..4cb6bf8d8fa --- /dev/null +++ b/reference/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit.vk @@ -0,0 +1,22 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void in_func() +{ + if (payload > 0.0) + { + ignoreIntersectionEXT; + } + else + { + terminateRayEXT; + } +} + +void main() +{ + in_func(); +} + diff --git a/reference/shaders/vulkan/rahit/terminators.nocompat.vk.rahit.vk b/reference/shaders/vulkan/rahit/terminators.nocompat.vk.rahit.vk new file mode 100644 index 00000000000..cee09bc0ea5 --- /dev/null +++ 
b/reference/shaders/vulkan/rahit/terminators.nocompat.vk.rahit.vk @@ -0,0 +1,22 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +rayPayloadInNV float payload; + +void in_func() +{ + if (payload > 0.0) + { + ignoreIntersectionNV(); + } + else + { + terminateRayNV(); + } +} + +void main() +{ + in_func(); +} + diff --git a/reference/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall.vk b/reference/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall.vk new file mode 100644 index 00000000000..5adfac164fa --- /dev/null +++ b/reference/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) callableDataInEXT float c; + +void main() +{ + executeCallableEXT(10u, 0); +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..b6c1876d313 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,24 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo2 hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..614a04d95e7 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit.vk @@ -0,0 +1,24 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo2 hit; 
+ +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..56e8ff4aa34 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,29 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo2 hit; + +void in_function() +{ + payload.a = hit.a; + payload.b = hit.b; +} + +void main() +{ + in_function(); +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e747bb29d05 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit.vk @@ -0,0 +1,29 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo2 hit; + +void in_function() +{ + payload.a = hit.a; + payload.b = hit.b; +} + +void main() +{ + in_function(); +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..eeccd3bb092 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,11 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec2 payload; +hitAttributeEXT vec2 hit; + +void main() +{ + payload = hit; +} + diff 
--git a/reference/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..908d96344f3 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit.vk @@ -0,0 +1,11 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec2 payload; +hitAttributeNV vec2 hit; + +void main() +{ + payload = hit; +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a51e6b088f3 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo hit; + +void main() +{ + payload = hit; +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..133bdfc1d90 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo hit; + +void main() +{ + payload = hit; +} + diff --git a/reference/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e4e0103ddb5 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) 
rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_HitKindEXT; +} + diff --git a/reference/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..64f79a8dce0 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_HitKindNV; +} + diff --git a/reference/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e94e3323c98 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTmaxEXT; +} + diff --git a/reference/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..9004a00c40e --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_HitTNV; +} + diff --git a/reference/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a013baa11d5 --- /dev/null +++ b/reference/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_IncomingRayFlagsEXT; +} + diff --git 
a/reference/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..d17ab8ce76c --- /dev/null +++ b/reference/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_IncomingRayFlagsNV; +} + diff --git a/reference/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e28af5d2527 --- /dev/null +++ b/reference/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = uint(gl_InstanceCustomIndexEXT); +} + diff --git a/reference/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..531a1fc2845 --- /dev/null +++ b/reference/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = uint(gl_InstanceCustomIndexNV); +} + diff --git a/reference/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..0413e0d234a --- /dev/null +++ b/reference/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = uint(gl_InstanceID); +} + diff --git 
a/reference/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..ff551db7c9d --- /dev/null +++ b/reference/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = uint(gl_InstanceID); +} + diff --git a/reference/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..237d4790e55 --- /dev/null +++ b/reference/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectRayDirectionEXT; +} + diff --git a/reference/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..01afa0e067a --- /dev/null +++ b/reference/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectRayDirectionNV; +} + diff --git a/reference/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..5739ac09ff5 --- /dev/null +++ b/reference/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectRayOriginEXT; +} + diff --git 
a/reference/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a49e17a1738 --- /dev/null +++ b/reference/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectRayOriginNV; +} + diff --git a/reference/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..7922e1efbf4 --- /dev/null +++ b/reference/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectToWorldEXT * vec4(payload, 1.0); +} + diff --git a/reference/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..fc2c5ed0c2c --- /dev/null +++ b/reference/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectToWorldNV * vec4(payload, 1.0); +} + diff --git a/reference/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e896816ec30 --- /dev/null +++ b/reference/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,20 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Payload +{ + vec4 a; +}; + +layout(location = 0) rayPayloadInEXT Payload payload; + +void write_incoming_payload_in_function() +{ + 
payload.a = vec4(10.0); +} + +void main() +{ + write_incoming_payload_in_function(); +} + diff --git a/reference/shaders/vulkan/rchit/payloads.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/payloads.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a3ddd56b176 --- /dev/null +++ b/reference/shaders/vulkan/rchit/payloads.nocompat.vk.rchit.vk @@ -0,0 +1,20 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Payload +{ + vec4 a; +}; + +layout(location = 0) rayPayloadInNV Payload payload; + +void write_incoming_payload_in_function() +{ + payload.a = vec4(10.0); +} + +void main() +{ + write_incoming_payload_in_function(); +} + diff --git a/reference/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..667c015e8d6 --- /dev/null +++ b/reference/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = uint(gl_PrimitiveID); +} + diff --git a/reference/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..d3b0ef19429 --- /dev/null +++ b/reference/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = uint(gl_PrimitiveID); +} + diff --git a/reference/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e94e3323c98 --- /dev/null +++ b/reference/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + 
+layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTmaxEXT; +} + diff --git a/reference/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..769c96ad6b7 --- /dev/null +++ b/reference/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_RayTmaxNV; +} + diff --git a/reference/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..04b89549508 --- /dev/null +++ b/reference/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTminEXT; +} + diff --git a/reference/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..2709899a13a --- /dev/null +++ b/reference/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_RayTminNV; +} + diff --git a/reference/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..05af948b379 --- /dev/null +++ b/reference/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = 1.0 + float(gl_InstanceID); +} + diff --git 
a/reference/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk index 547b9cd51a5..103fd66b801 100644 --- a/reference/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk +++ b/reference/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk @@ -5,6 +5,6 @@ layout(location = 0) rayPayloadInNV float payload; void main() { - payload = 1.0; + payload = 1.0 + float(gl_InstanceID); } diff --git a/reference/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..68ba2bafa54 --- /dev/null +++ b/reference/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldRayDirectionEXT; +} + diff --git a/reference/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..4acf03e0649 --- /dev/null +++ b/reference/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldRayDirectionNV; +} + diff --git a/reference/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a5c6766e055 --- /dev/null +++ b/reference/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldRayOriginEXT; +} + diff --git 
a/reference/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..70241f23620 --- /dev/null +++ b/reference/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldRayOriginNV; +} + diff --git a/reference/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..309ca4c6f17 --- /dev/null +++ b/reference/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldToObjectEXT * vec4(payload, 1.0); +} + diff --git a/reference/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..0b93e38acd1 --- /dev/null +++ b/reference/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldToObjectNV * vec4(payload, 1.0); +} + diff --git a/reference/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen.vk b/reference/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen.vk new file mode 100644 index 00000000000..d131b0aa800 --- /dev/null +++ b/reference/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(push_constant, std430) uniform Registers +{ + uvec2 ptr; +} _19; + +layout(location = 0) rayPayloadEXT vec4 payload; + +void main() +{ + 
vec3 origin = vec3(0.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(accelerationStructureEXT(_19.ptr), 1u, 255u, 0u, 0u, 0u, origin, 0.0, direction, 100.0, 0); +} + diff --git a/reference/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen.vk b/reference/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen.vk new file mode 100644 index 00000000000..8bb3d0070b3 --- /dev/null +++ b/reference/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT vec4 payload; +layout(location = 1) callableDataEXT float blend; +layout(set = 0, binding = 1, rgba32f) uniform writeonly image2D image; + +void main() +{ + vec3 origin = vec3(0.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 1u, 255u, 0u, 0u, 0u, origin, 0.0, direction, 100.0, 0); + executeCallableEXT(0u, 1); + imageStore(image, ivec2(gl_LaunchIDEXT.xy), payload + vec4(blend)); +} + diff --git a/reference/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..1614c49626e --- /dev/null +++ b/reference/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchIDEXT.xy), vec4(1.0)); +} + diff --git a/reference/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..f907e6fd606 --- /dev/null +++ b/reference/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + 
+void main() +{ + imageStore(uImage, ivec2(gl_LaunchIDNV.xy), vec4(1.0)); +} + diff --git a/reference/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..240e93daa48 --- /dev/null +++ b/reference/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchSizeEXT.xy) - ivec2(1), vec4(1.0)); +} + diff --git a/reference/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..08992c63194 --- /dev/null +++ b/reference/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchSizeNV.xy) - ivec2(1), vec4(1.0)); +} + diff --git a/reference/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..7885f4f3d34 --- /dev/null +++ b/reference/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,47 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Payload +{ + float a; + float b; +}; + +struct Block +{ + float a; + float b; + Payload c; + Payload d; +}; + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT Payload payload2; +layout(location = 1) rayPayloadEXT float payload1; +layout(location = 2) rayPayloadEXT Block _71; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +vec4 trace_in_function() +{ + vec4 result = vec4(0.0); + vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 
direction = vec3(0.0, 1.0, 0.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + result += vec4(payload2.a); + result += vec4(payload2.b); + return result; +} + +void main() +{ + vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 1); + vec4 result = vec4(payload1); + vec4 _62 = trace_in_function(); + result += _62; + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 2); + result += vec4(((((_71.a + _71.b) + _71.c.a) + _71.c.b) + _71.d.a) + _71.d.b); + imageStore(image, ivec2(gl_LaunchIDEXT.xy), result); +} + diff --git a/reference/shaders/vulkan/rgen/payloads.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/payloads.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..5d6b24c6018 --- /dev/null +++ b/reference/shaders/vulkan/rgen/payloads.nocompat.vk.rgen.vk @@ -0,0 +1,47 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Payload +{ + float a; + float b; +}; + +struct Block +{ + float a; + float b; + Payload c; + Payload d; +}; + +layout(set = 0, binding = 1) uniform accelerationStructureNV as; +layout(location = 1) rayPayloadNV Payload payload2; +layout(location = 0) rayPayloadNV float payload1; +layout(location = 2) rayPayloadNV Block _71; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +vec4 trace_in_function() +{ + vec4 result = vec4(0.0); + vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceNV(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 1); + result += vec4(payload2.a); + result += vec4(payload2.b); + return result; +} + +void main() +{ + vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceNV(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + vec4 result = vec4(payload1); + vec4 _62 = trace_in_function(); + result += _62; + traceNV(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 
1000.0, 2); + result += vec4(((((_71.a + _71.b) + _71.c.a) + _71.c.b) + _71.d.a) + _71.d.b); + imageStore(image, ivec2(gl_LaunchIDNV.xy), result); +} + diff --git a/reference/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..71e04d670b9 --- /dev/null +++ b/reference/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,21 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +float pure_call(vec2 launchID, vec2 launchSize) +{ + vec3 origin = vec3(launchID.x / launchSize.x, launchID.y / launchSize.y, 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + return 0.0; +} + +void main() +{ + vec2 param = vec2(gl_LaunchIDEXT.xy); + vec2 param_1 = vec2(gl_LaunchSizeEXT.xy); + float _64 = pure_call(param, param_1); +} + diff --git a/reference/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..67deac226d1 --- /dev/null +++ b/reference/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +void main() +{ + vec4 col = vec4(0.0, 0.0, 0.0, 1.0); + vec3 origin = vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + col.y = payload; + imageStore(image, ivec2(gl_LaunchIDEXT.xy), 
col); +} + diff --git a/reference/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..3056e8ad281 --- /dev/null +++ b/reference/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(shaderRecordEXT, std430) buffer sbt +{ + vec3 direction; + float tmax; +} _20; + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +void main() +{ + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(0.0), 0.0, _20.direction, _20.tmax, 0); +} + diff --git a/reference/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint.vk b/reference/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint.vk new file mode 100644 index 00000000000..761609a8128 --- /dev/null +++ b/reference/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint.vk @@ -0,0 +1,13 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +void in_func() +{ + bool _13 = reportIntersectionEXT(0.5, 10u); +} + +void main() +{ + in_func(); +} + diff --git a/reference/shaders/vulkan/rint/report-intersection.nocompat.vk.rint.vk b/reference/shaders/vulkan/rint/report-intersection.nocompat.vk.rint.vk new file mode 100644 index 00000000000..c99b78495ae --- /dev/null +++ b/reference/shaders/vulkan/rint/report-intersection.nocompat.vk.rint.vk @@ -0,0 +1,13 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +void in_func() +{ + bool _13 = reportIntersectionNV(0.5, 10u); +} + +void main() +{ + in_func(); +} + diff --git a/reference/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss.vk b/reference/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss.vk new file mode 100644 index 00000000000..c055a268144 --- /dev/null +++ 
b/reference/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = 0.0; +} + diff --git a/reference/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss.vk b/reference/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss.vk new file mode 100644 index 00000000000..696c998c279 --- /dev/null +++ b/reference/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss.vk @@ -0,0 +1,13 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadInEXT float p; + +void main() +{ + vec3 origin = vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); +} + diff --git a/reference/shaders/vulkan/vert/device-group.nocompat.vk.vert.vk b/reference/shaders/vulkan/vert/device-group.nocompat.vk.vert.vk new file mode 100644 index 00000000000..9cadcdb6dce --- /dev/null +++ b/reference/shaders/vulkan/vert/device-group.nocompat.vk.vert.vk @@ -0,0 +1,8 @@ +#version 450 +#extension GL_EXT_device_group : require + +void main() +{ + gl_Position = vec4(float(gl_DeviceIndex)); +} + diff --git a/reference/shaders/vulkan/vert/small-storage.vk.vert b/reference/shaders/vulkan/vert/small-storage.vk.vert index b3aafc8d8c1..2c4beb71e5f 100644 --- a/reference/shaders/vulkan/vert/small-storage.vk.vert +++ b/reference/shaders/vulkan/vert/small-storage.vk.vert @@ -1,10 +1,20 @@ #version 450 -#if defined(GL_AMD_gpu_shader_int16) +#if defined(GL_EXT_shader_explicit_arithmetic_types_int16) +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#elif defined(GL_AMD_gpu_shader_int16) #extension 
GL_AMD_gpu_shader_int16 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require #else #error No extension available for Int16. #endif +#if defined(GL_EXT_shader_explicit_arithmetic_types_int8) #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for Int8. +#endif #if defined(GL_AMD_gpu_shader_half_float) #extension GL_AMD_gpu_shader_half_float : require #elif defined(GL_NV_gpu_shader5) diff --git a/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert b/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert index 60ba1882f82..d939aa625c5 100644 --- a/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert +++ b/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert @@ -1,6 +1,13 @@ #version 310 es +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif void main() { diff --git a/samples/cpp/Makefile b/samples/cpp/Makefile index 225bb3d57d1..e5b66d4b88f 100644 --- a/samples/cpp/Makefile +++ b/samples/cpp/Makefile @@ -1,3 +1,6 @@ +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + SOURCES := $(wildcard *.comp) SPIRV := $(SOURCES:.comp=.spv) CPP_INTERFACE := $(SOURCES:.comp=.spv.cpp) diff --git a/samples/cpp/atomics.comp b/samples/cpp/atomics.comp index 0bf6d2ad011..f315124743b 100644 --- a/samples/cpp/atomics.comp +++ b/samples/cpp/atomics.comp @@ -1,3 +1,6 @@ +// Copyright 2016-2021 The Khronos Group Inc. 
+// SPDX-License-Identifier: Apache-2.0 + #version 310 es layout(local_size_x = 64) in; diff --git a/samples/cpp/atomics.cpp b/samples/cpp/atomics.cpp index 89351a5ae5f..36d8fc15c06 100644 --- a/samples/cpp/atomics.cpp +++ b/samples/cpp/atomics.cpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/samples/cpp/multiply.comp b/samples/cpp/multiply.comp index 1ac7869ad05..678415aed65 100644 --- a/samples/cpp/multiply.comp +++ b/samples/cpp/multiply.comp @@ -1,3 +1,6 @@ +// Copyright 2016-2021 The Khronos Group Inc. +// SPDX-License-Identifier: Apache-2.0 + #version 310 es layout(local_size_x = 64) in; diff --git a/samples/cpp/multiply.cpp b/samples/cpp/multiply.cpp index daa1fc6477d..4ff61843198 100644 --- a/samples/cpp/multiply.cpp +++ b/samples/cpp/multiply.cpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/samples/cpp/shared.comp b/samples/cpp/shared.comp index 7d59060aa98..a9d55d2c5c6 100644 --- a/samples/cpp/shared.comp +++ b/samples/cpp/shared.comp @@ -1,3 +1,6 @@ +// Copyright 2016-2021 The Khronos Group Inc. +// SPDX-License-Identifier: Apache-2.0 + #version 310 es layout(local_size_x = 64) in; diff --git a/samples/cpp/shared.cpp b/samples/cpp/shared.cpp index 5be62d681fc..b997704bf81 100644 --- a/samples/cpp/shared.cpp +++ b/samples/cpp/shared.cpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp b/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp new file mode 100644 index 00000000000..3371e3af21b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp @@ -0,0 +1,118 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 437 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Baz "Baz" + OpMemberName %Baz 0 "c" + OpName %Bar "Bar" + OpMemberName %Bar 0 "d" + OpMemberName %Bar 1 "baz" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpMemberName %Foo 1 "b" + OpMemberName %Foo 2 "c" + OpName %Baz_0 "Baz" + OpMemberName %Baz_0 0 "c" + OpName %Bar_0 "Bar" + OpMemberName %Bar_0 0 "d" + OpMemberName %Bar_0 1 "baz" + OpName %Foo_0 "Foo" + OpMemberName %Foo_0 0 "a" + OpMemberName %Foo_0 1 "b" + OpMemberName %Foo_0 2 "c" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "foo" + OpMemberName %SSBO 1 "foo2" + OpName %_ "" + OpDecorate %_arr_float_uint_4_0 ArrayStride 4 + OpDecorate %_arr__arr_float_uint_4_0_uint_2 ArrayStride 16 + OpMemberDecorate %Baz_0 0 Offset 0 + OpDecorate %_arr_Baz_0_uint_2 ArrayStride 4 + OpMemberDecorate %Bar_0 0 Offset 0 + OpMemberDecorate %Bar_0 1 Offset 32 + OpDecorate %_arr_Bar_0_uint_5 ArrayStride 40 + OpMemberDecorate %Foo_0 0 RowMajor + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %Foo_0 0 MatrixStride 8 + OpMemberDecorate %Foo_0 1 Offset 16 + OpMemberDecorate %Foo_0 2 Offset 24 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 224 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = 
OpTypeVector %float 2 +%mat2v2float = OpTypeMatrix %v2float 2 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 + %uint_2 = OpConstant %uint 2 +%_arr__arr_float_uint_4_uint_2 = OpTypeArray %_arr_float_uint_4 %uint_2 + %Baz = OpTypeStruct %float +%_arr_Baz_uint_2 = OpTypeArray %Baz %uint_2 + %Bar = OpTypeStruct %_arr__arr_float_uint_4_uint_2 %_arr_Baz_uint_2 + %uint_5 = OpConstant %uint 5 +%_arr_Bar_uint_5 = OpTypeArray %Bar %uint_5 + %Foo = OpTypeStruct %mat2v2float %v2float %_arr_Bar_uint_5 +%_ptr_Function_Foo = OpTypePointer Function %Foo +%_arr_float_uint_4_0 = OpTypeArray %float %uint_4 +%_arr__arr_float_uint_4_0_uint_2 = OpTypeArray %_arr_float_uint_4_0 %uint_2 + %Baz_0 = OpTypeStruct %float +%_arr_Baz_0_uint_2 = OpTypeArray %Baz_0 %uint_2 + %Bar_0 = OpTypeStruct %_arr__arr_float_uint_4_0_uint_2 %_arr_Baz_0_uint_2 +%_arr_Bar_0_uint_5 = OpTypeArray %Bar_0 %uint_5 + %Foo_0 = OpTypeStruct %mat2v2float %v2float %_arr_Bar_0_uint_5 + %SSBO = OpTypeStruct %Foo_0 %Foo_0 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_mat2v2float = OpTypePointer Function %mat2v2float + %int_1 = OpConstant %int 1 +%_ptr_Function_v2float = OpTypePointer Function %v2float + %int_2 = OpConstant %int 2 +%_ptr_Function__arr_Bar_uint_5 = OpTypePointer Function %_arr_Bar_uint_5 +%_ptr_Function_Bar = OpTypePointer Function %Bar +%_ptr_Function__arr__arr_float_uint_4_uint_2 = OpTypePointer Function %_arr__arr_float_uint_4_uint_2 +%_ptr_Function__arr_float_uint_4 = OpTypePointer Function %_arr_float_uint_4 +%_ptr_Function_float = OpTypePointer Function %float + %int_3 = OpConstant %int 3 +%_ptr_Function__arr_Baz_uint_2 = OpTypePointer Function %_arr_Baz_uint_2 +%_ptr_Function_Baz = OpTypePointer Function %Baz + %int_4 = OpConstant %int 4 + %float_1 = OpConstant %float 1 + 
%float_2 = OpConstant %float 2 + %float_5 = OpConstant %float 5 +%_ptr_Uniform_mat2v2float = OpTypePointer Uniform %mat2v2float +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform__arr_Bar_0_uint_5 = OpTypePointer Uniform %_arr_Bar_0_uint_5 +%_ptr_Uniform_Bar_0 = OpTypePointer Uniform %Bar_0 +%_ptr_Uniform__arr__arr_float_uint_4_0_uint_2 = OpTypePointer Uniform %_arr__arr_float_uint_4_0_uint_2 +%_ptr_Uniform__arr_float_uint_4_0 = OpTypePointer Uniform %_arr_float_uint_4_0 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform__arr_Baz_0_uint_2 = OpTypePointer Uniform %_arr_Baz_0_uint_2 +%_ptr_Uniform_Baz_0 = OpTypePointer Uniform %Baz_0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %ptr_load = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0 + %ptr_store = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_1 + %loaded = OpLoad %Foo_0 %ptr_load + OpStore %ptr_store %loaded + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/shaders-hlsl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 00000000000..87aee2db54f --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %T "T" + OpMemberName %T 0 "a" + OpName %v "v" + OpName %T_0 "T" + OpMemberName %T_0 0 "b" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "foo" + OpName %_ "" + OpName %T_1 "T" + OpMemberName %T_1 0 "c" + OpName %SSBO2 "SSBO2" + OpMemberName %SSBO2 0 "bar" + OpName %__0 
"" + OpMemberDecorate %T_0 0 Offset 0 + OpDecorate %_runtimearr_T_0 ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpMemberDecorate %T_1 0 Offset 0 + OpDecorate %_runtimearr_T_1 ArrayStride 16 + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %T = OpTypeStruct %float +%_ptr_Function_T = OpTypePointer Function %T + %float_40 = OpConstant %float 40 + %11 = OpConstantComposite %T %float_40 + %T_0 = OpTypeStruct %float +%_runtimearr_T_0 = OpTypeRuntimeArray %T_0 + %SSBO1 = OpTypeStruct %_runtimearr_T_0 +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 +%_ptr_Uniform_T_0 = OpTypePointer Uniform %T_0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %T_1 = OpTypeStruct %float +%_runtimearr_T_1 = OpTypeRuntimeArray %T_1 + %SSBO2 = OpTypeStruct %_runtimearr_T_1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %__0 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int_30 = OpConstant %int 30 +%_ptr_Uniform_T_1 = OpTypePointer Uniform %T_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable %_ptr_Function_T Function + OpStore %v %11 + %20 = OpLoad %T %v + %22 = OpAccessChain %_ptr_Uniform_T_0 %_ %int_0 %int_10 + %23 = OpCompositeExtract %float %20 0 + %25 = OpAccessChain %_ptr_Uniform_float %22 %int_0 + OpStore %25 %23 + %32 = OpLoad %T %v + %34 = OpAccessChain %_ptr_Uniform_T_1 %__0 %int_0 %int_30 + %35 = OpCompositeExtract %float %32 0 + %36 = OpAccessChain %_ptr_Uniform_float %34 %int_0 + OpStore %36 %35 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp b/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 
100644 index 00000000000..3f2d141a1f5 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %c "c" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %SSBO = OpTypeStruct %uint %uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_uint Function + %15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 + %16 = OpAtomicLoad %uint %15 %int_1 %int_0 + OpStore %c %16 + %18 = OpLoad %uint %c + %19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + OpAtomicStore %19 %int_1 %int_0 %18 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/shaders-hlsl-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 00000000000..435fa322215 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,97 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang 
Reference Front End; 7 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "ints" + OpMemberName %SSBO 1 "uints" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %v4int = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 + + %int_1 = OpConstant %int 1 + %uint_11 = OpConstant %uint 11 + + %SSBO = OpTypeStruct %v4int %v4uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int +%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint + %main = OpFunction %void None %3 + %5 = OpLabel + %ints_ptr = OpAccessChain %_ptr_Uniform_v4int %_ %int_0 + %uints_ptr = OpAccessChain %_ptr_Uniform_v4uint %_ %int_1 + %ints = OpLoad %v4int %ints_ptr + %uints = OpLoad %v4uint %uints_ptr + + %ints_alt = OpVectorShuffle %v4int %ints %ints 3 2 1 0 + %uints_alt = OpVectorShuffle %v4uint %uints %uints 3 2 1 0 + + %int_to_int_popcount = OpBitCount %v4int %ints + %int_to_uint_popcount = OpBitCount %v4uint %ints + %uint_to_int_popcount = OpBitCount %v4int %uints + %uint_to_uint_popcount = OpBitCount %v4uint %uints + + ; BitReverse must have matching types w.r.t. sign, yay. + %int_to_int_reverse = OpBitReverse %v4int %ints + ;%int_to_uint_reverse = OpBitReverse %v4uint %ints + ;%uint_to_int_reverse = OpBitReverse %v4int %uints + %uint_to_uint_reverse = OpBitReverse %v4uint %uints + + ; Base and Result must match. 
+ %int_to_int_sbit = OpBitFieldSExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_sbit = OpBitFieldSExtract %v4uint %ints %offset %count + ;%uint_to_int_sbit = OpBitFieldSExtract %v4int %uints %offset %count + %uint_to_uint_sbit = OpBitFieldSExtract %v4uint %uints %uint_11 %int_1 + + ; Base and Result must match. + %int_to_int_ubit = OpBitFieldUExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_ubit = OpBitFieldUExtract %v4uint %ints %offset %count + ;%uint_to_int_ubit = OpBitFieldUExtract %v4int %uints %offset %count + %uint_to_uint_ubit = OpBitFieldUExtract %v4uint %uints %uint_11 %int_1 + + %int_to_int_insert = OpBitFieldInsert %v4int %ints %ints_alt %int_1 %uint_11 + %uint_to_uint_insert = OpBitFieldInsert %v4uint %uints %uints_alt %uint_11 %int_1 + + OpStore %ints_ptr %int_to_int_popcount + OpStore %uints_ptr %int_to_uint_popcount + OpStore %ints_ptr %uint_to_int_popcount + OpStore %uints_ptr %uint_to_uint_popcount + + OpStore %ints_ptr %int_to_int_reverse + ;OpStore %uints_ptr %int_to_uint_reverse + ;OpStore %ints_ptr %uint_to_int_reverse + OpStore %uints_ptr %uint_to_uint_reverse + + OpStore %ints_ptr %int_to_int_sbit + ;OpStore %uints_ptr %int_to_uint_sbit + ;OpStore %ints_ptr %uint_to_int_sbit + OpStore %uints_ptr %uint_to_uint_sbit + + OpStore %ints_ptr %int_to_int_ubit + ;OpStore %uints_ptr %int_to_uint_ubit + ;OpStore %ints_ptr %uint_to_int_ubit + OpStore %uints_ptr %uint_to_uint_ubit + + OpStore %ints_ptr %int_to_int_insert + OpStore %uints_ptr %uint_to_uint_insert + + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/bitscan.asm.comp b/shaders-hlsl-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 00000000000..e3b785cd52b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,72 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint 
GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "u" + OpMemberName %SSBO 1 "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + %SSBO = OpTypeStruct %uvec4 %ivec4 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uvec4 = OpTypePointer Uniform %uvec4 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_ivec4 = OpTypePointer Uniform %ivec4 + %main = OpFunction %void None %3 + %5 = OpLabel + %uptr = OpAccessChain %_ptr_Uniform_uvec4 %_ %int_0 + %iptr = OpAccessChain %_ptr_Uniform_ivec4 %_ %int_1 + %uvalue = OpLoad %uvec4 %uptr + %ivalue = OpLoad %ivec4 %iptr + + %lsb_uint_to_uint = OpExtInst %uvec4 %1 FindILsb %uvalue + %lsb_uint_to_int = OpExtInst %ivec4 %1 FindILsb %uvalue + %lsb_int_to_uint = OpExtInst %uvec4 %1 FindILsb %ivalue + %lsb_int_to_int = OpExtInst %ivec4 %1 FindILsb %ivalue + + %umsb_uint_to_uint = OpExtInst %uvec4 %1 FindUMsb %uvalue + %umsb_uint_to_int = OpExtInst %ivec4 %1 FindUMsb %uvalue + %umsb_int_to_uint = OpExtInst %uvec4 %1 FindUMsb %ivalue + %umsb_int_to_int = OpExtInst %ivec4 %1 FindUMsb %ivalue + + %smsb_uint_to_uint = OpExtInst %uvec4 %1 FindSMsb %uvalue + %smsb_uint_to_int = OpExtInst %ivec4 %1 FindSMsb %uvalue + %smsb_int_to_uint = OpExtInst %uvec4 %1 FindSMsb %ivalue + %smsb_int_to_int = OpExtInst %ivec4 %1 FindSMsb %ivalue + + OpStore %uptr %lsb_uint_to_uint + OpStore %iptr %lsb_uint_to_int + OpStore %uptr %lsb_int_to_uint + OpStore %iptr %lsb_int_to_int + + OpStore %uptr %umsb_uint_to_uint + OpStore %iptr %umsb_uint_to_int + OpStore %uptr %umsb_int_to_uint + OpStore %iptr 
%umsb_int_to_int + + OpStore %uptr %smsb_uint_to_uint + OpStore %iptr %smsb_uint_to_int + OpStore %uptr %smsb_int_to_uint + OpStore %iptr %smsb_int_to_int + + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/buffer-atomic-nonuniform.asm.sm51.nonuniformresource.comp b/shaders-hlsl-no-opt/asm/comp/buffer-atomic-nonuniform.asm.sm51.nonuniformresource.comp new file mode 100644 index 00000000000..132f38bf72d --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/buffer-atomic-nonuniform.asm.sm51.nonuniformresource.comp @@ -0,0 +1,53 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability StorageBufferArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbos "ssbos" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %ssbos DescriptorSet 0 + OpDecorate %ssbos Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %22 NonUniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %SSBO = OpTypeStruct %uint +%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO +%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO + %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_2 = OpConstant %uint 2 +%_ptr_Input_uint = OpTypePointer Input %uint + %int = OpTypeInt 32 1 + %int_0 = 
OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %17 = OpLoad %uint %16 + %18 = OpCopyObject %uint %17 + %22 = OpAccessChain %_ptr_Uniform_uint %ssbos %18 %int_0 + %25 = OpAtomicIAdd %uint %22 %uint_1 %uint_0 %uint_1 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp b/shaders-hlsl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 00000000000..e1dcb0ef8e2 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,81 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 49 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %gl_LocalInvocationID + OpExecutionMode %main LocalSize 4 4 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %gl_LocalInvocationID "gl_LocalInvocationID" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %25 "indexable" + OpName %38 "indexable" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO 
Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_4 = OpConstant %uint 4 +%_arr_int_uint_4 = OpTypeArray %int %uint_4 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %25 = OpConstantComposite %_arr_int_uint_4 %int_0 %int_1 %int_2 %int_3 +%gl_LocalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Function__arr_int_uint_4 = OpTypePointer Function %_arr_int_uint_4 +%_ptr_Function_int = OpTypePointer Function %int + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %38 = OpConstantComposite %_arr_int_uint_4 %int_4 %int_5 %int_6 %int_7 + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_int = OpTypePointer Uniform %int +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_4 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_int_uint_4 Function +%indexable_0 = OpVariable %_ptr_Function__arr_int_uint_4 Function + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %27 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0 + %28 = OpLoad %uint %27 + OpStore %indexable %25 + %32 = OpAccessChain %_ptr_Function_int %indexable %28 + %33 = OpLoad %int %32 + %40 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_1 + %41 = OpLoad %uint %40 + OpStore %indexable_0 %38 + %43 = OpAccessChain %_ptr_Function_int %indexable_0 %41 + %44 = OpLoad %int %43 + %45 = OpIAdd %int %33 %44 + %47 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %19 + OpStore %47 %45 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp 
b/shaders-hlsl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 00000000000..73f3ceee1ad --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 26 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + ;OpEntryPoint GLCompute %main "main" %Samp %ubo %ssbo %v %w + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Samp "Samp" + OpName %UBO "UBO" + OpMemberName %UBO 0 "v" + OpName %ubo "ubo" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbo "ssbo" + OpName %v "v" + OpName %w "w" + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpDecorate %Samp DescriptorSet 0 + OpDecorate %Samp Binding 0 + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %ubo DescriptorSet 0 + OpDecorate %ubo Binding 1 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %uint_64 = OpConstant %uint 64 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1 + %float = OpTypeFloat 32 + %12 = OpTypeImage %float 2D 0 0 0 1 Unknown + %13 = OpTypeSampledImage %12 +%_ptr_UniformConstant_13 = OpTypePointer UniformConstant %13 + %Samp = OpVariable %_ptr_UniformConstant_13 UniformConstant + %UBO = OpTypeStruct %float +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %ubo = OpVariable %_ptr_Uniform_UBO Uniform + %SSBO = OpTypeStruct %float +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer +%_ptr_Private_float = OpTypePointer 
Private %float + %v = OpVariable %_ptr_Private_float Private +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %w = OpVariable %_ptr_Workgroup_float Workgroup + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.fxconly.asm.comp b/shaders-hlsl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.fxconly.asm.comp new file mode 100644 index 00000000000..30db11d45bc --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.fxconly.asm.comp @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %ResTypeMod = OpTypeStruct %float %float +%_ptr_Function_ResTypeMod = OpTypePointer Function %ResTypeMod + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_20 = OpConstant %float 20 + %int_1 = OpConstant %int 1 +%_ptr_Function_float = OpTypePointer Function %float +%ResTypeFrexp = OpTypeStruct %float %int +%_ptr_Function_ResTypeFrexp = OpTypePointer Function %ResTypeFrexp + %float_40 = OpConstant %float 40 +%_ptr_Function_int = OpTypePointer Function %int + %SSBO = OpTypeStruct %float %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_int = OpTypePointer Uniform %int + %main = OpFunction %void None %3 + %5 = OpLabel + %modres = OpExtInst %ResTypeMod %1 ModfStruct %float_20 + %frexpres = OpExtInst %ResTypeFrexp %1 FrexpStruct %float_40 + + %modres_f = 
OpCompositeExtract %float %modres 0 + %modres_i = OpCompositeExtract %float %modres 1 + %frexpres_f = OpCompositeExtract %float %frexpres 0 + %frexpres_i = OpCompositeExtract %int %frexpres 1 + + %float_ptr = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %int_ptr = OpAccessChain %_ptr_Uniform_int %_ %int_1 + + OpStore %float_ptr %modres_f + OpStore %float_ptr %modres_i + OpStore %float_ptr %frexpres_f + OpStore %int_ptr %frexpres_i + + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/image-atomic-nonuniform.asm.sm51.nonuniformresource.comp b/shaders-hlsl-no-opt/asm/comp/image-atomic-nonuniform.asm.sm51.nonuniformresource.comp new file mode 100644 index 00000000000..5dad9dd5ed8 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/image-atomic-nonuniform.asm.sm51.nonuniformresource.comp @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 32 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability StorageImageArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %uImage "uImage" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %uImage DescriptorSet 0 + OpDecorate %uImage Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %30 NonUniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %7 = OpTypeImage %uint 2D 0 0 0 2 R32ui +%_runtimearr_7 = OpTypeRuntimeArray %7 +%_ptr_UniformConstant__runtimearr_7 = OpTypePointer UniformConstant %_runtimearr_7 + %uImage = OpVariable %_ptr_UniformConstant__runtimearr_7 UniformConstant + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = 
OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_2 = OpConstant %uint 2 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_UniformConstant_7 = OpTypePointer UniformConstant %7 + %v2uint = OpTypeVector %uint 2 + %int = OpTypeInt 32 1 + %v2int = OpTypeVector %int 2 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 +%_ptr_Image_uint = OpTypePointer Image %uint + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %17 = OpLoad %uint %16 + %18 = OpCopyObject %uint %17 + %20 = OpAccessChain %_ptr_UniformConstant_7 %uImage %18 + %22 = OpLoad %v3uint %gl_GlobalInvocationID + %23 = OpVectorShuffle %v2uint %22 %22 0 1 + %26 = OpBitcast %v2int %23 + %30 = OpImageTexelPointer %_ptr_Image_uint %20 %26 %uint_0 + %31 = OpAtomicIAdd %uint %30 %uint_1 %uint_0 %uint_1 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/local-size-id-override.asm.comp b/shaders-hlsl-no-opt/asm/comp/local-size-id-override.asm.comp new file mode 100644 index 00000000000..2eaef4bdbee --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/local-size-id-override.asm.comp @@ -0,0 +1,60 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void 
= OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + %spec_1 = OpSpecConstant %uint 11 + %spec_2 = OpSpecConstant %uint 12 + %spec_3 = OpSpecConstant %uint 13 + %spec_4 = OpSpecConstant %uint 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %uint_3 %spec_1 %spec_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %27 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/local-size-id.asm.comp b/shaders-hlsl-no-opt/asm/comp/local-size-id.asm.comp new file mode 100644 index 00000000000..3031f4bb8af --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/local-size-id.asm.comp @@ -0,0 +1,76 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + 
OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + ; Test that we can declare the spec constant as signed. + ; Needs implicit bitcast since WorkGroupSize is uint. + %spec_1 = OpSpecConstant %int 11 + %spec_2 = OpSpecConstant %int 12 + %spec_3 = OpSpecConstant %int 13 + %spec_4 = OpSpecConstant %int 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + ; Test that we can build spec constant composites out of local size id values. + ; Needs special case handling. 
+ %spec_3_op = OpSpecConstantOp %uint IAdd %spec_3 %uint_3 +%WorkGroupSize = OpSpecConstantComposite %v3uint %spec_3_op %spec_4 %uint_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %wg_f = OpConvertUToF %v3float %WorkGroupSize + %wg_f4 = OpVectorShuffle %v4float %wg_f %wg_f 0 1 2 2 + ; Test that we can use the spec constants directly which needs to translate to gl_WorkGroupSize.elem. + ; Needs special case handling. + %res = OpFAdd %v4float %27 %wg_f4 + %f0 = OpConvertSToF %float %spec_3 + %f1 = OpConvertSToF %float %spec_4 + %f2 = OpConvertSToF %float %uint_2 + %res1 = OpVectorTimesScalar %v4float %res %f0 + %res2 = OpVectorTimesScalar %v4float %res1 %f1 + %res3 = OpVectorTimesScalar %v4float %res2 %f2 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %res3 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/num-workgroups.spv14.asm.comp b/shaders-hlsl-no-opt/asm/comp/num-workgroups.spv14.asm.comp new file mode 100644 index 00000000000..e820da5fc45 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/num-workgroups.spv14.asm.comp @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 27 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %_ %gl_NumWorkGroups %__0 + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %_ "" + OpName %gl_NumWorkGroups "gl_NumWorkGroups" + OpName %UBO "UBO" + OpMemberName %UBO 0 "w" + OpName %__0 "" + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ 
DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %SSBO = OpTypeStruct %v3uint +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input + %UBO = OpTypeStruct %v3uint +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %__0 = OpVariable %_ptr_Uniform_UBO Uniform +%_ptr_Uniform_v3uint = OpTypePointer Uniform %v3uint +%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %15 = OpLoad %v3uint %gl_NumWorkGroups + %20 = OpAccessChain %_ptr_Uniform_v3uint %__0 %int_0 + %21 = OpLoad %v3uint %20 + %22 = OpIAdd %v3uint %15 %21 + %24 = OpAccessChain %_ptr_StorageBuffer_v3uint %_ %int_0 + OpStore %24 %22 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp b/shaders-hlsl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp new file mode 100644 index 00000000000..b4e622baced --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp @@ -0,0 +1,78 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO 
"SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %A "A" + OpName %B "A" + OpName %C "A" + OpName %D "A" + OpName %E "A" + OpName %F "A" + OpName %G "A" + OpName %H "A" + OpName %I "A" + OpName %J "A" + OpName %K "A" + OpName %L "A" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %A SpecId 0 + OpDecorate %B SpecId 1 + OpDecorate %C SpecId 2 + OpDecorate %D SpecId 3 + OpDecorate %E SpecId 4 + OpDecorate %F SpecId 5 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %A = OpSpecConstant %int 0 + %B = OpSpecConstant %int 1 + %C = OpSpecConstant %int 2 + %D = OpSpecConstant %int 3 + %E = OpSpecConstant %int 4 + %F = OpSpecConstant %int 5 + %G = OpSpecConstantOp %int ISub %A %B + %H = OpSpecConstantOp %int ISub %G %C + %I = OpSpecConstantOp %int ISub %H %D + %J = OpSpecConstantOp %int ISub %I %E + %K = OpSpecConstantOp %int ISub %J %F + %L = OpSpecConstantOp %int IAdd %K %F +%_ptr_Uniform_int = OpTypePointer Uniform %int + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %32 = OpAccessChain 
%_ptr_Uniform_int %_ %int_0 %19 + OpStore %32 %L + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp b/shaders-hlsl-no-opt/asm/comp/storage-buffer-basic.nofxc.asm.comp similarity index 95% rename from shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp rename to shaders-hlsl-no-opt/asm/comp/storage-buffer-basic.nofxc.asm.comp index edb1a05e549..db9a8490df6 100644 --- a/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp +++ b/shaders-hlsl-no-opt/asm/comp/storage-buffer-basic.nofxc.asm.comp @@ -4,9 +4,9 @@ ; Bound: 31 ; Schema: 0 OpCapability Shader - OpCapability VariablePointers + ;OpCapability VariablePointers OpExtension "SPV_KHR_storage_buffer_storage_class" - OpExtension "SPV_KHR_variable_pointers" + ;OpExtension "SPV_KHR_variable_pointers" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %22 "main" %gl_WorkGroupID OpSource OpenCL_C 120 diff --git a/shaders-hlsl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag b/shaders-hlsl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag new file mode 100644 index 00000000000..6782b124730 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag @@ -0,0 +1,83 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 27 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %_ + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpMemberName %AA 0 "foo" + OpMemberName %AB 0 "foo" + OpMemberName %A 0 "_aa" + OpMemberName %A 1 "ab" + OpMemberName %BA 0 "foo" + OpMemberName %BB 0 "foo" + OpMemberName %B 0 "_ba" + OpMemberName %B 1 "bb" + OpName %VertexData "VertexData" + OpMemberName %VertexData 0 "_a" + OpMemberName %VertexData 1 "b" + OpName %_ "" + OpMemberName %CA 0 "foo" + OpMemberName %C 0 "_ca" + OpMemberName %DA 0 "foo" + OpMemberName %D 0 "da" + 
OpName %UBO "UBO" + OpMemberName %UBO 0 "_c" + OpMemberName %UBO 1 "d" + OpName %__0 "" + OpMemberName %E 0 "a" + OpName %SSBO "SSBO" + ;OpMemberName %SSBO 0 "e" Test that we don't try to assign bogus aliases. + OpMemberName %SSBO 1 "_e" + OpMemberName %SSBO 2 "f" + OpName %__1 "" + OpDecorate %VertexData Block + OpDecorate %_ Location 0 + OpMemberDecorate %CA 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %DA 0 Offset 0 + OpMemberDecorate %D 0 Offset 0 + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 1 Offset 16 + OpDecorate %UBO Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpMemberDecorate %E 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpMemberDecorate %SSBO 2 Offset 8 + OpDecorate %SSBO BufferBlock + OpDecorate %__1 DescriptorSet 0 + OpDecorate %__1 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %AA = OpTypeStruct %int + %AB = OpTypeStruct %int + %A = OpTypeStruct %AA %AB + %BA = OpTypeStruct %int + %BB = OpTypeStruct %int + %B = OpTypeStruct %BA %BB + %VertexData = OpTypeStruct %A %B +%_ptr_Input_VertexData = OpTypePointer Input %VertexData + %_ = OpVariable %_ptr_Input_VertexData Input + %CA = OpTypeStruct %int + %C = OpTypeStruct %CA + %DA = OpTypeStruct %int + %D = OpTypeStruct %DA + %UBO = OpTypeStruct %C %D +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %__0 = OpVariable %_ptr_Uniform_UBO Uniform + %E = OpTypeStruct %int + %SSBO = OpTypeStruct %E %E %E +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %__1 = OpVariable %_ptr_Uniform_SSBO Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/composite-insert-inheritance.asm.frag b/shaders-hlsl-no-opt/asm/frag/composite-insert-inheritance.asm.frag new file mode 100644 index 00000000000..9408e69ac09 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/composite-insert-inheritance.asm.frag @@ -0,0 +1,127 @@ +; 
SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vInput %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %vInput "vInput" + OpName %FragColor "FragColor" + OpName %phi "PHI" + OpDecorate %vInput RelaxedPrecision + OpDecorate %vInput Location 0 + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %b0 RelaxedPrecision + OpDecorate %b1 RelaxedPrecision + OpDecorate %b2 RelaxedPrecision + OpDecorate %b3 RelaxedPrecision + OpDecorate %c1 RelaxedPrecision + OpDecorate %c3 RelaxedPrecision + OpDecorate %d4_mp RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vInput = OpVariable %_ptr_Input_v4float Input + %float_1 = OpConstant %float 1 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %float_2 = OpConstant %float 2 + %uint_1 = OpConstant %uint 1 + %float_3 = OpConstant %float 3 + %uint_2 = OpConstant %uint 2 + %float_4 = OpConstant %float 4 + %uint_3 = OpConstant %uint 3 + %v4float_arr2 = OpTypeArray %v4float %uint_2 + %v44float = OpTypeMatrix %v4float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v4undef = OpUndef %v4float + %v4const = OpConstantNull %v4float + %v4arrconst = OpConstantNull %v4float_arr2 + %v44const = OpConstantNull %v44float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + + %loaded0 = OpLoad %v4float %vInput + + ; Basic case (highp). 
+ %a0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %a1 = OpCompositeInsert %v4float %float_2 %a0 1 + %a2 = OpCompositeInsert %v4float %float_3 %a1 2 + %a3 = OpCompositeInsert %v4float %float_4 %a2 3 + OpStore %FragColor %a3 + + ; Basic case (mediump). + %b0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %b1 = OpCompositeInsert %v4float %float_2 %b0 1 + %b2 = OpCompositeInsert %v4float %float_3 %b1 2 + %b3 = OpCompositeInsert %v4float %float_4 %b2 3 + OpStore %FragColor %b3 + + ; Mix relaxed precision. + %c0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %c1 = OpCompositeInsert %v4float %float_2 %c0 1 + %c2 = OpCompositeInsert %v4float %float_3 %c1 2 + %c3 = OpCompositeInsert %v4float %float_4 %c2 3 + OpStore %FragColor %c3 + + ; SSA use after insert + %d0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %d1 = OpCompositeInsert %v4float %float_2 %d0 1 + %d2 = OpCompositeInsert %v4float %float_3 %d1 2 + %d3 = OpCompositeInsert %v4float %float_4 %d2 3 + %d4 = OpFAdd %v4float %d3 %d0 + OpStore %FragColor %d4 + %d4_mp = OpFAdd %v4float %d3 %d1 + OpStore %FragColor %d4_mp + + ; Verify Insert behavior on Undef. + %e0 = OpCompositeInsert %v4float %float_1 %v4undef 0 + %e1 = OpCompositeInsert %v4float %float_2 %e0 1 + %e2 = OpCompositeInsert %v4float %float_3 %e1 2 + %e3 = OpCompositeInsert %v4float %float_4 %e2 3 + OpStore %FragColor %e3 + + ; Verify Insert behavior on Constant. + %f0 = OpCompositeInsert %v4float %float_1 %v4const 0 + OpStore %FragColor %f0 + + ; Verify Insert behavior on Array. + %g0 = OpCompositeInsert %v4float_arr2 %float_1 %v4arrconst 1 2 + %g1 = OpCompositeInsert %v4float_arr2 %float_2 %g0 0 3 + %g2 = OpCompositeExtract %v4float %g1 0 + OpStore %FragColor %g2 + %g3 = OpCompositeExtract %v4float %g1 1 + OpStore %FragColor %g3 + + ; Verify Insert behavior on Matrix. 
+ %h0 = OpCompositeInsert %v44float %float_1 %v44const 1 2 + %h1 = OpCompositeInsert %v44float %float_2 %h0 2 3 + %h2 = OpCompositeExtract %v4float %h1 0 + OpStore %FragColor %h2 + %h3 = OpCompositeExtract %v4float %h1 1 + OpStore %FragColor %h3 + %h4 = OpCompositeExtract %v4float %h1 2 + OpStore %FragColor %h4 + %h5 = OpCompositeExtract %v4float %h1 3 + OpStore %FragColor %h5 + + ; Verify that we cannot RMW PHI variables. + OpBranch %next + %next = OpLabel + %phi = OpPhi %v4float %d2 %5 + %i0 = OpCompositeInsert %v4float %float_4 %phi 3 + OpStore %FragColor %i0 + + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/empty-struct-in-struct.asm.frag b/shaders-hlsl-no-opt/asm/frag/empty-struct-in-struct.asm.frag new file mode 100644 index 00000000000..a9650ddbb6b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/empty-struct-in-struct.asm.frag @@ -0,0 +1,61 @@ +; SPIR-V +; Version: 1.2 +; Generator: Khronos; 0 +; Bound: 43 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %EntryPoint_Main "main" + OpExecutionMode %EntryPoint_Main OriginUpperLeft + OpSource Unknown 100 + OpName %EmptyStructTest "EmptyStructTest" + OpName %EmptyStruct2Test "EmptyStruct2Test" + OpName %GetValue "GetValue" + OpName %GetValue2 "GetValue" + OpName %self "self" + OpName %self2 "self" + OpName %emptyStruct "emptyStruct" + OpName %value "value" + OpName %EntryPoint_Main "EntryPoint_Main" + +%EmptyStructTest = OpTypeStruct +%EmptyStruct2Test = OpTypeStruct %EmptyStructTest +%_ptr_Function_EmptyStruct2Test = OpTypePointer Function %EmptyStruct2Test + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %5 = OpTypeFunction %float %_ptr_Function_EmptyStruct2Test + %6 = OpTypeFunction %float %EmptyStruct2Test + %void = OpTypeVoid +%_ptr_Function_void = OpTypePointer Function %void + %8 = OpTypeFunction %void %_ptr_Function_EmptyStruct2Test + %9 = OpTypeFunction %void + %float_0 = OpConstant %float 0 + %value4 = 
OpConstantNull %EmptyStruct2Test + + %GetValue = OpFunction %float None %5 + %self = OpFunctionParameter %_ptr_Function_EmptyStruct2Test + %13 = OpLabel + OpReturnValue %float_0 + OpFunctionEnd + + %GetValue2 = OpFunction %float None %6 + %self2 = OpFunctionParameter %EmptyStruct2Test + %14 = OpLabel + OpReturnValue %float_0 + OpFunctionEnd + +%EntryPoint_Main = OpFunction %void None %9 + %37 = OpLabel + %emptyStruct = OpVariable %_ptr_Function_EmptyStruct2Test Function + %18 = OpVariable %_ptr_Function_EmptyStruct2Test Function + %value = OpVariable %_ptr_Function_float Function + %value2 = OpCompositeConstruct %EmptyStructTest + %value3 = OpCompositeConstruct %EmptyStruct2Test %value2 + %22 = OpFunctionCall %float %GetValue %emptyStruct + %23 = OpFunctionCall %float %GetValue2 %value3 + %24 = OpFunctionCall %float %GetValue2 %value4 + OpStore %value %22 + OpStore %value %23 + OpStore %value %24 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/nonuniform-bracket-handling-2.nonuniformresource.sm51.asm.frag b/shaders-hlsl-no-opt/asm/frag/nonuniform-bracket-handling-2.nonuniformresource.sm51.asm.frag new file mode 100644 index 00000000000..72f6d9d86e9 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/nonuniform-bracket-handling-2.nonuniformresource.sm51.asm.frag @@ -0,0 +1,96 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 53 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability SampledImageArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vUV %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uSamplers "uSamplers" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "indices" + 
OpName %_ "" + OpName %vUV "vUV" + OpName %uSampler "uSampler" + OpName %gl_FragCoord "gl_FragCoord" + OpDecorate %FragColor Location 0 + OpDecorate %uSamplers DescriptorSet 0 + OpDecorate %uSamplers Binding 0 + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO 0 NonWritable + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 2 + OpDecorate %_ Binding 0 + OpDecorate %26 NonUniform + OpDecorate %28 NonUniform + OpDecorate %29 NonUniform + OpDecorate %vUV Location 0 + OpDecorate %uSampler DescriptorSet 1 + OpDecorate %uSampler Binding 1 + OpDecorate %38 NonUniform + OpDecorate %gl_FragCoord BuiltIn FragCoord + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %10 = OpTypeImage %float 2D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 +%_runtimearr_11 = OpTypeRuntimeArray %11 +%_ptr_UniformConstant__runtimearr_11 = OpTypePointer UniformConstant %_runtimearr_11 + %uSamplers = OpVariable %_ptr_UniformConstant__runtimearr_11 UniformConstant + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input + %float_0 = OpConstant %float 0 + %uSampler = OpVariable %_ptr_UniformConstant_11 UniformConstant +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_1 = OpConstant %uint 1 +%_ptr_Input_float = OpTypePointer Input %float 
+ %main = OpFunction %void None %3 + %5 = OpLabel + %24 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %int_10 + %26 = OpLoad %uint %24 + %28 = OpAccessChain %_ptr_UniformConstant_11 %uSamplers %26 + %29 = OpLoad %11 %28 + %33 = OpLoad %v2float %vUV + %35 = OpImageSampleExplicitLod %v4float %29 %33 Lod %float_0 + OpStore %FragColor %35 + %37 = OpLoad %11 %uSampler + %38 = OpCopyObject %11 %37 + %39 = OpLoad %v2float %vUV + %44 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_1 + %45 = OpLoad %float %44 + %46 = OpConvertFToS %int %45 + %47 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %46 + %48 = OpLoad %uint %47 + %49 = OpConvertUToF %float %48 + %50 = OpImageSampleExplicitLod %v4float %38 %39 Lod %49 + %51 = OpLoad %v4float %FragColor + %52 = OpFAdd %v4float %51 %50 + OpStore %FragColor %52 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/nonuniform-ssbo.sm51.nonuniformresource.asm.frag b/shaders-hlsl-no-opt/asm/frag/nonuniform-ssbo.sm51.nonuniformresource.asm.frag new file mode 100644 index 00000000000..c5428a8bb9b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/nonuniform-ssbo.sm51.nonuniformresource.asm.frag @@ -0,0 +1,99 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 59 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability StorageBufferArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vIndex %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpSourceExtension "GL_EXT_samplerless_texture_functions" + OpName %main "main" + OpName %i "i" + OpName %vIndex "vIndex" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "counter" + OpMemberName %SSBO 1 "v" + OpName %ssbos "ssbos" + OpName %FragColor "FragColor" + OpDecorate %vIndex Flat + OpDecorate 
%vIndex Location 0 + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %ssbos DescriptorSet 0 + OpDecorate %ssbos Binding 3 + OpDecorate %32 NonUniform + OpDecorate %39 NonUniform + OpDecorate %49 NonUniform + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Input_int = OpTypePointer Input %int + %vIndex = OpVariable %_ptr_Input_int Input + %uint = OpTypeInt 32 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %uint %_runtimearr_v4float +%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO +%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO + %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform + %int_60 = OpConstant %int 60 + %int_1 = OpConstant %int 1 + %int_70 = OpConstant %int 70 + %float_20 = OpConstant %float 20 + %30 = OpConstantComposite %v4float %float_20 %float_20 %float_20 %float_20 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_100 = OpConstant %int 100 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_100 = OpConstant %uint 100 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %uint_2 = OpConstant %uint 2 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %11 = OpLoad %int %vIndex + OpStore %i %11 + %20 = OpLoad %int %i + %22 = OpIAdd %int %20 %int_60 + %23 = OpCopyObject %int %22 + %25 = OpLoad %int %i + %27 = OpIAdd %int %25 %int_70 + %28 = OpCopyObject %int %27 + %32 = OpAccessChain %_ptr_Uniform_v4float 
%ssbos %23 %int_1 %28 + OpStore %32 %30 + %33 = OpLoad %int %i + %35 = OpIAdd %int %33 %int_100 + %36 = OpCopyObject %int %35 + %39 = OpAccessChain %_ptr_Uniform_uint %ssbos %36 %int_0 + %43 = OpAtomicIAdd %uint %39 %uint_1 %uint_0 %uint_100 + %46 = OpLoad %int %i + %47 = OpCopyObject %int %46 + %49 = OpAccessChain %_ptr_Uniform_SSBO %ssbos %47 + %50 = OpArrayLength %uint %49 1 + %51 = OpBitcast %int %50 + %52 = OpConvertSToF %float %51 + %55 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + %56 = OpLoad %float %55 + %57 = OpFAdd %float %56 %52 + %58 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + OpStore %58 %57 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag b/shaders-hlsl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag new file mode 100644 index 00000000000..17aab1d8f77 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag @@ -0,0 +1,25 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 10 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragDepth + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main DepthReplacing + OpSource GLSL 450 + OpName %main "main" + OpName %gl_FragDepth "gl_FragDepth" + OpDecorate %gl_FragDepth BuiltIn FragDepth + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %float_0_5 = OpConstant %float 0.5 +%gl_FragDepth = OpVariable %_ptr_Output_float Output %float_0_5 + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/phi.zero-initialize.asm.frag b/shaders-hlsl-no-opt/asm/frag/phi.zero-initialize.asm.frag new file mode 100644 index 00000000000..3696660d36d --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/phi.zero-initialize.asm.frag @@ -0,0 +1,69 @@ 
+; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 40 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vColor %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %vColor "vColor" + OpName %uninit_function_int "uninit_function_int" + OpName %FragColor "FragColor" + OpName %uninit_int "uninit_int" + OpName %uninit_vector "uninit_vector" + OpName %uninit_matrix "uninit_matrix" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpName %uninit_foo "uninit_foo" + OpDecorate %vColor Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vColor = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_10 = OpConstant %float 10 + %bool = OpTypeBool + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_20 = OpConstant %int 20 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Private_int = OpTypePointer Private %int + %uninit_int = OpUndef %int + %v4int = OpTypeVector %int 4 +%_ptr_Private_v4int = OpTypePointer Private %v4int +%uninit_vector = OpUndef %v4int +%mat4v4float = OpTypeMatrix %v4float 4 +%_ptr_Private_mat4v4float = OpTypePointer Private %mat4v4float +%uninit_matrix = OpUndef %mat4v4float + %Foo = OpTypeStruct %int +%_ptr_Private_Foo = OpTypePointer Private %Foo + %uninit_foo = OpUndef %Foo + %main = OpFunction %void None %3 + %5 = OpLabel +%uninit_function_int = OpVariable %_ptr_Function_int Function + %13 = OpAccessChain %_ptr_Input_float %vColor %uint_0 + %14 = OpLoad %float %13 + %17 = OpFOrdGreaterThan %bool %14 
%float_10 + OpSelectionMerge %19 None + OpBranchConditional %17 %18 %24 + %18 = OpLabel + OpBranch %19 + %24 = OpLabel + OpBranch %19 + %19 = OpLabel + %27 = OpPhi %int %int_10 %18 %int_20 %24 + %28 = OpLoad %v4float %vColor + OpStore %FragColor %28 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag new file mode 100644 index 00000000000..ebd8d6bab75 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag @@ -0,0 +1,89 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + 
%_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + OpReturn + OpFunctionEnd + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + OpBeginInvocationInterlockEXT + %43 = OpFunctionCall %void %callee2_ + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag new file mode 100644 index 00000000000..69b8f911204 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag @@ -0,0 +1,121 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos 
Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %ssbo2 DescriptorSet 0 + OpDecorate %ssbo2 Binding 2 + + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint + %SSBO2 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %bool = OpTypeBool + %true = OpConstantTrue %bool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 
+%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %callee3_res = OpFunctionCall %void %callee3_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %calle3_block = OpLabel + %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %frag_coord_x = OpLoad %float %frag_coord_x_ptr + %frag_coord_int = OpConvertFToS %int %frag_coord_x + %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int + OpStore %ssbo_ptr %uint_4 + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + + OpSelectionMerge %merged_block None + OpBranchConditional %true %dummy_block %merged_block + %dummy_block = OpLabel + OpBeginInvocationInterlockEXT + OpEndInvocationInterlockEXT + OpBranch %merged_block + + %merged_block = OpLabel + OpReturn + + OpFunctionEnd diff --git 
a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag new file mode 100644 index 00000000000..7c0fe9a2b24 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + 
%uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %call3res = OpFunctionCall %void %callee3_ + %call4res = OpFunctionCall %void %callee4_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %begin3 = OpLabel + OpBeginInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee4_ = OpFunction %void None %3 + %begin4 = OpLabel + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/reserved-function-identifier.asm.frag b/shaders-hlsl-no-opt/asm/frag/reserved-function-identifier.asm.frag new file mode 100644 index 00000000000..a5a16f2873b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/reserved-function-identifier.asm.frag @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; 
Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %ACOS_f1_ "mat3" + OpName %a "a" + OpName %ACOS_i1_ "gl_Foo" + OpName %a_0 "a" + OpName %FragColor "FragColor" + OpName %param "param" + OpName %param_0 "param" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %8 = OpTypeFunction %float %_ptr_Function_float + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %14 = OpTypeFunction %float %_ptr_Function_int + %float_1 = OpConstant %float 1 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %float_2 = OpConstant %float 2 + %int_4 = OpConstant %int 4 + %main = OpFunction %void None %3 + %5 = OpLabel + %param = OpVariable %_ptr_Function_float Function + %param_0 = OpVariable %_ptr_Function_int Function + OpStore %param %float_2 + %32 = OpFunctionCall %float %ACOS_f1_ %param + OpStore %param_0 %int_4 + %35 = OpFunctionCall %float %ACOS_i1_ %param_0 + %36 = OpFAdd %float %32 %35 + OpStore %FragColor %36 + OpReturn + OpFunctionEnd + %ACOS_f1_ = OpFunction %float None %8 + %a = OpFunctionParameter %_ptr_Function_float + %11 = OpLabel + %18 = OpLoad %float %a + %20 = OpFAdd %float %18 %float_1 + OpReturnValue %20 + OpFunctionEnd + %ACOS_i1_ = OpFunction %float None %14 + %a_0 = OpFunctionParameter %_ptr_Function_int + %17 = OpLabel + %23 = OpLoad %int %a_0 + %24 = OpConvertSToF %float %23 + %25 = OpFAdd %float %24 %float_1 + OpReturnValue %25 + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/scalar-select.spv14.asm.frag b/shaders-hlsl-no-opt/asm/frag/scalar-select.spv14.asm.frag new file mode 100644 index 00000000000..07450ee80b6 --- /dev/null +++ 
b/shaders-hlsl-no-opt/asm/frag/scalar-select.spv14.asm.frag @@ -0,0 +1,62 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %bool = OpTypeBool + %false = OpConstantFalse %bool + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %true = OpConstantTrue %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %s = OpTypeStruct %float + %arr = OpTypeArray %float %uint_2 +%_ptr_Function_s = OpTypePointer Function %s +%_ptr_Function_arr = OpTypePointer Function %arr + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %17 = OpConstantComposite %v4float %float_1 %float_1 %float_0 %float_1 + %18 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %19 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %20 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %s0 = OpConstantComposite %s %float_0 + %s1 = OpConstantComposite %s %float_1 + %v4bool = OpTypeVector %bool 4 + %b4 = OpConstantComposite %v4bool %false %true %false %true + %arr1 = OpConstantComposite %arr %float_0 %float_1 + %arr2 = OpConstantComposite %arr %float_1 %float_0 + %main = OpFunction %void None %3 + %5 = OpLabel + %ss = OpVariable %_ptr_Function_s Function + %arrvar = OpVariable %_ptr_Function_arr Function + ; Not trivial + %21 = OpSelect %v4float %false %17 %18 + OpStore %FragColor %21 + ; Trivial + %22 = OpSelect %v4float %false %19 %20 + OpStore %FragColor %22 + ; Vector not trivial + %23 = OpSelect %v4float %b4 %17 %18 + OpStore %FragColor %23 + ; Vector trivial + %24 = OpSelect %v4float %b4 %19 
%20 + OpStore %FragColor %24 + ; Struct selection + %sout = OpSelect %s %false %s0 %s1 + OpStore %ss %sout + ; Array selection + %arrout = OpSelect %arr %true %arr1 %arr2 + OpStore %arrvar %arrout + + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/struct-packing-last-element-array-matrix-rule.invalid.asm.frag b/shaders-hlsl-no-opt/asm/frag/struct-packing-last-element-array-matrix-rule.invalid.asm.frag new file mode 100644 index 00000000000..421e4660932 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/struct-packing-last-element-array-matrix-rule.invalid.asm.frag @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 33 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %Foo "Foo" + OpMemberName %Foo 0 "m" + OpMemberName %Foo 1 "v" + OpName %FooUBO "FooUBO" + OpMemberName %FooUBO 0 "foo" + OpName %_ "" + OpName %Bar "Bar" + OpMemberName %Bar 0 "m" + OpMemberName %Bar 1 "v" + OpName %BarUBO "BarUBO" + OpMemberName %BarUBO 0 "bar" + OpName %__0 "" + OpDecorate %FragColor Location 0 + OpDecorate %_arr_mat3v3float_uint_2 ArrayStride 48 + OpMemberDecorate %Foo 0 ColMajor + OpMemberDecorate %Foo 0 Offset 0 + OpMemberDecorate %Foo 0 MatrixStride 16 + OpMemberDecorate %Foo 1 Offset 92 + OpMemberDecorate %FooUBO 0 Offset 0 + OpDecorate %FooUBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpMemberDecorate %Bar 0 ColMajor + OpMemberDecorate %Bar 0 Offset 0 + OpMemberDecorate %Bar 0 MatrixStride 16 + OpMemberDecorate %Bar 1 Offset 44 + OpMemberDecorate %BarUBO 0 Offset 0 + OpDecorate %BarUBO Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 
+%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %v3float = OpTypeVector %float 3 +%mat3v3float = OpTypeMatrix %v3float 3 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_mat3v3float_uint_2 = OpTypeArray %mat3v3float %uint_2 + %Foo = OpTypeStruct %_arr_mat3v3float_uint_2 %float + %FooUBO = OpTypeStruct %Foo +%_ptr_Uniform_FooUBO = OpTypePointer Uniform %FooUBO + %_ = OpVariable %_ptr_Uniform_FooUBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Bar = OpTypeStruct %mat3v3float %float + %BarUBO = OpTypeStruct %Bar +%_ptr_Uniform_BarUBO = OpTypePointer Uniform %BarUBO + %__0 = OpVariable %_ptr_Uniform_BarUBO Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %23 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %int_1 + %24 = OpLoad %float %23 + %29 = OpAccessChain %_ptr_Uniform_float %__0 %int_0 %int_1 + %30 = OpLoad %float %29 + %31 = OpFAdd %float %24 %30 + %32 = OpCompositeConstruct %v4float %31 %31 %31 %31 + OpStore %FragColor %32 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/subgroup-arithmetic-cast.invalid.nofxc.sm60.asm.frag b/shaders-hlsl-no-opt/asm/frag/subgroup-arithmetic-cast.invalid.nofxc.sm60.asm.frag new file mode 100644 index 00000000000..a9b34893c83 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/subgroup-arithmetic-cast.invalid.nofxc.sm60.asm.frag @@ -0,0 +1,65 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 78 +; Schema: 0 + OpCapability Shader + OpCapability GroupNonUniform + OpCapability GroupNonUniformArithmetic + OpCapability GroupNonUniformClustered + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %index %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_KHR_shader_subgroup_arithmetic" + 
OpSourceExtension "GL_KHR_shader_subgroup_basic" + OpSourceExtension "GL_KHR_shader_subgroup_clustered" + OpName %main "main" + OpName %index "index" + OpName %FragColor "FragColor" + OpDecorate %index Flat + OpDecorate %index Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int + %index = OpVariable %_ptr_Input_int Input + %uint_3 = OpConstant %uint 3 + %uint_4 = OpConstant %uint 4 +%_ptr_Output_uint = OpTypePointer Output %uint + %FragColor = OpVariable %_ptr_Output_uint Output + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpLoad %int %index + %u = OpBitcast %uint %i + %res0 = OpGroupNonUniformSMin %uint %uint_3 Reduce %i + %res1 = OpGroupNonUniformSMax %uint %uint_3 Reduce %u + %res2 = OpGroupNonUniformUMin %uint %uint_3 Reduce %i + %res3 = OpGroupNonUniformUMax %uint %uint_3 Reduce %u + ;%res4 = OpGroupNonUniformSMax %uint %uint_3 InclusiveScan %i + ;%res5 = OpGroupNonUniformSMin %uint %uint_3 InclusiveScan %u + ;%res6 = OpGroupNonUniformUMax %uint %uint_3 ExclusiveScan %i + ;%res7 = OpGroupNonUniformUMin %uint %uint_3 ExclusiveScan %u + ;%res8 = OpGroupNonUniformSMin %uint %uint_3 ClusteredReduce %i %uint_4 + ;%res9 = OpGroupNonUniformSMax %uint %uint_3 ClusteredReduce %u %uint_4 + ;%res10 = OpGroupNonUniformUMin %uint %uint_3 ClusteredReduce %i %uint_4 + ;%res11 = OpGroupNonUniformUMax %uint %uint_3 ClusteredReduce %u %uint_4 + OpStore %FragColor %res0 + OpStore %FragColor %res1 + OpStore %FragColor %res2 + OpStore %FragColor %res3 + ;OpStore %FragColor %res4 + ;OpStore %FragColor %res5 + ;OpStore %FragColor %res6 + ;OpStore %FragColor %res7 + ;OpStore %FragColor %res8 + ;OpStore %FragColor %res9 + ;OpStore %FragColor %res10 + ;OpStore %FragColor %res11 + OpReturn + OpFunctionEnd diff --git 
a/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag similarity index 100% rename from shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag rename to shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag diff --git a/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag new file mode 100644 index 00000000000..2e5e030922b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag @@ -0,0 +1,179 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 132 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %A %B %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %test_vector_ "test_vector(" + OpName %test_scalar_ "test_scalar(" + OpName %le "le" + OpName %A "A" + OpName %B "B" + OpName %leq "leq" + OpName %ge "ge" + OpName %geq "geq" + OpName %eq "eq" + OpName %neq "neq" + OpName %le_0 "le" + OpName %leq_0 "leq" + OpName %ge_0 "ge" + OpName %geq_0 "geq" + OpName %eq_0 "eq" + OpName %neq_0 "neq" + OpName %FragColor "FragColor" + OpDecorate %A Location 0 + OpDecorate %B Location 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %8 = OpTypeFunction %v4float + %11 = OpTypeFunction %float + %bool = OpTypeBool + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool +%_ptr_Input_v4float = OpTypePointer Input %v4float + %A = OpVariable %_ptr_Input_v4float Input + %B = OpVariable %_ptr_Input_v4float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %48 = OpConstantComposite 
%v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Function_bool = OpTypePointer Function %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %128 = OpFunctionCall %v4float %test_vector_ + %129 = OpFunctionCall %float %test_scalar_ + %130 = OpCompositeConstruct %v4float %129 %129 %129 %129 + %131 = OpFAdd %v4float %128 %130 + OpStore %FragColor %131 + OpReturn + OpFunctionEnd +%test_vector_ = OpFunction %v4float None %8 + %10 = OpLabel + %le = OpVariable %_ptr_Function_v4bool Function + %leq = OpVariable %_ptr_Function_v4bool Function + %ge = OpVariable %_ptr_Function_v4bool Function + %geq = OpVariable %_ptr_Function_v4bool Function + %eq = OpVariable %_ptr_Function_v4bool Function + %neq = OpVariable %_ptr_Function_v4bool Function + %20 = OpLoad %v4float %A + %22 = OpLoad %v4float %B + %23 = OpFUnordLessThan %v4bool %20 %22 + OpStore %le %23 + %25 = OpLoad %v4float %A + %26 = OpLoad %v4float %B + %27 = OpFUnordLessThanEqual %v4bool %25 %26 + OpStore %leq %27 + %29 = OpLoad %v4float %A + %30 = OpLoad %v4float %B + %31 = OpFUnordGreaterThan %v4bool %29 %30 + OpStore %ge %31 + %33 = OpLoad %v4float %A + %34 = OpLoad %v4float %B + %35 = OpFUnordGreaterThanEqual %v4bool %33 %34 + OpStore %geq %35 + %37 = OpLoad %v4float %A + %38 = OpLoad %v4float %B + %39 = OpFUnordEqual %v4bool %37 %38 + OpStore %eq %39 + %ordered = OpFOrdNotEqual %v4bool %37 %38 + OpStore %neq %ordered + %41 = OpLoad %v4float %A + %42 = OpLoad %v4float %B + %43 = OpFUnordNotEqual %v4bool %41 %42 + OpStore %neq %43 + %44 = OpLoad %v4bool %le + %49 = OpSelect %v4float %44 %48 %47 + %50 = OpLoad %v4bool %leq + %51 = OpSelect %v4float %50 %48 %47 + %52 = OpFAdd %v4float %49 %51 + %53 = OpLoad %v4bool %ge + %54 = OpSelect %v4float %53 %48 %47 + %55 = OpFAdd %v4float %52 %54 + %56 = OpLoad 
%v4bool %geq + %57 = OpSelect %v4float %56 %48 %47 + %58 = OpFAdd %v4float %55 %57 + %59 = OpLoad %v4bool %eq + %60 = OpSelect %v4float %59 %48 %47 + %61 = OpFAdd %v4float %58 %60 + %62 = OpLoad %v4bool %neq + %63 = OpSelect %v4float %62 %48 %47 + %64 = OpFAdd %v4float %61 %63 + OpReturnValue %64 + OpFunctionEnd +%test_scalar_ = OpFunction %float None %11 + %13 = OpLabel + %le_0 = OpVariable %_ptr_Function_bool Function + %leq_0 = OpVariable %_ptr_Function_bool Function + %ge_0 = OpVariable %_ptr_Function_bool Function + %geq_0 = OpVariable %_ptr_Function_bool Function + %eq_0 = OpVariable %_ptr_Function_bool Function + %neq_0 = OpVariable %_ptr_Function_bool Function + %72 = OpAccessChain %_ptr_Input_float %A %uint_0 + %73 = OpLoad %float %72 + %74 = OpAccessChain %_ptr_Input_float %B %uint_0 + %75 = OpLoad %float %74 + %76 = OpFUnordLessThan %bool %73 %75 + OpStore %le_0 %76 + %78 = OpAccessChain %_ptr_Input_float %A %uint_0 + %79 = OpLoad %float %78 + %80 = OpAccessChain %_ptr_Input_float %B %uint_0 + %81 = OpLoad %float %80 + %82 = OpFUnordLessThanEqual %bool %79 %81 + OpStore %leq_0 %82 + %84 = OpAccessChain %_ptr_Input_float %A %uint_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Input_float %B %uint_0 + %87 = OpLoad %float %86 + %88 = OpFUnordGreaterThan %bool %85 %87 + OpStore %ge_0 %88 + %90 = OpAccessChain %_ptr_Input_float %A %uint_0 + %91 = OpLoad %float %90 + %92 = OpAccessChain %_ptr_Input_float %B %uint_0 + %93 = OpLoad %float %92 + %94 = OpFUnordGreaterThanEqual %bool %91 %93 + OpStore %geq_0 %94 + %96 = OpAccessChain %_ptr_Input_float %A %uint_0 + %97 = OpLoad %float %96 + %98 = OpAccessChain %_ptr_Input_float %B %uint_0 + %99 = OpLoad %float %98 + %100 = OpFUnordEqual %bool %97 %99 + OpStore %eq_0 %100 + %102 = OpAccessChain %_ptr_Input_float %A %uint_0 + %103 = OpLoad %float %102 + %104 = OpAccessChain %_ptr_Input_float %B %uint_0 + %105 = OpLoad %float %104 + %106 = OpFUnordNotEqual %bool %103 %105 + OpStore %neq_0 %106 + %107 = OpLoad 
%bool %le_0 + %108 = OpSelect %float %107 %float_1 %float_0 + %109 = OpLoad %bool %leq_0 + %110 = OpSelect %float %109 %float_1 %float_0 + %111 = OpFAdd %float %108 %110 + %112 = OpLoad %bool %ge_0 + %113 = OpSelect %float %112 %float_1 %float_0 + %114 = OpFAdd %float %111 %113 + %115 = OpLoad %bool %geq_0 + %116 = OpSelect %float %115 %float_1 %float_0 + %117 = OpFAdd %float %114 %116 + %118 = OpLoad %bool %eq_0 + %119 = OpSelect %float %118 %float_1 %float_0 + %120 = OpFAdd %float %117 %119 + %121 = OpLoad %bool %neq_0 + %122 = OpSelect %float %121 %float_1 %float_0 + %123 = OpFAdd %float %120 %122 + OpReturnValue %123 + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag new file mode 100644 index 00000000000..2e5e030922b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag @@ -0,0 +1,179 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 132 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %A %B %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %test_vector_ "test_vector(" + OpName %test_scalar_ "test_scalar(" + OpName %le "le" + OpName %A "A" + OpName %B "B" + OpName %leq "leq" + OpName %ge "ge" + OpName %geq "geq" + OpName %eq "eq" + OpName %neq "neq" + OpName %le_0 "le" + OpName %leq_0 "leq" + OpName %ge_0 "ge" + OpName %geq_0 "geq" + OpName %eq_0 "eq" + OpName %neq_0 "neq" + OpName %FragColor "FragColor" + OpDecorate %A Location 0 + OpDecorate %B Location 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %8 = OpTypeFunction %v4float + %11 = OpTypeFunction %float + %bool = OpTypeBool + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool 
= OpTypePointer Function %v4bool +%_ptr_Input_v4float = OpTypePointer Input %v4float + %A = OpVariable %_ptr_Input_v4float Input + %B = OpVariable %_ptr_Input_v4float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Function_bool = OpTypePointer Function %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %128 = OpFunctionCall %v4float %test_vector_ + %129 = OpFunctionCall %float %test_scalar_ + %130 = OpCompositeConstruct %v4float %129 %129 %129 %129 + %131 = OpFAdd %v4float %128 %130 + OpStore %FragColor %131 + OpReturn + OpFunctionEnd +%test_vector_ = OpFunction %v4float None %8 + %10 = OpLabel + %le = OpVariable %_ptr_Function_v4bool Function + %leq = OpVariable %_ptr_Function_v4bool Function + %ge = OpVariable %_ptr_Function_v4bool Function + %geq = OpVariable %_ptr_Function_v4bool Function + %eq = OpVariable %_ptr_Function_v4bool Function + %neq = OpVariable %_ptr_Function_v4bool Function + %20 = OpLoad %v4float %A + %22 = OpLoad %v4float %B + %23 = OpFUnordLessThan %v4bool %20 %22 + OpStore %le %23 + %25 = OpLoad %v4float %A + %26 = OpLoad %v4float %B + %27 = OpFUnordLessThanEqual %v4bool %25 %26 + OpStore %leq %27 + %29 = OpLoad %v4float %A + %30 = OpLoad %v4float %B + %31 = OpFUnordGreaterThan %v4bool %29 %30 + OpStore %ge %31 + %33 = OpLoad %v4float %A + %34 = OpLoad %v4float %B + %35 = OpFUnordGreaterThanEqual %v4bool %33 %34 + OpStore %geq %35 + %37 = OpLoad %v4float %A + %38 = OpLoad %v4float %B + %39 = OpFUnordEqual %v4bool %37 %38 + OpStore %eq %39 + %ordered = OpFOrdNotEqual %v4bool %37 %38 + OpStore %neq %ordered + %41 = OpLoad %v4float %A + %42 = OpLoad 
%v4float %B + %43 = OpFUnordNotEqual %v4bool %41 %42 + OpStore %neq %43 + %44 = OpLoad %v4bool %le + %49 = OpSelect %v4float %44 %48 %47 + %50 = OpLoad %v4bool %leq + %51 = OpSelect %v4float %50 %48 %47 + %52 = OpFAdd %v4float %49 %51 + %53 = OpLoad %v4bool %ge + %54 = OpSelect %v4float %53 %48 %47 + %55 = OpFAdd %v4float %52 %54 + %56 = OpLoad %v4bool %geq + %57 = OpSelect %v4float %56 %48 %47 + %58 = OpFAdd %v4float %55 %57 + %59 = OpLoad %v4bool %eq + %60 = OpSelect %v4float %59 %48 %47 + %61 = OpFAdd %v4float %58 %60 + %62 = OpLoad %v4bool %neq + %63 = OpSelect %v4float %62 %48 %47 + %64 = OpFAdd %v4float %61 %63 + OpReturnValue %64 + OpFunctionEnd +%test_scalar_ = OpFunction %float None %11 + %13 = OpLabel + %le_0 = OpVariable %_ptr_Function_bool Function + %leq_0 = OpVariable %_ptr_Function_bool Function + %ge_0 = OpVariable %_ptr_Function_bool Function + %geq_0 = OpVariable %_ptr_Function_bool Function + %eq_0 = OpVariable %_ptr_Function_bool Function + %neq_0 = OpVariable %_ptr_Function_bool Function + %72 = OpAccessChain %_ptr_Input_float %A %uint_0 + %73 = OpLoad %float %72 + %74 = OpAccessChain %_ptr_Input_float %B %uint_0 + %75 = OpLoad %float %74 + %76 = OpFUnordLessThan %bool %73 %75 + OpStore %le_0 %76 + %78 = OpAccessChain %_ptr_Input_float %A %uint_0 + %79 = OpLoad %float %78 + %80 = OpAccessChain %_ptr_Input_float %B %uint_0 + %81 = OpLoad %float %80 + %82 = OpFUnordLessThanEqual %bool %79 %81 + OpStore %leq_0 %82 + %84 = OpAccessChain %_ptr_Input_float %A %uint_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Input_float %B %uint_0 + %87 = OpLoad %float %86 + %88 = OpFUnordGreaterThan %bool %85 %87 + OpStore %ge_0 %88 + %90 = OpAccessChain %_ptr_Input_float %A %uint_0 + %91 = OpLoad %float %90 + %92 = OpAccessChain %_ptr_Input_float %B %uint_0 + %93 = OpLoad %float %92 + %94 = OpFUnordGreaterThanEqual %bool %91 %93 + OpStore %geq_0 %94 + %96 = OpAccessChain %_ptr_Input_float %A %uint_0 + %97 = OpLoad %float %96 + %98 = OpAccessChain 
%_ptr_Input_float %B %uint_0 + %99 = OpLoad %float %98 + %100 = OpFUnordEqual %bool %97 %99 + OpStore %eq_0 %100 + %102 = OpAccessChain %_ptr_Input_float %A %uint_0 + %103 = OpLoad %float %102 + %104 = OpAccessChain %_ptr_Input_float %B %uint_0 + %105 = OpLoad %float %104 + %106 = OpFUnordNotEqual %bool %103 %105 + OpStore %neq_0 %106 + %107 = OpLoad %bool %le_0 + %108 = OpSelect %float %107 %float_1 %float_0 + %109 = OpLoad %bool %leq_0 + %110 = OpSelect %float %109 %float_1 %float_0 + %111 = OpFAdd %float %108 %110 + %112 = OpLoad %bool %ge_0 + %113 = OpSelect %float %112 %float_1 %float_0 + %114 = OpFAdd %float %111 %113 + %115 = OpLoad %bool %geq_0 + %116 = OpSelect %float %115 %float_1 %float_0 + %117 = OpFAdd %float %114 %116 + %118 = OpLoad %bool %eq_0 + %119 = OpSelect %float %118 %float_1 %float_0 + %120 = OpFAdd %float %117 %119 + %121 = OpLoad %bool %neq_0 + %122 = OpSelect %float %121 %float_1 %float_0 + %123 = OpFAdd %float %120 %122 + OpReturnValue %123 + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh new file mode 100644 index 00000000000..7b38001d8d4 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh @@ -0,0 +1,150 @@ +; SPIR-V +; Version: 1.4 +; Generator: Unknown(30017); 21022 +; Bound: 89 +; Schema: 0 + OpCapability Shader + OpCapability Geometry + OpCapability ShaderViewportIndexLayerEXT + OpCapability MeshShadingEXT + OpExtension "SPV_EXT_mesh_shader" + OpExtension "SPV_EXT_shader_viewport_index_layer" + OpMemoryModel Logical GLSL450 + OpEntryPoint MeshEXT %main "main" %SV_Position %B %SV_CullPrimitive %SV_RenderTargetArrayIndex %SV_PrimitiveID %C %indices %32 %gl_LocalInvocationIndex %38 + OpExecutionMode %main OutputVertices 24 + OpExecutionMode %main OutputPrimitivesNV 8 + OpExecutionMode %main OutputTrianglesNV + 
OpExecutionMode %main LocalSize 2 3 4 + OpName %main "main" + OpName %SV_Position "SV_Position" + OpName %B "B" + OpName %SV_CullPrimitive "SV_CullPrimitive" + OpName %SV_RenderTargetArrayIndex "SV_RenderTargetArrayIndex" + OpName %SV_PrimitiveID "SV_PrimitiveID" + OpName %C "C" + OpName %indices "indices" + OpName %_ "" + OpDecorate %SV_Position BuiltIn Position + OpDecorate %B Location 1 + OpDecorate %SV_CullPrimitive BuiltIn CullPrimitiveEXT + OpDecorate %SV_CullPrimitive PerPrimitiveNV + OpDecorate %SV_RenderTargetArrayIndex BuiltIn Layer + OpDecorate %SV_RenderTargetArrayIndex PerPrimitiveNV + OpDecorate %SV_PrimitiveID BuiltIn PrimitiveId + OpDecorate %SV_PrimitiveID PerPrimitiveNV + OpDecorate %C Location 3 + OpDecorate %C PerPrimitiveNV + OpDecorate %indices BuiltIn PrimitiveTriangleIndicesEXT + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + %void = OpTypeVoid + %2 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_24 = OpConstant %uint 24 +%_arr_v4float_uint_24 = OpTypeArray %v4float %uint_24 +%_ptr_Output__arr_v4float_uint_24 = OpTypePointer Output %_arr_v4float_uint_24 +%SV_Position = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %B = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %bool = OpTypeBool + %uint_8 = OpConstant %uint 8 +%_arr_bool_uint_8 = OpTypeArray %bool %uint_8 +%_ptr_Output__arr_bool_uint_8 = OpTypePointer Output %_arr_bool_uint_8 +%SV_CullPrimitive = OpVariable %_ptr_Output__arr_bool_uint_8 Output +%_arr_uint_uint_8 = OpTypeArray %uint %uint_8 +%_ptr_Output__arr_uint_uint_8 = OpTypePointer Output %_arr_uint_uint_8 +%SV_RenderTargetArrayIndex = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%SV_PrimitiveID = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8 +%_ptr_Output__arr_v4float_uint_8 = OpTypePointer Output %_arr_v4float_uint_8 + %C = OpVariable %_ptr_Output__arr_v4float_uint_8 
Output + %v3uint = OpTypeVector %uint 3 +%_arr_v3uint_uint_8 = OpTypeArray %v3uint %uint_8 +%_ptr_Output__arr_v3uint_uint_8 = OpTypePointer Output %_arr_v3uint_uint_8 + %indices = OpVariable %_ptr_Output__arr_v3uint_uint_8 Output + %uint_64 = OpConstant %uint 64 +%_arr_float_uint_64 = OpTypeArray %float %uint_64 +%_ptr_Workgroup__arr_float_uint_64 = OpTypePointer Workgroup %_arr_float_uint_64 + %32 = OpVariable %_ptr_Workgroup__arr_float_uint_64 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %_ = OpTypeStruct %float +%_ptr_TaskPayloadWorkgroupEXT__ = OpTypePointer TaskPayloadWorkgroupEXT %_ + %38 = OpVariable %_ptr_TaskPayloadWorkgroupEXT__ TaskPayloadWorkgroupEXT +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %uint_2 = OpConstant %uint 2 + %uint_264 = OpConstant %uint 264 +%_ptr_Output_float = OpTypePointer Output %float + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float +%_ptr_Output_v3uint = OpTypePointer Output %v3uint +%_ptr_Output_bool = OpTypePointer Output %bool +%_ptr_Output_uint = OpTypePointer Output %uint + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %85 + %85 = OpLabel + %35 = OpLoad %uint %gl_LocalInvocationIndex + %39 = OpConvertUToF %float %35 + %41 = OpAccessChain %_ptr_Workgroup_float %32 %35 + OpStore %41 %39 + OpControlBarrier %uint_2 %uint_2 %uint_264 + OpSetMeshOutputsEXT %uint_24 %uint_8 + %44 = OpLoad %float %41 + %46 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_0 + OpStore %46 %44 + %48 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_1 + OpStore %48 %44 + %50 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_2 + OpStore %50 %44 + %51 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_3 + OpStore %51 %44 + %53 = OpBitwiseXor %uint %35 %uint_1 + %54 = OpAccessChain 
%_ptr_Workgroup_float %32 %53 + %55 = OpLoad %float %54 + %57 = OpInBoundsAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %38 %uint_0 + %58 = OpLoad %float %57 + %59 = OpFAdd %float %58 %55 + %60 = OpAccessChain %_ptr_Output_float %B %35 %uint_0 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_Output_float %B %35 %uint_1 + OpStore %61 %59 + %62 = OpAccessChain %_ptr_Output_float %B %35 %uint_2 + OpStore %62 %59 + %63 = OpAccessChain %_ptr_Output_float %B %35 %uint_3 + OpStore %63 %59 + %64 = OpULessThan %bool %35 %uint_8 + OpSelectionMerge %87 None + OpBranchConditional %64 %86 %87 + %86 = OpLabel + %65 = OpIMul %uint %35 %uint_3 + %66 = OpIAdd %uint %65 %uint_1 + %67 = OpIAdd %uint %65 %uint_2 + %68 = OpCompositeConstruct %v3uint %65 %66 %67 + %70 = OpAccessChain %_ptr_Output_v3uint %indices %35 + OpStore %70 %68 + %71 = OpBitwiseAnd %uint %35 %uint_1 + %72 = OpINotEqual %bool %71 %uint_0 + %74 = OpAccessChain %_ptr_Output_bool %SV_CullPrimitive %35 + OpStore %74 %72 + %76 = OpAccessChain %_ptr_Output_uint %SV_PrimitiveID %35 + OpStore %76 %35 + %77 = OpAccessChain %_ptr_Output_uint %SV_RenderTargetArrayIndex %35 + OpStore %77 %35 + %78 = OpBitwiseXor %uint %35 %uint_2 + %79 = OpAccessChain %_ptr_Workgroup_float %32 %78 + %80 = OpLoad %float %79 + %81 = OpAccessChain %_ptr_Output_float %C %35 %uint_0 + OpStore %81 %80 + %82 = OpAccessChain %_ptr_Output_float %C %35 %uint_1 + OpStore %82 %80 + %83 = OpAccessChain %_ptr_Output_float %C %35 %uint_2 + OpStore %83 %80 + %84 = OpAccessChain %_ptr_Output_float %C %35 %uint_3 + OpStore %84 %80 + OpBranch %87 + %87 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/temporary.zero-initialize.asm.frag b/shaders-hlsl-no-opt/asm/temporary.zero-initialize.asm.frag new file mode 100644 index 00000000000..eccff08b331 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/temporary.zero-initialize.asm.frag @@ -0,0 +1,93 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 65 +; 
Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vA %vB + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %vA "vA" + OpName %vB "vB" + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %vA RelaxedPrecision + OpDecorate %vA Flat + OpDecorate %vA Location 0 + OpDecorate %25 RelaxedPrecision + OpDecorate %30 RelaxedPrecision + OpDecorate %vB RelaxedPrecision + OpDecorate %vB Flat + OpDecorate %vB Location 1 + OpDecorate %38 RelaxedPrecision + OpDecorate %40 RelaxedPrecision + OpDecorate %49 RelaxedPrecision + OpDecorate %51 RelaxedPrecision + OpDecorate %53 RelaxedPrecision + OpDecorate %56 RelaxedPrecision + OpDecorate %64 RelaxedPrecision + OpDecorate %58 RelaxedPrecision + OpDecorate %57 RelaxedPrecision + OpDecorate %60 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_int = OpTypePointer Input %int + %vA = OpVariable %_ptr_Input_int Input + %bool = OpTypeBool + %int_20 = OpConstant %int 20 + %int_50 = OpConstant %int 50 + %vB = OpVariable %_ptr_Input_int Input + %int_40 = OpConstant %int 40 + %int_60 = OpConstant %int 60 + %int_10 = OpConstant %int 10 + %float_1 = OpConstant %float 1 + %63 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + OpStore %FragColor %11 + OpBranch %17 + %17 = OpLabel + %60 = OpPhi %int %int_0 %5 %58 %20 + %57 = OpPhi %int %int_0 %5 %56 %20 + %25 = OpLoad %int %vA + %27 = OpSLessThan %bool %57 %25 + OpLoopMerge 
%19 %20 None + OpBranchConditional %27 %18 %19 + %18 = OpLabel + %30 = OpIAdd %int %25 %57 + %32 = OpIEqual %bool %30 %int_20 + OpSelectionMerge %34 None + OpBranchConditional %32 %33 %36 + %33 = OpLabel + OpBranch %34 + %36 = OpLabel + %38 = OpLoad %int %vB + %40 = OpIAdd %int %38 %57 + %42 = OpIEqual %bool %40 %int_40 + %64 = OpSelect %int %42 %int_60 %60 + OpBranch %34 + %34 = OpLabel + %58 = OpPhi %int %int_50 %33 %64 %36 + %49 = OpIAdd %int %58 %int_10 + %51 = OpLoad %v4float %FragColor + %53 = OpFAdd %v4float %51 %63 + OpStore %FragColor %53 + OpBranch %20 + %20 = OpLabel + %56 = OpIAdd %int %57 %49 + OpBranch %17 + %19 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/vert/block-struct-initializer.asm.vert b/shaders-hlsl-no-opt/asm/vert/block-struct-initializer.asm.vert new file mode 100644 index 00000000000..a431e6a7174 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/vert/block-struct-initializer.asm.vert @@ -0,0 +1,37 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 13 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %foo + OpSource GLSL 450 + OpName %main "main" + OpName %Vert "Vert" + OpMemberName %Vert 0 "a" + OpMemberName %Vert 1 "b" + OpName %_ "" + OpName %Foo "Foo" + OpMemberName %Foo 0 "c" + OpMemberName %Foo 1 "d" + OpName %foo "foo" + OpDecorate %Vert Block + OpDecorate %_ Location 0 + OpDecorate %foo Location 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %Vert = OpTypeStruct %float %float +%_ptr_Output_Vert = OpTypePointer Output %Vert + %zero_vert = OpConstantNull %Vert + %_ = OpVariable %_ptr_Output_Vert Output %zero_vert + %Foo = OpTypeStruct %float %float +%_ptr_Output_Foo = OpTypePointer Output %Foo +%zero_foo = OpConstantNull %Foo + %foo = OpVariable %_ptr_Output_Foo Output %zero_foo + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + 
OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/vert/builtin-output-initializer.asm.vert b/shaders-hlsl-no-opt/asm/vert/builtin-output-initializer.asm.vert new file mode 100644 index 00000000000..aaa68662e5d --- /dev/null +++ b/shaders-hlsl-no-opt/asm/vert/builtin-output-initializer.asm.vert @@ -0,0 +1,44 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 20 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %zero = OpConstantNull %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output %zero + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %17 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %19 %17 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/vert/complex-link-by-name.asm.vert 
b/shaders-hlsl-no-opt/asm/vert/complex-link-by-name.asm.vert new file mode 100644 index 00000000000..94a883c1ed1 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/vert/complex-link-by-name.asm.vert @@ -0,0 +1,119 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 59 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %output_location_0 %output_location_2 %output_location_3 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Struct_vec4" + OpMemberName %Foo 0 "m0" + OpName %c "c" + OpName %Foo_0 "Struct_vec4" + OpMemberName %Foo_0 0 "m0" + OpName %Bar "Struct_vec4" + OpMemberName %Bar 0 "m0" + OpName %UBO "UBO" + OpMemberName %UBO 0 "m0" + OpMemberName %UBO 1 "m1" + OpName %ubo_binding_0 "ubo_binding_0" + OpName %Bar_0 "Struct_vec4" + OpMemberName %Bar_0 0 "m0" + OpName %b "b" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpName %VertexOut "VertexOut" + OpMemberName %VertexOut 0 "m0" + OpMemberName %VertexOut 1 "m1" + OpName %output_location_0 "output_location_0" + OpName %output_location_2 "output_location_2" + OpName %output_location_3 "output_location_3" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %Bar 0 Offset 0 + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 1 Offset 16 + OpDecorate %UBO Block + OpDecorate %ubo_binding_0 DescriptorSet 0 + OpDecorate %ubo_binding_0 Binding 0 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %VertexOut Block + OpDecorate %output_location_0 Location 0 + OpDecorate 
%output_location_2 Location 2 + OpDecorate %output_location_3 Location 3 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %Foo = OpTypeStruct %v4float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %v4float + %Bar = OpTypeStruct %v4float + %UBO = OpTypeStruct %Foo_0 %Bar +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO +%ubo_binding_0 = OpVariable %_ptr_Uniform_UBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %Bar_0 = OpTypeStruct %v4float +%_ptr_Function_Bar_0 = OpTypePointer Function %Bar_0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_Bar = OpTypePointer Uniform %Bar + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output +%_ptr_Output_v4float = OpTypePointer Output %v4float + %VertexOut = OpTypeStruct %Foo %Bar_0 +%_ptr_Output_VertexOut = OpTypePointer Output %VertexOut +%output_location_0 = OpVariable %_ptr_Output_VertexOut Output +%_ptr_Output_Foo = OpTypePointer Output %Foo +%_ptr_Output_Bar_0 = OpTypePointer Output %Bar_0 +%output_location_2 = OpVariable %_ptr_Output_Foo Output +%output_location_3 = OpVariable %_ptr_Output_Bar_0 Output + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_Foo Function + %b = OpVariable %_ptr_Function_Bar_0 Function + %19 = OpAccessChain %_ptr_Uniform_Foo_0 %ubo_binding_0 %int_0 + %20 = OpLoad %Foo_0 %19 + %21 = OpCompositeExtract %v4float %20 0 + %23 = OpAccessChain %_ptr_Function_v4float %c %int_0 + OpStore %23 %21 + %29 = OpAccessChain %_ptr_Uniform_Bar %ubo_binding_0 %int_1 + %30 = OpLoad %Bar %29 + %31 = OpCompositeExtract %v4float %30 0 
+ %32 = OpAccessChain %_ptr_Function_v4float %b %int_0 + OpStore %32 %31 + %39 = OpAccessChain %_ptr_Function_v4float %c %int_0 + %40 = OpLoad %v4float %39 + %41 = OpAccessChain %_ptr_Function_v4float %b %int_0 + %42 = OpLoad %v4float %41 + %43 = OpFAdd %v4float %40 %42 + %45 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %45 %43 + %49 = OpLoad %Foo %c + %51 = OpAccessChain %_ptr_Output_Foo %output_location_0 %int_0 + OpStore %51 %49 + %52 = OpLoad %Bar_0 %b + %54 = OpAccessChain %_ptr_Output_Bar_0 %output_location_0 %int_1 + OpStore %54 %52 + %56 = OpLoad %Foo %c + OpStore %output_location_2 %56 + %58 = OpLoad %Bar_0 %b + OpStore %output_location_3 %58 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp b/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp new file mode 100644 index 00000000000..83b714bc406 --- /dev/null +++ b/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp @@ -0,0 +1,130 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float res; + int ires; + uint ures; + + vec4 f32; + ivec4 s32; + uvec4 u32; + + mat2 m2; + mat3 m3; + mat4 m4; +}; + +void main() +{ + float tmp; + vec2 v2; + vec3 v3; + vec4 v4; + int itmp; + + res = round(f32.x); + //res = roundEven(f32.x); + res = trunc(f32.x); + res = abs(f32.x); + ires = abs(s32.x); + res = sign(f32.x); + ires = sign(s32.x); + res = floor(f32.x); + res = ceil(f32.x); + res = fract(f32.x); + res = radians(f32.x); + res = degrees(f32.x); + res = sin(f32.x); + res = cos(f32.x); + res = tan(f32.x); + res = asin(f32.x); + res = acos(f32.x); + res = atan(f32.x); + res = sinh(f32.x); + res = cosh(f32.x); + res = tanh(f32.x); + //res = asinh(f32.x); + //res = acosh(f32.x); + //res = atanh(f32.x); + res = atan(f32.x, f32.y); + res = pow(f32.x, f32.y); + res = exp(f32.x); + res = log(f32.x); + res = exp2(f32.x); + res = log2(f32.x); + res = sqrt(f32.x); + res = inversesqrt(f32.x); + + res = length(f32.x); + res = 
distance(f32.x, f32.y); + res = normalize(f32.x); + res = faceforward(f32.x, f32.y, f32.z); + res = reflect(f32.x, f32.y); + res = refract(f32.x, f32.y, f32.z); + + res = length(f32.xy); + res = distance(f32.xy, f32.zw); + v2 = normalize(f32.xy); + v2 = faceforward(f32.xy, f32.yz, f32.zw); + v2 = reflect(f32.xy, f32.zw); + v2 = refract(f32.xy, f32.yz, f32.w); + + v3 = cross(f32.xyz, f32.yzw); + + res = determinant(m2); + res = determinant(m3); + res = determinant(m4); + m2 = inverse(m2); + m3 = inverse(m3); + m4 = inverse(m4); + + res = modf(f32.x, tmp); + // ModfStruct + + res = min(f32.x, f32.y); + ures = min(u32.x, u32.y); + ires = min(s32.x, s32.y); + res = max(f32.x, f32.y); + ures = max(u32.x, u32.y); + ires = max(s32.x, s32.y); + + res = clamp(f32.x, f32.y, f32.z); + ures = clamp(u32.x, u32.y, u32.z); + ires = clamp(s32.x, s32.y, s32.z); + + res = mix(f32.x, f32.y, f32.z); + res = step(f32.x, f32.y); + res = smoothstep(f32.x, f32.y, f32.z); + res = fma(f32.x, f32.y, f32.z); + + res = frexp(f32.x, itmp); + + // FrexpStruct + res = ldexp(f32.x, itmp); + + ures = packSnorm4x8(f32); + ures = packUnorm4x8(f32); + ures = packSnorm2x16(f32.xy); + ures = packUnorm2x16(f32.xy); + ures = packHalf2x16(f32.xy); + // packDouble2x32 + + v2 = unpackSnorm2x16(u32.x); + v2 = unpackUnorm2x16(u32.x); + v2 = unpackHalf2x16(u32.x); + v4 = unpackSnorm4x8(u32.x); + v4 = unpackUnorm4x8(u32.x); + // unpackDouble2x32 + + s32 = findLSB(s32); + s32 = findLSB(u32); + s32 = findMSB(s32); + s32 = findMSB(u32); + + // interpolateAtSample + // interpolateAtOffset + + // NMin, NMax, NClamp +} diff --git a/shaders-hlsl-no-opt/comp/illegal-struct-name.asm.comp b/shaders-hlsl-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 00000000000..f7a8787d3d8 --- /dev/null +++ b/shaders-hlsl-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,62 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 31 +; Schema: 0 + OpCapability Shader + %1 = 
OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Foo" + OpMemberName %Foo 0 "abs" + OpName %f "f" + OpName %Foo_0 "Foo" + OpMemberName %Foo_0 0 "abs" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "foo" + OpMemberName %SSBO 1 "foo2" + OpName %_ "" + OpName %linear "abs" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %Foo = OpTypeStruct %float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %float + %SSBO = OpTypeStruct %Foo_0 %Foo_0 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %main = OpFunction %void None %3 + %5 = OpLabel + %f = OpVariable %_ptr_Function_Foo Function + %linear = OpVariable %_ptr_Function_int Function + %17 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0 + %18 = OpLoad %Foo_0 %17 + %19 = OpCompositeExtract %float %18 0 + %21 = OpAccessChain %_ptr_Function_float %f %int_0 + OpStore %21 %19 + OpStore %linear %int_10 + %26 = OpLoad %Foo %f + %27 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_1 + %28 = OpCompositeExtract %float %26 0 + %30 = OpAccessChain %_ptr_Uniform_float %27 %int_0 + OpStore %30 %28 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/comp/intmin-literal.comp b/shaders-hlsl-no-opt/comp/intmin-literal.comp new file mode 100644 index 00000000000..ee35cedabb9 --- /dev/null 
+++ b/shaders-hlsl-no-opt/comp/intmin-literal.comp @@ -0,0 +1,18 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float b; +}; + +void main() +{ + a = intBitsToFloat(floatBitsToInt(b) ^ 0x80000000); +} diff --git a/shaders-hlsl-no-opt/comp/subgroups-boolean.invalid.nofxc.sm60.comp b/shaders-hlsl-no-opt/comp/subgroups-boolean.invalid.nofxc.sm60.comp new file mode 100644 index 00000000000..bc182c52923 --- /dev/null +++ b/shaders-hlsl-no-opt/comp/subgroups-boolean.invalid.nofxc.sm60.comp @@ -0,0 +1,30 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_clustered : require +#extension GL_KHR_shader_subgroup_quad : require +layout(local_size_x = 30) in; + +layout(std430, binding = 0) buffer SSBO +{ + uint FragColor[]; +}; + +void main() +{ + bool v = gl_GlobalInvocationID.x != 3; + bvec4 v4; + v4.x = subgroupOr(v); + v4.y = subgroupAnd(v); + v4.z = subgroupXor(v); + v4.w = subgroupAllEqual(v); + + uvec4 w = uvec4(v4); + FragColor[gl_GlobalInvocationID.x] = w.x + w.y + w.z + w.w; +} + + diff --git a/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp b/shaders-hlsl-no-opt/comp/subgroups.invalid.nofxc.sm60.comp similarity index 85% rename from shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp rename to shaders-hlsl-no-opt/comp/subgroups.invalid.nofxc.sm60.comp index 81135e2a93e..bbda0efd5ba 100644 --- a/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp +++ b/shaders-hlsl-no-opt/comp/subgroups.invalid.nofxc.sm60.comp @@ -40,18 +40,18 @@ void main() //bool inverse_ballot_value = subgroupInverseBallot(ballot_value); //bool bit_extracted = 
subgroupBallotBitExtract(uvec4(10u), 8u); uint bit_count = subgroupBallotBitCount(ballot_value); - //uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); - //uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); //uint lsb = subgroupBallotFindLSB(ballot_value); //uint msb = subgroupBallotFindMSB(ballot_value); // shuffle - //uint shuffled = subgroupShuffle(10u, 8u); - //uint shuffled_xor = subgroupShuffleXor(30u, 8u); + uint shuffled = subgroupShuffle(10u, 8u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); // shuffle relative - //uint shuffled_up = subgroupShuffleUp(20u, 4u); - //uint shuffled_down = subgroupShuffleDown(20u, 4u); + uint shuffled_up = subgroupShuffleUp(20u, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); // vote bool has_all = subgroupAll(true); @@ -72,6 +72,9 @@ void main() uvec4 anded = subgroupAnd(ballot_value); uvec4 ored = subgroupOr(ballot_value); uvec4 xored = subgroupXor(ballot_value); + bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42))); + bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42))); + bvec4 xored_b = subgroupXor(equal(ballot_value, uvec4(42))); added = subgroupInclusiveAdd(added); iadded = subgroupInclusiveAdd(iadded); @@ -121,6 +124,10 @@ void main() anded = subgroupClusteredAnd(anded, 4u); ored = subgroupClusteredOr(ored, 4u); xored = subgroupClusteredXor(xored, 4u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); #endif // quad diff --git a/shaders-hlsl-no-opt/comp/trivial-select-cast-vector.comp b/shaders-hlsl-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 00000000000..c3e0922a166 --- /dev/null +++ 
b/shaders-hlsl-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,14 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + vec3 a; + vec3 b; +}; + +void main() +{ + bvec3 c = lessThan(b, vec3(1.0)); + a = mix(vec3(1, 0, 0), vec3(0, 0, 1), c); +} diff --git a/shaders-hlsl-no-opt/comp/trivial-select-matrix.spv14.comp b/shaders-hlsl-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 00000000000..5ffcc3f3a49 --- /dev/null +++ b/shaders-hlsl-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + mat3 a; + float b; +}; + +void main() +{ + // Scalar to Matrix + bool c = b < 1.0; + a = c ? mat3(vec3(1), vec3(1), vec3(1)) : mat3(vec3(0), vec3(0), vec3(0)); + a = c ? mat3(1) : mat3(0); +} diff --git a/shaders-hlsl-no-opt/frag/cbuffer-packing-straddle.frag b/shaders-hlsl-no-opt/frag/cbuffer-packing-straddle.frag new file mode 100644 index 00000000000..4f22da56d96 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/cbuffer-packing-straddle.frag @@ -0,0 +1,50 @@ +#version 450 + +layout(set = 0, binding = 0) uniform UBO +{ + vec4 a[2]; // 0 + vec4 b; // 32 + vec4 c; // 48 + mat4x4 d; // 64 + + float e; // 128 + vec2 f; // 136 + + float g; // 144 + vec2 h; // 152 + + float i; // 160 + vec2 j; // 168 + + float k; + vec2 l; + + float m; + float n; + float o; + + vec4 p; + vec4 q; + vec3 r; + vec4 s; + vec4 t; + vec4 u; + float v; + float w; + float x; + float y; + float z; + float aa; + float ab; + float ac; + float ad; + float ae; + vec4 ef; +}; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = a[1]; +} diff --git a/shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag b/shaders-hlsl-no-opt/frag/constant-buffer-array.invalid.sm51.frag similarity index 100% rename from shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag rename to shaders-hlsl-no-opt/frag/constant-buffer-array.invalid.sm51.frag 
diff --git a/shaders-hlsl/frag/fp16.invalid.desktop.frag b/shaders-hlsl-no-opt/frag/fp16.invalid.desktop.frag similarity index 100% rename from shaders-hlsl/frag/fp16.invalid.desktop.frag rename to shaders-hlsl-no-opt/frag/fp16.invalid.desktop.frag diff --git a/shaders-hlsl-no-opt/frag/frag-coord.frag b/shaders-hlsl-no-opt/frag/frag-coord.frag new file mode 100644 index 00000000000..e688659a6b3 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/frag-coord.frag @@ -0,0 +1,8 @@ +#version 450 + +layout(location = 0) out vec3 FragColor; + +void main() +{ + FragColor = gl_FragCoord.xyz / gl_FragCoord.w; +} diff --git a/shaders-hlsl-no-opt/frag/helper-invocation.fxconly.nofxc.frag b/shaders-hlsl-no-opt/frag/helper-invocation.fxconly.nofxc.frag new file mode 100644 index 00000000000..6f70c772645 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/helper-invocation.fxconly.nofxc.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = float(gl_HelperInvocation); + demote; + FragColor = float(helperInvocationEXT()); +} diff --git a/shaders-hlsl-no-opt/frag/native-16bit-types.fxconly.nofxc.sm62.native-16bit.frag b/shaders-hlsl-no-opt/frag/native-16bit-types.fxconly.nofxc.sm62.native-16bit.frag new file mode 100644 index 00000000000..92e6621fda5 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/native-16bit-types.fxconly.nofxc.sm62.native-16bit.frag @@ -0,0 +1,72 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types : require + +layout(location = 0) out f16vec4 Output; +layout(location = 0) in f16vec4 Input; +layout(location = 1) out i16vec4 OutputI; +layout(location = 1) flat in i16vec4 InputI; +layout(location = 2) out u16vec4 OutputU; +layout(location = 2) flat in u16vec4 InputU; + +layout(set = 0, binding = 0) buffer Buf +{ + float16_t foo0[4]; + int16_t foo1[4]; + uint16_t foo2[4]; + + f16vec4 foo3[4]; + i16vec4 foo4[4]; + u16vec4 foo5[4]; + + f16mat2x3 foo6[4]; 
+ layout(row_major) f16mat2x3 foo7[4]; +}; + +void main() +{ + int index = int(gl_FragCoord.x); + Output = Input + float16_t(20.0); + OutputI = InputI + int16_t(-40); + OutputU = InputU + uint16_t(20); + + // Load 16-bit scalar. + Output += foo0[index]; + OutputI += foo1[index]; + OutputU += foo2[index]; + + // Load 16-bit vector. + Output += foo3[index]; + OutputI += foo4[index]; + OutputU += foo5[index]; + + // Load 16-bit vector from ColMajor matrix. + Output += foo6[index][1].xyzz; + + // Load 16-bit vector from RowMajor matrix. + Output += foo7[index][1].xyzz; + + // Load 16-bit matrix from ColMajor. + f16mat2x3 m0 = foo6[index]; + // Load 16-bit matrix from RowMajor. + f16mat2x3 m1 = foo7[index]; + + // Store 16-bit scalar + foo0[index] = Output.x; + foo1[index] = OutputI.y; + foo2[index] = OutputU.z; + + // Store 16-bit vector + foo3[index] = Output; + foo4[index] = OutputI; + foo5[index] = OutputU; + + // Store 16-bit vector to ColMajor matrix. + foo6[index][1] = Output.xyz; + // Store 16-bit vector to RowMajor matrix. + foo7[index][1] = Output.xyz; + + // Store 16-bit matrix to ColMajor. + foo6[index] = f16mat2x3(Output.xyz, Output.wzy); + // Store 16-bit matrix to RowMajor. 
+ foo7[index] = f16mat2x3(Output.xyz, Output.wzy); +} diff --git a/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag b/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag new file mode 100644 index 00000000000..452aa953a42 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag @@ -0,0 +1,14 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; +layout(location = 1) flat in int vIndex; + +layout(set = 0, binding = 0) uniform texture2D uTex[]; +layout(set = 1, binding = 0) uniform sampler Immut; + +void main() +{ + FragColor = texture(nonuniformEXT(sampler2D(uTex[vIndex], Immut)), vUV); +} diff --git a/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag b/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag new file mode 100644 index 00000000000..59079fe58b4 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(set = 0, binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +}; + +layout(set = 0, binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +}; + +void callee2() +{ + values1[int(gl_FragCoord.x)] += 1; +} + +void callee() +{ + values0[int(gl_FragCoord.x)] += 1; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} diff --git a/shaders-hlsl-no-opt/frag/texture-gather-uint-component.asm.frag b/shaders-hlsl-no-opt/frag/texture-gather-uint-component.asm.frag new file mode 100644 index 00000000000..b4d9509ab49 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/texture-gather-uint-component.asm.frag @@ -0,0 +1,42 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 22 +; 
Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vUV + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uSamp "uSamp" + OpName %vUV "vUV" + OpDecorate %FragColor Location 0 + OpDecorate %uSamp DescriptorSet 0 + OpDecorate %uSamp Binding 0 + OpDecorate %vUV Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %10 = OpTypeImage %float 2D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %uSamp = OpVariable %_ptr_UniformConstant_11 UniformConstant + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input + %int = OpTypeInt 32 0 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %11 %uSamp + %18 = OpLoad %v2float %vUV + %21 = OpImageGather %v4float %14 %18 %int_1 + OpStore %FragColor %21 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/frag/ubo-offset-out-of-order.frag b/shaders-hlsl-no-opt/frag/ubo-offset-out-of-order.frag new file mode 100644 index 00000000000..77760522f94 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/ubo-offset-out-of-order.frag @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 0) uniform UBO +{ + layout(offset = 16) mat4 m; + layout(offset = 0) vec4 v; +}; + +layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = m * vColor + v; +} diff --git a/shaders-hlsl-no-opt/frag/variables.zero-initialize.frag b/shaders-hlsl-no-opt/frag/variables.zero-initialize.frag new file mode 100644 index 00000000000..41da8001f47 --- /dev/null +++ 
b/shaders-hlsl-no-opt/frag/variables.zero-initialize.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; + +int uninit_int; +ivec4 uninit_vector; +mat4 uninit_matrix; + +struct Foo { int a; }; +Foo uninit_foo; + +void main() +{ + int uninit_function_int; + if (vColor.x > 10.0) + uninit_function_int = 10; + else + uninit_function_int = 20; + FragColor = vColor; +} diff --git a/shaders-hlsl-no-opt/frag/volatile-helper-invocation.fxconly.nofxc.spv16.frag b/shaders-hlsl-no-opt/frag/volatile-helper-invocation.fxconly.nofxc.spv16.frag new file mode 100644 index 00000000000..9a8d9d20b25 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/volatile-helper-invocation.fxconly.nofxc.spv16.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = float(gl_HelperInvocation); + demote; + FragColor = float(gl_HelperInvocation); +} diff --git a/shaders-hlsl-no-opt/vert/base-instance.vert b/shaders-hlsl-no-opt/vert/base-instance.vert new file mode 100644 index 00000000000..20b686cfe50 --- /dev/null +++ b/shaders-hlsl-no-opt/vert/base-instance.vert @@ -0,0 +1,7 @@ +#version 450 +#extension GL_ARB_shader_draw_parameters : require + +void main() +{ + gl_Position = vec4(gl_BaseInstanceARB); +} diff --git a/shaders-hlsl-no-opt/vert/base-vertex.vert b/shaders-hlsl-no-opt/vert/base-vertex.vert new file mode 100644 index 00000000000..ef486c857d0 --- /dev/null +++ b/shaders-hlsl-no-opt/vert/base-vertex.vert @@ -0,0 +1,7 @@ +#version 450 +#extension GL_ARB_shader_draw_parameters : require + +void main() +{ + gl_Position = vec4(gl_BaseVertexARB); +} diff --git a/shaders-hlsl-no-opt/vert/block-io-auto-location-assignment.vert b/shaders-hlsl-no-opt/vert/block-io-auto-location-assignment.vert new file mode 100644 index 00000000000..98c39bd5ef2 --- /dev/null +++ b/shaders-hlsl-no-opt/vert/block-io-auto-location-assignment.vert @@ 
-0,0 +1,31 @@ +#version 450 + +struct Bar +{ + float v[2]; + float w; +}; + +layout(location = 0) out V +{ + float a; + float b[2]; + Bar c[2]; + Bar d; +}; + +void main() +{ + a = 1.0; + b[0] = 2.0; + b[1] = 3.0; + c[0].v[0] = 4.0; + c[0].v[1] = 5.0; + c[0].w = 6.0; + c[1].v[0] = 7.0; + c[1].v[1] = 8.0; + c[1].w = 9.0; + d.v[0] = 10.0; + d.v[1] = 11.0; + d.w = 12.0; +} diff --git a/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert b/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert new file mode 100644 index 00000000000..0b8dbb5a91d --- /dev/null +++ b/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert @@ -0,0 +1,5 @@ +#version 450 + +void main() +{ +} diff --git a/shaders-hlsl-no-opt/vert/flatten-matrix-input.flatten-matrix-vertex-input.vert b/shaders-hlsl-no-opt/vert/flatten-matrix-input.flatten-matrix-vertex-input.vert new file mode 100644 index 00000000000..b49480617ab --- /dev/null +++ b/shaders-hlsl-no-opt/vert/flatten-matrix-input.flatten-matrix-vertex-input.vert @@ -0,0 +1,13 @@ +#version 450 + +layout(location = 0) in mat4 m4; +layout(location = 4) in mat3 m3; +layout(location = 7) in mat2 m2; +layout(location = 9) in vec4 v; + +void main() +{ + gl_Position = m4 * v; + gl_Position.xyz += m3 * v.xyz; + gl_Position.xy += m2 * v.xy; +} diff --git a/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..b7b4e0b2e1e --- /dev/null +++ b/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,101 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 
16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %bool = OpTypeBool + %bvec4 = OpTypeVector %bool 4 + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + %uzero = OpConstant %uint 0 + %uone = OpConstant %uint 1 + %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone + %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + + %result_slt = OpSLessThan %bvec4 %input0 %input1 + %result_sle = OpSLessThanEqual %bvec4 %input0 %input1 + %result_ult = OpULessThan %bvec4 %input0 %input1 + %result_ule = OpULessThanEqual %bvec4 %input0 %input1 + %result_sgt = OpSGreaterThan %bvec4 %input0 %input1 + %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1 + %result_ugt = OpUGreaterThan %bvec4 %input0 %input1 + %result_uge = OpUGreaterThanEqual %bvec4 %input0 
%input1 + + %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_slt + + %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sle + + %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ult + + %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ule + + %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sgt + + %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sge + + %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ugt + + %int_uge = OpSelect %uvec4 %result_uge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_uge + + + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000000..6c060eedad9 --- /dev/null +++ b/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,203 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 139 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a1" + OpMemberName %SSBO 1 "a2" + OpMemberName %SSBO 2 "a3" + OpMemberName %SSBO 3 "a4" + OpMemberName %SSBO 4 "b1" + OpMemberName %SSBO 5 "b2" + OpMemberName %SSBO 6 "b3" + OpMemberName %SSBO 7 "b4" + OpMemberName %SSBO 8 "c1" + OpMemberName %SSBO 9 "c2" + OpMemberName %SSBO 10 "c3" + OpMemberName %SSBO 11 "c4" + OpName %_ "" + OpName %i "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 8 + OpMemberDecorate %SSBO 2 Offset 16 + OpMemberDecorate %SSBO 3 Offset 32 + OpMemberDecorate %SSBO 4 Offset 48 + OpMemberDecorate %SSBO 5 Offset 56 + 
OpMemberDecorate %SSBO 6 Offset 64 + OpMemberDecorate %SSBO 7 Offset 80 + OpMemberDecorate %SSBO 8 Offset 96 + OpMemberDecorate %SSBO 9 Offset 104 + OpMemberDecorate %SSBO 10 Offset 112 + OpMemberDecorate %SSBO 11 Offset 128 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %7 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_8 = OpConstant %int 8 + %int_1 = OpConstant %int 1 + %int_5 = OpConstant %int 5 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %int_9 = OpConstant %int 9 + %int_2 = OpConstant %int 2 + %int_6 = OpConstant %int 6 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_10 = OpConstant %int 10 + %int_3 = OpConstant %int 3 + %int_7 = OpConstant %int 7 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_11 = OpConstant %int 11 +%_ptr_Function_int = OpTypePointer Function %int + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %main = OpFunction %void None %7 + %35 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %37 = OpLoad %float %36 + %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %39 = OpLoad %float %38 + %40 = OpExtInst %float %1 NMin %37 %39 + %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %41 %40 + %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %43 = OpLoad %v2float %42 + %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %45 = OpLoad %v2float %44 + 
%46 = OpExtInst %v2float %1 NMin %43 %45 + %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %47 %46 + %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %49 = OpLoad %v3float %48 + %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %51 = OpLoad %v3float %50 + %52 = OpExtInst %v3float %1 NMin %49 %51 + %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %53 %52 + %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %55 = OpLoad %v4float %54 + %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %57 = OpLoad %v4float %56 + %58 = OpExtInst %v4float %1 NMin %55 %57 + %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %59 %58 + %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %61 = OpLoad %float %60 + %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %63 = OpLoad %float %62 + %64 = OpExtInst %float %1 NMax %61 %63 + %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %65 %64 + %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %67 = OpLoad %v2float %66 + %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %69 = OpLoad %v2float %68 + %70 = OpExtInst %v2float %1 NMax %67 %69 + %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %71 %70 + %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %73 = OpLoad %v3float %72 + %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %75 = OpLoad %v3float %74 + %76 = OpExtInst %v3float %1 NMax %73 %75 + %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %77 %76 + %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %79 = OpLoad %v4float %78 + %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %81 = OpLoad %v4float %80 + %82 = OpExtInst %v4float %1 NMax %79 %81 + %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %83 %82 + %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %87 = OpLoad %float %86 + %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %89 = 
OpLoad %float %88 + %90 = OpExtInst %float %1 NClamp %85 %87 %89 + %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %91 %90 + %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + %93 = OpLoad %v2float %92 + %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %95 = OpLoad %v2float %94 + %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %97 = OpLoad %v2float %96 + %98 = OpExtInst %v2float %1 NClamp %93 %95 %97 + %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %99 %98 + %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + %101 = OpLoad %v3float %100 + %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %103 = OpLoad %v3float %102 + %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %105 = OpLoad %v3float %104 + %106 = OpExtInst %v3float %1 NClamp %101 %103 %105 + %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %107 %106 + %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + %109 = OpLoad %v4float %108 + %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %111 = OpLoad %v4float %110 + %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %113 = OpLoad %v4float %112 + %114 = OpExtInst %v4float %1 NClamp %109 %111 %113 + %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %115 %114 + OpStore %i %int_0 + OpBranch %116 + %116 = OpLabel + OpLoopMerge %117 %118 None + OpBranch %119 + %119 = OpLabel + %120 = OpLoad %int %i + %121 = OpSLessThan %bool %120 %int_2 + OpBranchConditional %121 %122 %117 + %122 = OpLabel + %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %124 = OpLoad %v2float %123 + %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %126 = OpLoad %v2float %125 + %127 = OpExtInst %v2float %1 NMin %124 %126 + %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %128 %127 + OpBranch %118 + %118 = OpLabel + %129 = OpLoad %int %i + %130 = OpIAdd %int %129 %int_1 + OpStore %i %130 + %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %132 = OpLoad %float %131 + %133 = 
OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0 + %134 = OpLoad %float %133 + %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1 + %136 = OpLoad %float %135 + %137 = OpExtInst %float %1 NClamp %132 %134 %136 + %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %138 %137 + OpBranch %116 + %117 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag b/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag new file mode 100644 index 00000000000..43d0970e8d5 --- /dev/null +++ b/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 34 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_ARB_gpu_shader_int64" + OpName %main "main" + OpName %packed "packed" + OpName %unpacked "unpacked" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %ulong = OpTypeInt 64 0 +%_ptr_Function_ulong = OpTypePointer Function %ulong + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %uint_18 = OpConstant %uint 18 + %uint_52 = OpConstant %uint 52 + %13 = OpConstantComposite %v2uint %uint_18 %uint_52 +%_ptr_Function_v2uint = OpTypePointer Function %v2uint + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %uint_0 = OpConstant %uint 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_1 = OpConstant %uint 1 + %float_1 = OpConstant %float 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %packed = OpVariable %_ptr_Function_ulong Function + %unpacked = OpVariable 
%_ptr_Function_v2uint Function + %14 = OpBitcast %ulong %13 + OpStore %packed %14 + %17 = OpLoad %ulong %packed + %18 = OpBitcast %v2uint %17 + OpStore %unpacked %18 + %25 = OpAccessChain %_ptr_Function_uint %unpacked %uint_0 + %26 = OpLoad %uint %25 + %27 = OpConvertUToF %float %26 + %29 = OpAccessChain %_ptr_Function_uint %unpacked %uint_1 + %30 = OpLoad %uint %29 + %31 = OpConvertUToF %float %30 + %33 = OpCompositeConstruct %v4float %27 %31 %float_1 %float_1 + OpStore %FragColor %33 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag b/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag index ae7a972d7b2..e7e6f37ea27 100644 --- a/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag +++ b/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag @@ -5,6 +5,7 @@ ; Schema: 0 OpCapability Shader OpCapability StorageInputOutput16 + OpCapability Float16 OpExtension "SPV_KHR_16bit_storage" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 diff --git a/shaders-hlsl/comp/access-chain-load-composite.comp b/shaders-hlsl/comp/access-chain-load-composite.comp new file mode 100644 index 00000000000..69cc7a13be2 --- /dev/null +++ b/shaders-hlsl/comp/access-chain-load-composite.comp @@ -0,0 +1,35 @@ +#version 450 +layout(local_size_x = 1) in; + +struct Baz +{ + float c; +}; + +struct Bar +{ + float d[2][4]; + Baz baz[2]; +}; + +struct Foo +{ + mat2 a; + vec2 b; + Bar c[5]; +}; + +layout(row_major, std430, set = 0, binding = 0) buffer SSBO +{ + Foo foo; + Foo foo2; +}; + +void main() +{ + Foo f = foo; + f.a += 1.0; + f.b += 2.0; + f.c[3].d[1][1] += 5.0; + foo2 = f; +} diff --git a/shaders-hlsl/comp/access-chains.force-uav.comp b/shaders-hlsl/comp/access-chains.force-uav.comp new file mode 100644 index 00000000000..639f3cac155 --- /dev/null +++ b/shaders-hlsl/comp/access-chains.force-uav.comp @@ -0,0 +1,24 @@ +#version 310 es +layout(local_size_x = 1) in; + +// TODO: Read structs, matrices and arrays. 
+ +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 a[3][2][4]; + float b[3][2][4]; + vec4 unsized[]; +} ro; + +layout(std430, binding = 1) writeonly buffer SSBO1 +{ + vec4 c[3][2][4]; + float d[3][2][4]; + vec4 unsized[]; +} wo; + +void main() +{ + wo.c[2][gl_GlobalInvocationID.x][1] = ro.a[1][gl_GlobalInvocationID.x][2]; + wo.unsized[gl_GlobalInvocationID.x] = ro.unsized[gl_GlobalInvocationID.x]; +} diff --git a/shaders-hlsl/comp/image.nonwritable-uav-texture.comp b/shaders-hlsl/comp/image.nonwritable-uav-texture.comp new file mode 100644 index 00000000000..1d3c8b4c65d --- /dev/null +++ b/shaders-hlsl/comp/image.nonwritable-uav-texture.comp @@ -0,0 +1,77 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(r32f, binding = 0) uniform readonly image2D uImageInF; +layout(r32f, binding = 1) uniform writeonly image2D uImageOutF; +layout(r32i, binding = 2) uniform readonly iimage2D uImageInI; +layout(r32i, binding = 3) uniform writeonly iimage2D uImageOutI; +layout(r32ui, binding = 4) uniform readonly uimage2D uImageInU; +layout(r32ui, binding = 5) uniform writeonly uimage2D uImageOutU; +layout(r32f, binding = 6) uniform readonly imageBuffer uImageInBuffer; +layout(r32f, binding = 7) uniform writeonly imageBuffer uImageOutBuffer; + +layout(rg32f, binding = 8) uniform readonly image2D uImageInF2; +layout(rg32f, binding = 9) uniform writeonly image2D uImageOutF2; +layout(rg32i, binding = 10) uniform readonly iimage2D uImageInI2; +layout(rg32i, binding = 11) uniform writeonly iimage2D uImageOutI2; +layout(rg32ui, binding = 12) uniform readonly uimage2D uImageInU2; +layout(rg32ui, binding = 13) uniform writeonly uimage2D uImageOutU2; +layout(rg32f, binding = 14) uniform readonly imageBuffer uImageInBuffer2; +layout(rg32f, binding = 15) uniform writeonly imageBuffer uImageOutBuffer2; + +layout(rgba32f, binding = 16) uniform readonly image2D uImageInF4; +layout(rgba32f, binding = 17) uniform writeonly image2D uImageOutF4; +layout(rgba32i, binding = 18) uniform 
readonly iimage2D uImageInI4; +layout(rgba32i, binding = 19) uniform writeonly iimage2D uImageOutI4; +layout(rgba32ui, binding = 20) uniform readonly uimage2D uImageInU4; +layout(rgba32ui, binding = 21) uniform writeonly uimage2D uImageOutU4; +layout(rgba32f, binding = 22) uniform readonly imageBuffer uImageInBuffer4; +layout(rgba32f, binding = 23) uniform writeonly imageBuffer uImageOutBuffer4; + +layout(binding = 24) uniform writeonly image2D uImageNoFmtF; +layout(binding = 25) uniform writeonly uimage2D uImageNoFmtU; +layout(binding = 26) uniform writeonly iimage2D uImageNoFmtI; + +void main() +{ + vec4 f = imageLoad(uImageInF, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutF, ivec2(gl_GlobalInvocationID.xy), f); + + ivec4 i = imageLoad(uImageInI, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutI, ivec2(gl_GlobalInvocationID.xy), i); + + uvec4 u = imageLoad(uImageInU, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutU, ivec2(gl_GlobalInvocationID.xy), u); + + vec4 b = imageLoad(uImageInBuffer, int(gl_GlobalInvocationID.x)); + imageStore(uImageOutBuffer, int(gl_GlobalInvocationID.x), b); + + vec4 f2 = imageLoad(uImageInF2, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutF2, ivec2(gl_GlobalInvocationID.xy), f2); + + ivec4 i2 = imageLoad(uImageInI2, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutI2, ivec2(gl_GlobalInvocationID.xy), i2); + + uvec4 u2 = imageLoad(uImageInU2, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutU2, ivec2(gl_GlobalInvocationID.xy), u2); + + vec4 b2 = imageLoad(uImageInBuffer2, int(gl_GlobalInvocationID.x)); + imageStore(uImageOutBuffer2, int(gl_GlobalInvocationID.x), b2); + + vec4 f4 = imageLoad(uImageInF4, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutF4, ivec2(gl_GlobalInvocationID.xy), f4); + + ivec4 i4 = imageLoad(uImageInI4, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutI4, ivec2(gl_GlobalInvocationID.xy), i4); + + uvec4 u4 = imageLoad(uImageInU4, 
ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutU4, ivec2(gl_GlobalInvocationID.xy), u4); + + vec4 b4 = imageLoad(uImageInBuffer4, int(gl_GlobalInvocationID.x)); + imageStore(uImageOutBuffer4, int(gl_GlobalInvocationID.x), b4); + + imageStore(uImageNoFmtF, ivec2(gl_GlobalInvocationID.xy), b2); + imageStore(uImageNoFmtU, ivec2(gl_GlobalInvocationID.xy), u4); + imageStore(uImageNoFmtI, ivec2(gl_GlobalInvocationID.xy), i4); +} + diff --git a/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp b/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp new file mode 100644 index 00000000000..3a2a8d0d2dd --- /dev/null +++ b/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp @@ -0,0 +1,213 @@ +#version 460 +#extension GL_EXT_ray_query : enable +#extension GL_EXT_ray_flags_primitive_culling : enable + +layout(primitive_culling); +struct Ray +{ + vec3 pos; + float tmin; + vec3 dir; + float tmax; +}; + +layout(std430, set = 0, binding = 0) buffer Log +{ + uint x; + uint y; +}; + +layout(binding = 1, set = 0) uniform accelerationStructureEXT rtas; +layout(std430, set = 0, binding = 2) buffer Rays { Ray rays[]; }; + +void doSomething() +{ + x = 0; + y = 0; +} + +Ray makeRayDesc() +{ + Ray ray; + ray.pos= vec3(0,0,0); + ray.dir = vec3(1,0,0); + ray.tmin = 0.0f; + ray.tmax = 9999.0; + return ray; +} + +void main() +{ + Ray ray = makeRayDesc(); + rayQueryEXT rayQuery; + rayQueryInitializeEXT(rayQuery, rtas, gl_RayFlagsNoneEXT, 0xFF, ray.pos, ray.tmin, ray.dir, ray.tmax); + + mat4x3 _mat4x3; + mat3x4 _mat3x4; + + while (rayQueryProceedEXT(rayQuery)) + { + uint candidateType = rayQueryGetIntersectionTypeEXT(rayQuery, false); + switch(candidateType) + { + case gl_RayQueryCandidateIntersectionTriangleEXT: + + rayQueryTerminateEXT(rayQuery); + _mat4x3 = rayQueryGetIntersectionObjectToWorldEXT(rayQuery, false); + _mat3x4 = transpose(_mat4x3); + rayQueryConfirmIntersectionEXT(rayQuery); + + if (rayQueryGetIntersectionFrontFaceEXT(rayQuery, true)) + { + doSomething(); + } + + if 
(rayQueryGetIntersectionBarycentricsEXT(rayQuery, true).x == 0) + { + doSomething(); + } + + if (rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true) > 0) + { + doSomething(); + } + + if (rayQueryGetIntersectionInstanceIdEXT(rayQuery, true) > 0) + { + doSomething(); + } + + if (rayQueryGetIntersectionObjectRayDirectionEXT(rayQuery, true).x > 0) + { + doSomething(); + } + + if (rayQueryGetIntersectionObjectRayOriginEXT(rayQuery, true).x > 0) + { + doSomething(); + } + + if (rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true) > 0) + { + doSomething(); + } + + if (rayQueryGetIntersectionTEXT(rayQuery, true) > 0.f) + { + doSomething(); + } + + if (rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(rayQuery, true) > 0) + { + doSomething(); + } + break; + + case gl_RayQueryCandidateIntersectionAABBEXT: + { + _mat4x3 = rayQueryGetIntersectionObjectToWorldEXT(rayQuery, false); + _mat3x4 = transpose(_mat4x3); + if (rayQueryGetIntersectionCandidateAABBOpaqueEXT(rayQuery)) + { + doSomething(); + } + + float t = 0.5; + rayQueryGenerateIntersectionEXT(rayQuery, t); + rayQueryTerminateEXT(rayQuery); + break; + } + } + } + + if(_mat3x4[0][0] == _mat4x3[0][0]) + { + doSomething(); + } + + uint committedStatus = rayQueryGetIntersectionTypeEXT(rayQuery, true); + + switch(committedStatus) + { + case gl_RayQueryCommittedIntersectionNoneEXT : + _mat4x3 = rayQueryGetIntersectionWorldToObjectEXT(rayQuery, false); + _mat3x4 = transpose(_mat4x3); + break; + + case gl_RayQueryCommittedIntersectionTriangleEXT : + _mat4x3 = rayQueryGetIntersectionWorldToObjectEXT(rayQuery, true); + _mat3x4 = transpose(_mat4x3); + + if (rayQueryGetIntersectionFrontFaceEXT(rayQuery, true)) + { + doSomething(); + } + + if (rayQueryGetIntersectionBarycentricsEXT(rayQuery, true).y == 0) + { + doSomething(); + } + break; + + case gl_RayQueryCommittedIntersectionGeneratedEXT : + + if(rayQueryGetIntersectionGeometryIndexEXT(rayQuery, true) > 0) + { + doSomething(); + } + + 
if(rayQueryGetIntersectionInstanceIdEXT(rayQuery, true) > 0) + { + doSomething(); + } + + if(rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true) > 0) + { + doSomething(); + } + + if(rayQueryGetIntersectionObjectRayDirectionEXT(rayQuery, true).z > 0) + { + doSomething(); + } + + if(rayQueryGetIntersectionObjectRayOriginEXT(rayQuery, true).x > 0) + { + doSomething(); + } + + if(rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true) > 0) + { + doSomething(); + } + + if(rayQueryGetIntersectionTEXT(rayQuery, true) > 0.f) + { + doSomething(); + } + break; + } + + if (_mat3x4[0][0] == _mat4x3[0][0]) + { + doSomething(); + } + + if (rayQueryGetRayFlagsEXT(rayQuery) > gl_RayFlagsSkipTrianglesEXT) + { + doSomething(); + } + + if (rayQueryGetRayTMinEXT(rayQuery) > 0.0) + { + doSomething(); + } + + vec3 o = rayQueryGetWorldRayOriginEXT(rayQuery); + vec3 d = rayQueryGetWorldRayDirectionEXT(rayQuery); + + if (o.x == d.z) + { + doSomething(); + } +} diff --git a/shaders-hlsl/flatten/array.flatten.vert b/shaders-hlsl/flatten/array.flatten.vert new file mode 100644 index 00000000000..fa6da076c96 --- /dev/null +++ b/shaders-hlsl/flatten/array.flatten.vert @@ -0,0 +1,19 @@ +#version 310 es + +layout(std140) uniform UBO +{ + vec4 A4[5][4][2]; + mat4 uMVP; + vec4 A1[2]; + vec4 A2[2][3]; + float A3[3]; + vec4 Offset; +}; +layout(location = 0) in vec4 aVertex; + +void main() +{ + vec4 a4 = A4[2][3][1]; // 2 * (4 * 2) + 3 * 2 + 1 = 16 + 6 + 1 = 23. 
+ vec4 offset = A2[1][1] + A1[1] + A3[2]; + gl_Position = uMVP * aVertex + Offset + offset; +} diff --git a/shaders-hlsl/flatten/basic.flatten.vert b/shaders-hlsl/flatten/basic.flatten.vert new file mode 100644 index 00000000000..e60a9067b14 --- /dev/null +++ b/shaders-hlsl/flatten/basic.flatten.vert @@ -0,0 +1,16 @@ +#version 310 es + +layout(std140) uniform UBO +{ + mat4 uMVP; +}; + +layout(location = 0) in vec4 aVertex; +layout(location = 1) in vec3 aNormal; +layout(location = 0) out vec3 vNormal; + +void main() +{ + gl_Position = uMVP * aVertex; + vNormal = aNormal; +} diff --git a/shaders-hlsl/flatten/copy.flatten.vert b/shaders-hlsl/flatten/copy.flatten.vert new file mode 100644 index 00000000000..4f1b8805e74 --- /dev/null +++ b/shaders-hlsl/flatten/copy.flatten.vert @@ -0,0 +1,34 @@ +#version 310 es + +struct Light +{ + vec3 Position; + float Radius; + + vec4 Color; +}; + +layout(std140) uniform UBO +{ + mat4 uMVP; + + Light lights[4]; +}; + +layout(location = 0) in vec4 aVertex; +layout(location = 1) in vec3 aNormal; +layout(location = 0) out vec4 vColor; + +void main() +{ + gl_Position = uMVP * aVertex; + + vColor = vec4(0.0); + + for (int i = 0; i < 4; ++i) + { + Light light = lights[i]; + vec3 L = aVertex.xyz - light.Position; + vColor += dot(aNormal, normalize(L)) * (clamp(1.0 - length(L) / light.Radius, 0.0, 1.0) * lights[i].Color); + } +} diff --git a/shaders-hlsl/flatten/dynamic.flatten.vert b/shaders-hlsl/flatten/dynamic.flatten.vert new file mode 100644 index 00000000000..a341d452884 --- /dev/null +++ b/shaders-hlsl/flatten/dynamic.flatten.vert @@ -0,0 +1,33 @@ +#version 310 es + +struct Light +{ + vec3 Position; + float Radius; + + vec4 Color; +}; + +layout(std140) uniform UBO +{ + mat4 uMVP; + + Light lights[4]; +}; + +layout(location = 0) in vec4 aVertex; +layout(location = 1) in vec3 aNormal; +layout(location = 0) out vec4 vColor; + +void main() +{ + gl_Position = uMVP * aVertex; + + vColor = vec4(0.0); + + for (int i = 0; i < 4; ++i) + { + 
vec3 L = aVertex.xyz - lights[i].Position; + vColor += dot(aNormal, normalize(L)) * (clamp(1.0 - length(L) / lights[i].Radius, 0.0, 1.0) * lights[i].Color); + } +} diff --git a/shaders-hlsl/flatten/matrix-conversion.flatten.frag b/shaders-hlsl/flatten/matrix-conversion.flatten.frag new file mode 100644 index 00000000000..427825c3402 --- /dev/null +++ b/shaders-hlsl/flatten/matrix-conversion.flatten.frag @@ -0,0 +1,14 @@ +#version 310 es +precision mediump float; +layout(location = 0) out vec3 FragColor; +layout(location = 0) flat in vec3 vNormal; + +layout(binding = 0, std140) uniform UBO +{ + mat4 m; +}; + +void main() +{ + FragColor = mat3(m) * vNormal; +} diff --git a/shaders-hlsl/flatten/matrixindex.flatten.vert b/shaders-hlsl/flatten/matrixindex.flatten.vert new file mode 100644 index 00000000000..0ee78384324 --- /dev/null +++ b/shaders-hlsl/flatten/matrixindex.flatten.vert @@ -0,0 +1,25 @@ +#version 310 es + +layout(std140) uniform UBO +{ + layout(column_major) mat4 M1C; + layout(row_major) mat4 M1R; + layout(column_major) mat2x4 M2C; + layout(row_major) mat2x4 M2R; +}; + +layout(location = 0) out vec4 oA; +layout(location = 1) out vec4 oB; +layout(location = 2) out vec4 oC; +layout(location = 3) out vec4 oD; +layout(location = 4) out vec4 oE; + +void main() +{ + gl_Position = vec4(0.0); + oA = M1C[1]; + oB = M1R[1]; + oC = M2C[1]; + oD = M2R[0]; + oE = vec4(M1C[1][2], M1R[1][2], M2C[1][2], M2R[1][2]); +} diff --git a/shaders-hlsl/flatten/multiindex.flatten.vert b/shaders-hlsl/flatten/multiindex.flatten.vert new file mode 100644 index 00000000000..0b471d86e09 --- /dev/null +++ b/shaders-hlsl/flatten/multiindex.flatten.vert @@ -0,0 +1,13 @@ +#version 310 es + +layout(std140) uniform UBO +{ + vec4 Data[3][5]; +}; + +layout(location = 0) in ivec2 aIndex; + +void main() +{ + gl_Position = Data[aIndex.x][aIndex.y]; +} diff --git a/shaders-hlsl/flatten/push-constant.flatten.vert b/shaders-hlsl/flatten/push-constant.flatten.vert new file mode 100644 index 
00000000000..c7b1b42e1b7 --- /dev/null +++ b/shaders-hlsl/flatten/push-constant.flatten.vert @@ -0,0 +1,17 @@ +#version 310 es + +layout(push_constant, std430) uniform PushMe +{ + mat4 MVP; + mat2 Rot; // The MatrixStride will be 8 here. + float Arr[4]; +} registers; + +layout(location = 0) in vec2 Rot; +layout(location = 1) in vec4 Pos; +layout(location = 0) out vec2 vRot; +void main() +{ + gl_Position = registers.MVP * Pos; + vRot = registers.Rot * Rot + registers.Arr[2]; // Constant access should work even if array stride is just 4 here. +} diff --git a/shaders-hlsl/flatten/rowmajor.flatten.vert b/shaders-hlsl/flatten/rowmajor.flatten.vert new file mode 100644 index 00000000000..88c468c8f25 --- /dev/null +++ b/shaders-hlsl/flatten/rowmajor.flatten.vert @@ -0,0 +1,16 @@ +#version 310 es + +layout(std140) uniform UBO +{ + layout(column_major) mat4 uMVPR; + layout(row_major) mat4 uMVPC; + layout(row_major) mat2x4 uMVP; +}; + +layout(location = 0) in vec4 aVertex; + +void main() +{ + vec2 v = aVertex * uMVP; + gl_Position = uMVPR * aVertex + uMVPC * aVertex; +} diff --git a/shaders-hlsl/flatten/struct.flatten.vert b/shaders-hlsl/flatten/struct.flatten.vert new file mode 100644 index 00000000000..936bb41b852 --- /dev/null +++ b/shaders-hlsl/flatten/struct.flatten.vert @@ -0,0 +1,30 @@ +#version 310 es + +struct Light +{ + vec3 Position; + float Radius; + + vec4 Color; +}; + +layout(std140) uniform UBO +{ + mat4 uMVP; + + Light light; +}; + +layout(location = 0) in vec4 aVertex; +layout(location = 1) in vec3 aNormal; +layout(location = 0) out vec4 vColor; + +void main() +{ + gl_Position = uMVP * aVertex; + + vColor = vec4(0.0); + + vec3 L = aVertex.xyz - light.Position; + vColor += dot(aNormal, normalize(L)) * (clamp(1.0 - length(L) / light.Radius, 0.0, 1.0) * light.Color); +} diff --git a/shaders-hlsl/flatten/struct.rowmajor.flatten.vert b/shaders-hlsl/flatten/struct.rowmajor.flatten.vert new file mode 100644 index 00000000000..231389b8f49 --- /dev/null +++ 
b/shaders-hlsl/flatten/struct.rowmajor.flatten.vert @@ -0,0 +1,26 @@ +#version 310 es + +struct Foo +{ + mat3x4 MVP0; + mat3x4 MVP1; +}; + +layout(std140, binding = 0) uniform UBO +{ + layout(row_major) Foo foo; +}; + +layout(location = 0) in vec4 v0; +layout(location = 1) in vec4 v1; +layout(location = 0) out vec3 V0; +layout(location = 1) out vec3 V1; + +void main() +{ + Foo f = foo; + vec3 a = v0 * f.MVP0; + vec3 b = v1 * f.MVP1; + V0 = a; + V1 = b; +} diff --git a/shaders-hlsl/flatten/swizzle.flatten.vert b/shaders-hlsl/flatten/swizzle.flatten.vert new file mode 100644 index 00000000000..fafff7734eb --- /dev/null +++ b/shaders-hlsl/flatten/swizzle.flatten.vert @@ -0,0 +1,47 @@ +#version 310 es + +// comments note the 16b alignment boundaries (see GL spec 7.6.2.2 Standard Uniform Block Layout) +layout(std140, binding = 0) uniform UBO +{ + // 16b boundary + vec4 A; + // 16b boundary + vec2 B0; + vec2 B1; + // 16b boundary + float C0; + // 16b boundary (vec3 is aligned to 16b) + vec3 C1; + // 16b boundary + vec3 D0; + float D1; + // 16b boundary + float E0; + float E1; + float E2; + float E3; + // 16b boundary + float F0; + vec2 F1; + // 16b boundary (vec2 before us is aligned to 8b) + float F2; +}; + +layout(location = 0) out vec4 oA; +layout(location = 1) out vec4 oB; +layout(location = 2) out vec4 oC; +layout(location = 3) out vec4 oD; +layout(location = 4) out vec4 oE; +layout(location = 5) out vec4 oF; + +void main() +{ + gl_Position = vec4(0.0); + + oA = A; + oB = vec4(B0, B1); + oC = vec4(C0, C1); + oD = vec4(D0, D1); + oE = vec4(E0, E1, E2, E3); + oF = vec4(F0, F1, F2); +} diff --git a/shaders-hlsl/flatten/types.flatten.frag b/shaders-hlsl/flatten/types.flatten.frag new file mode 100644 index 00000000000..faab5b7e058 --- /dev/null +++ b/shaders-hlsl/flatten/types.flatten.frag @@ -0,0 +1,27 @@ +#version 310 es +precision mediump float; + +layout(std140, binding = 0) uniform UBO0 +{ + vec4 a; + vec4 b; +}; + +layout(std140, binding = 0) uniform UBO1 +{ + 
ivec4 c; + ivec4 d; +}; + +layout(std140, binding = 0) uniform UBO2 +{ + uvec4 e; + uvec4 f; +}; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(c) + vec4(d) + vec4(e) + vec4(f) + a + b; +} diff --git a/shaders-hlsl/frag/demote-to-helper.frag b/shaders-hlsl/frag/demote-to-helper.frag new file mode 100644 index 00000000000..bdfef6f9b43 --- /dev/null +++ b/shaders-hlsl/frag/demote-to-helper.frag @@ -0,0 +1,7 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void main() +{ + demote; +} diff --git a/shaders-hlsl/frag/image-query-uav.frag b/shaders-hlsl/frag/image-query-uav.frag new file mode 100644 index 00000000000..25103e6e95f --- /dev/null +++ b/shaders-hlsl/frag/image-query-uav.frag @@ -0,0 +1,18 @@ +#version 450 + +layout(rgba32f, binding = 0) uniform writeonly image1D uImage1D; +layout(rg32f, binding = 1) uniform writeonly image2D uImage2D; +layout(r32f, binding = 2) uniform readonly image2DArray uImage2DArray; +layout(rgba8, binding = 3) uniform writeonly image3D uImage3D; +layout(rgba8_snorm, binding = 6) uniform writeonly imageBuffer uImageBuffer; + +// There is no RWTexture2DMS. 
+ +void main() +{ + int a = imageSize(uImage1D); + ivec2 b = imageSize(uImage2D); + ivec3 c = imageSize(uImage2DArray); + ivec3 d = imageSize(uImage3D); + int e = imageSize(uImageBuffer); +} diff --git a/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag b/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag new file mode 100644 index 00000000000..25103e6e95f --- /dev/null +++ b/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag @@ -0,0 +1,18 @@ +#version 450 + +layout(rgba32f, binding = 0) uniform writeonly image1D uImage1D; +layout(rg32f, binding = 1) uniform writeonly image2D uImage2D; +layout(r32f, binding = 2) uniform readonly image2DArray uImage2DArray; +layout(rgba8, binding = 3) uniform writeonly image3D uImage3D; +layout(rgba8_snorm, binding = 6) uniform writeonly imageBuffer uImageBuffer; + +// There is no RWTexture2DMS. + +void main() +{ + int a = imageSize(uImage1D); + ivec2 b = imageSize(uImage2D); + ivec3 c = imageSize(uImage2DArray); + ivec3 d = imageSize(uImage3D); + int e = imageSize(uImageBuffer); +} diff --git a/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag b/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag new file mode 100644 index 00000000000..30b957b26b4 --- /dev/null +++ b/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag @@ -0,0 +1,13 @@ +#version 450 + +layout(location = 0) in vec2 vUV; +layout(location = 0) out vec4 FragColor; +layout(set = 0, binding = 0) uniform sampler2D uSampler; + +void main() +{ + FragColor = textureProj(uSampler, vec3(vUV, 5.0)); + FragColor += texture(uSampler, vUV, 3.0); + FragColor += textureLod(uSampler, vUV, 2.0); + FragColor += textureGrad(uSampler, vUV, vec2(4.0), vec2(5.0)); +} diff --git a/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag b/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag index 0aadd14883c..35373279fc1 100644 --- a/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag +++ 
b/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag @@ -1,28 +1,52 @@ #version 450 #extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_samplerless_texture_functions : require layout(set = 0, binding = 0) uniform texture2D uSamplers[]; -layout(set = 1, binding = 0) uniform sampler2D uCombinedSamplers[]; -layout(set = 2, binding = 0) uniform sampler uSamps[]; +layout(set = 1, binding = 0) uniform texture2DMS uSamplersMS[]; +layout(set = 2, binding = 4) uniform sampler2D uCombinedSamplers[]; +layout(set = 3, binding = 1) uniform sampler uSamps[]; layout(location = 0) flat in int vIndex; layout(location = 1) in vec2 vUV; layout(location = 0) out vec4 FragColor; -layout(set = 3, binding = 0) uniform UBO +layout(r32f, set = 7, binding = 5) uniform image2D uImages[]; +layout(r32ui, set = 8, binding = 5) uniform uimage2D uImagesU32[]; + +layout(set = 9, binding = 2) uniform UBO { vec4 v[64]; } ubos[]; -layout(set = 4, binding = 0) readonly buffer SSBO +layout(set = 10, binding = 3) buffer SSBO { + uint counter; vec4 v[]; } ssbos[]; void main() { int i = vIndex; - FragColor = texture(sampler2D(uSamplers[nonuniformEXT(i + 10)], uSamps[nonuniformEXT(i + 40)]), vUV); + FragColor = texture(nonuniformEXT(sampler2D(uSamplers[i + 10], uSamps[i + 40])), vUV); FragColor = texture(uCombinedSamplers[nonuniformEXT(i + 10)], vUV); FragColor += ubos[nonuniformEXT(i + 20)].v[nonuniformEXT(i + 40)]; FragColor += ssbos[nonuniformEXT(i + 50)].v[nonuniformEXT(i + 60)]; + ssbos[nonuniformEXT(i + 60)].v[nonuniformEXT(i + 70)] = vec4(20.0); + + FragColor = texelFetch(uSamplers[nonuniformEXT(i + 10)], ivec2(vUV), 0); + atomicAdd(ssbos[nonuniformEXT(i + 100)].counter, 100u); + + vec2 queried = textureQueryLod(nonuniformEXT(sampler2D(uSamplers[i + 10], uSamps[i + 40])), vUV); + queried += textureQueryLod(uCombinedSamplers[nonuniformEXT(i + 10)], vUV); + FragColor.xy += queried; + + FragColor.x += float(textureQueryLevels(uSamplers[nonuniformEXT(i + 20)])); + 
FragColor.y += float(textureSamples(uSamplersMS[nonuniformEXT(i + 20)])); + FragColor.xy += vec2(textureSize(uSamplers[nonuniformEXT(i + 20)], 0)); + + FragColor += imageLoad(uImages[nonuniformEXT(i + 50)], ivec2(vUV)); + FragColor.xy += vec2(imageSize(uImages[nonuniformEXT(i + 20)])); + imageStore(uImages[nonuniformEXT(i + 60)], ivec2(vUV), vec4(50.0)); + + imageAtomicAdd(uImagesU32[nonuniformEXT(i + 70)], ivec2(vUV), 40u); } diff --git a/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag new file mode 100644 index 00000000000..ceac8cc50e4 --- /dev/null +++ b/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2, rgba8) uniform readonly image2D img3; +layout(binding = 3) coherent buffer Buffer +{ + int foo; + uint bar; +}; +layout(binding = 4) buffer Buffer2 +{ + uint quux; +}; + +layout(binding = 5, rgba8) uniform writeonly image2D img4; +layout(binding = 6) buffer Buffer3 +{ + int baz; +}; + +void main() +{ + // Deliberately outside the critical section to test usage tracking. 
+ baz = 0; + imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, quux); + endInvocationInterlockARB(); +} diff --git a/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag b/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag new file mode 100644 index 00000000000..cd035467be2 --- /dev/null +++ b/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 0) coherent readonly buffer SSBO +{ + vec4 a; +}; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = a; +} diff --git a/shaders-hlsl/frag/readonly-coherent-ssbo.frag b/shaders-hlsl/frag/readonly-coherent-ssbo.frag new file mode 100644 index 00000000000..cd035467be2 --- /dev/null +++ b/shaders-hlsl/frag/readonly-coherent-ssbo.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 0) coherent readonly buffer SSBO +{ + vec4 a; +}; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = a; +} diff --git a/shaders-hlsl/frag/sample-mask-in-and-out.frag b/shaders-hlsl/frag/sample-mask-in-and-out.frag new file mode 100644 index 00000000000..75ed3cc1675 --- /dev/null +++ b/shaders-hlsl/frag/sample-mask-in-and-out.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0); + gl_SampleMask[0] = gl_SampleMaskIn[0]; +} diff --git a/shaders-hlsl/frag/sample-mask-in.frag b/shaders-hlsl/frag/sample-mask-in.frag new file mode 100644 index 00000000000..16031a35573 --- /dev/null +++ b/shaders-hlsl/frag/sample-mask-in.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + if ((gl_SampleMaskIn[0] & (1 << gl_SampleID)) != 0) + { + FragColor = vec4(1.0); + } +} diff --git a/shaders-hlsl/frag/sample-mask-out.frag b/shaders-hlsl/frag/sample-mask-out.frag 
new file mode 100644 index 00000000000..c7fb80eba9d --- /dev/null +++ b/shaders-hlsl/frag/sample-mask-out.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0); + gl_SampleMask[0] = 0; +} diff --git a/shaders-hlsl/frag/switch-unreachable-break.frag b/shaders-hlsl/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..b0421e60ef3 --- /dev/null +++ b/shaders-hlsl/frag/switch-unreachable-break.frag @@ -0,0 +1,32 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vInput; + +layout(set = 0, binding = 0) uniform UBO +{ + int cond; + int cond2; +}; + +void main() +{ + bool frog = false; + switch (cond) + { + case 1: + if (cond2 < 50) + break; + else + discard; + + break; + + default: + frog = true; + break; + } + + FragColor = frog ? vec4(10.0) : vec4(20.0); +} + diff --git a/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..4f9500fe177 --- /dev/null +++ b/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -0,0 +1,74 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(lines, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main3() +{ + 
gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0, 1) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; +} + +void main2() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + // gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 6.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + main3(); + } +} + +void main() +{ + main2(); +} diff --git a/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..4d8e3f64944 --- /dev/null +++ b/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -0,0 +1,64 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(triangles, max_vertices = 24, max_primitives = 22) out; + +out 
gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + // gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 3.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0, 1, 2) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} 
diff --git a/shaders-hlsl/vert/invariant.vert b/shaders-hlsl/vert/invariant.vert new file mode 100644 index 00000000000..239b985da12 --- /dev/null +++ b/shaders-hlsl/vert/invariant.vert @@ -0,0 +1,13 @@ +#version 310 es + +invariant gl_Position; +layout(location = 0) invariant out vec4 vColor; +layout(location = 0) in vec4 vInput0; +layout(location = 1) in vec4 vInput1; +layout(location = 2) in vec4 vInput2; + +void main() +{ + gl_Position = vInput0 + vInput1 * vInput2; + vColor = (vInput0 - vInput1) * vInput2; +} diff --git a/shaders-hlsl/vert/no-contraction.vert b/shaders-hlsl/vert/no-contraction.vert new file mode 100644 index 00000000000..206fbf0de80 --- /dev/null +++ b/shaders-hlsl/vert/no-contraction.vert @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in vec4 vB; +layout(location = 2) in vec4 vC; + +void main() +{ + precise vec4 mul = vA * vB; + precise vec4 add = vA + vB; + precise vec4 sub = vA - vB; + precise vec4 mad = vA * vB + vC; + precise vec4 summed = mul + add + sub + mad; + gl_Position = summed; +} diff --git a/shaders-msl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/shaders-msl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 00000000000..87aee2db54f --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %T "T" + OpMemberName %T 0 "a" + OpName %v "v" + OpName %T_0 "T" + OpMemberName %T_0 0 "b" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "foo" + OpName %_ "" + OpName %T_1 "T" + OpMemberName %T_1 0 "c" + OpName %SSBO2 "SSBO2" + OpMemberName %SSBO2 0 "bar" + OpName %__0 "" 
+ OpMemberDecorate %T_0 0 Offset 0 + OpDecorate %_runtimearr_T_0 ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpMemberDecorate %T_1 0 Offset 0 + OpDecorate %_runtimearr_T_1 ArrayStride 16 + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %T = OpTypeStruct %float +%_ptr_Function_T = OpTypePointer Function %T + %float_40 = OpConstant %float 40 + %11 = OpConstantComposite %T %float_40 + %T_0 = OpTypeStruct %float +%_runtimearr_T_0 = OpTypeRuntimeArray %T_0 + %SSBO1 = OpTypeStruct %_runtimearr_T_0 +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 +%_ptr_Uniform_T_0 = OpTypePointer Uniform %T_0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %T_1 = OpTypeStruct %float +%_runtimearr_T_1 = OpTypeRuntimeArray %T_1 + %SSBO2 = OpTypeStruct %_runtimearr_T_1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %__0 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int_30 = OpConstant %int 30 +%_ptr_Uniform_T_1 = OpTypePointer Uniform %T_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable %_ptr_Function_T Function + OpStore %v %11 + %20 = OpLoad %T %v + %22 = OpAccessChain %_ptr_Uniform_T_0 %_ %int_0 %int_10 + %23 = OpCompositeExtract %float %20 0 + %25 = OpAccessChain %_ptr_Uniform_float %22 %int_0 + OpStore %25 %23 + %32 = OpLoad %T %v + %34 = OpAccessChain %_ptr_Uniform_T_1 %__0 %int_0 %int_30 + %35 = OpCompositeExtract %float %32 0 + %36 = OpAccessChain %_ptr_Uniform_float %34 %int_0 + OpStore %36 %35 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp b/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 
index 00000000000..3f2d141a1f5 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %c "c" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %SSBO = OpTypeStruct %uint %uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_uint Function + %15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 + %16 = OpAtomicLoad %uint %15 %int_1 %int_0 + OpStore %c %16 + %18 = OpLoad %uint %c + %19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + OpAtomicStore %19 %int_1 %int_0 %18 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/atomic-min-max-sign.asm.comp b/shaders-msl-no-opt/asm/comp/atomic-min-max-sign.asm.comp new file mode 100644 index 00000000000..832a2735497 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/atomic-min-max-sign.asm.comp @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; 
Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %SSBO = OpTypeStruct %uint %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 +%uint_4294967295 = OpConstant %uint 4294967295 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_int = OpTypePointer Uniform %int + %int_n3 = OpConstant %int -3 + %int_4 = OpConstant %int 4 + %v3uint = OpTypeVector %uint 3 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %13 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + %18 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + %22 = OpAccessChain %_ptr_Uniform_int %_ %int_1 + %25 = OpAccessChain %_ptr_Uniform_int %_ %int_1 + %30 = OpAtomicUMax %uint %13 %uint_1 %uint_0 %uint_1 + %31 = OpAtomicSMin %uint %13 %uint_1 %uint_0 %uint_1 + %32 = OpAtomicUMin %uint %18 %uint_1 %uint_0 %uint_4294967295 + %33 = OpAtomicSMax %uint %18 %uint_1 %uint_0 %uint_4294967295 + %34 = OpAtomicSMax %int %22 %uint_1 %uint_0 %int_n3 + %35 = OpAtomicUMin %int %22 %uint_1 %uint_0 %int_n3 + %36 = OpAtomicSMin %int %25 %uint_1 %uint_0 %int_4 + %37 = OpAtomicUMax %int %25 %uint_1 %uint_0 %int_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp 
b/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp new file mode 100644 index 00000000000..3651a4de527 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp @@ -0,0 +1,63 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 33 +; Schema: 0 + OpCapability Shader + OpCapability Float16 + OpCapability StorageBuffer16BitAccess + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types" + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpMemberName %SSBO 2 "c" + OpMemberName %SSBO 3 "d" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpMemberDecorate %SSBO 2 Offset 8 + OpMemberDecorate %SSBO 3 Offset 12 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %float = OpTypeFloat 32 + %SSBO = OpTypeStruct %v2half %float %float %v2half +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half + %uint = OpTypeInt 32 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_3 = OpConstant %int 3 + %int_2 = OpConstant %int 2 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Uniform_v2half %_ %int_0 + %17 = OpLoad %v2half %16 + %20 = OpBitcast %float %17 + %22 = OpAccessChain 
%_ptr_Uniform_float %_ %int_1 + OpStore %22 %20 + %25 = OpAccessChain %_ptr_Uniform_float %_ %int_2 + %26 = OpLoad %float %25 + %28 = OpBitcast %v2half %26 + %29 = OpAccessChain %_ptr_Uniform_v2half %_ %int_3 + OpStore %29 %28 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/shaders-msl-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 00000000000..435fa322215 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,97 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "ints" + OpMemberName %SSBO 1 "uints" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %v4int = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 + + %int_1 = OpConstant %int 1 + %uint_11 = OpConstant %uint 11 + + %SSBO = OpTypeStruct %v4int %v4uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int +%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint + %main = OpFunction %void None %3 + %5 = OpLabel + %ints_ptr = OpAccessChain %_ptr_Uniform_v4int %_ %int_0 + %uints_ptr = OpAccessChain %_ptr_Uniform_v4uint %_ %int_1 + %ints = OpLoad %v4int %ints_ptr + %uints = OpLoad %v4uint %uints_ptr + + %ints_alt = OpVectorShuffle %v4int %ints %ints 3 2 1 0 + %uints_alt = OpVectorShuffle %v4uint %uints %uints 3 2 1 0 + + %int_to_int_popcount = OpBitCount %v4int %ints + 
%int_to_uint_popcount = OpBitCount %v4uint %ints + %uint_to_int_popcount = OpBitCount %v4int %uints + %uint_to_uint_popcount = OpBitCount %v4uint %uints + + ; BitReverse must have matching types w.r.t. sign, yay. + %int_to_int_reverse = OpBitReverse %v4int %ints + ;%int_to_uint_reverse = OpBitReverse %v4uint %ints + ;%uint_to_int_reverse = OpBitReverse %v4int %uints + %uint_to_uint_reverse = OpBitReverse %v4uint %uints + + ; Base and Result must match. + %int_to_int_sbit = OpBitFieldSExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_sbit = OpBitFieldSExtract %v4uint %ints %offset %count + ;%uint_to_int_sbit = OpBitFieldSExtract %v4int %uints %offset %count + %uint_to_uint_sbit = OpBitFieldSExtract %v4uint %uints %uint_11 %int_1 + + ; Base and Result must match. + %int_to_int_ubit = OpBitFieldUExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_ubit = OpBitFieldUExtract %v4uint %ints %offset %count + ;%uint_to_int_ubit = OpBitFieldUExtract %v4int %uints %offset %count + %uint_to_uint_ubit = OpBitFieldUExtract %v4uint %uints %uint_11 %int_1 + + %int_to_int_insert = OpBitFieldInsert %v4int %ints %ints_alt %int_1 %uint_11 + %uint_to_uint_insert = OpBitFieldInsert %v4uint %uints %uints_alt %uint_11 %int_1 + + OpStore %ints_ptr %int_to_int_popcount + OpStore %uints_ptr %int_to_uint_popcount + OpStore %ints_ptr %uint_to_int_popcount + OpStore %uints_ptr %uint_to_uint_popcount + + OpStore %ints_ptr %int_to_int_reverse + ;OpStore %uints_ptr %int_to_uint_reverse + ;OpStore %ints_ptr %uint_to_int_reverse + OpStore %uints_ptr %uint_to_uint_reverse + + OpStore %ints_ptr %int_to_int_sbit + ;OpStore %uints_ptr %int_to_uint_sbit + ;OpStore %ints_ptr %uint_to_int_sbit + OpStore %uints_ptr %uint_to_uint_sbit + + OpStore %ints_ptr %int_to_int_ubit + ;OpStore %uints_ptr %int_to_uint_ubit + ;OpStore %ints_ptr %uint_to_int_ubit + OpStore %uints_ptr %uint_to_uint_ubit + + OpStore %ints_ptr %int_to_int_insert + OpStore %uints_ptr %uint_to_uint_insert + + OpReturn + OpFunctionEnd diff 
--git a/shaders-msl-no-opt/asm/comp/bitscan.asm.comp b/shaders-msl-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 00000000000..e3b785cd52b --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,72 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "u" + OpMemberName %SSBO 1 "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + %SSBO = OpTypeStruct %uvec4 %ivec4 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uvec4 = OpTypePointer Uniform %uvec4 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_ivec4 = OpTypePointer Uniform %ivec4 + %main = OpFunction %void None %3 + %5 = OpLabel + %uptr = OpAccessChain %_ptr_Uniform_uvec4 %_ %int_0 + %iptr = OpAccessChain %_ptr_Uniform_ivec4 %_ %int_1 + %uvalue = OpLoad %uvec4 %uptr + %ivalue = OpLoad %ivec4 %iptr + + %lsb_uint_to_uint = OpExtInst %uvec4 %1 FindILsb %uvalue + %lsb_uint_to_int = OpExtInst %ivec4 %1 FindILsb %uvalue + %lsb_int_to_uint = OpExtInst %uvec4 %1 FindILsb %ivalue + %lsb_int_to_int = OpExtInst %ivec4 %1 FindILsb %ivalue + + %umsb_uint_to_uint = OpExtInst %uvec4 %1 FindUMsb %uvalue + %umsb_uint_to_int = OpExtInst %ivec4 %1 FindUMsb %uvalue + %umsb_int_to_uint = OpExtInst %uvec4 %1 FindUMsb %ivalue + %umsb_int_to_int = OpExtInst %ivec4 %1 FindUMsb %ivalue + + %smsb_uint_to_uint = OpExtInst %uvec4 %1 FindSMsb %uvalue 
+ %smsb_uint_to_int = OpExtInst %ivec4 %1 FindSMsb %uvalue + %smsb_int_to_uint = OpExtInst %uvec4 %1 FindSMsb %ivalue + %smsb_int_to_int = OpExtInst %ivec4 %1 FindSMsb %ivalue + + OpStore %uptr %lsb_uint_to_uint + OpStore %iptr %lsb_uint_to_int + OpStore %uptr %lsb_int_to_uint + OpStore %iptr %lsb_int_to_int + + OpStore %uptr %umsb_uint_to_uint + OpStore %iptr %umsb_uint_to_int + OpStore %uptr %umsb_int_to_uint + OpStore %iptr %umsb_int_to_int + + OpStore %uptr %smsb_uint_to_uint + OpStore %iptr %smsb_uint_to_int + OpStore %uptr %smsb_int_to_uint + OpStore %iptr %smsb_int_to_int + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp b/shaders-msl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp new file mode 100644 index 00000000000..37ff035fa6d --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp @@ -0,0 +1,85 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google spiregg; 0 +; Bound: 40 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + OpExtension "SPV_GOOGLE_user_type" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %csMainClear "main" %gl_GlobalInvocationID + OpExecutionMode %csMainClear LocalSize 64 1 1 + OpSource HLSL 600 + OpName %type_CommonConstants "type.CommonConstants" + OpMemberName %type_CommonConstants 0 "g_count" + OpMemberName %type_CommonConstants 1 "g_padding4" + OpName %CommonConstants "CommonConstants" + OpName %type_RWStructuredBuffer_MyStruct "type.RWStructuredBuffer.MyStruct" + OpName %MyStruct "MyStruct" + OpMemberName %MyStruct 0 "m_coefficients" + OpName %g_data "g_data" + OpName %csMainClear "csMainClear" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorateString %gl_GlobalInvocationID UserSemantic "SV_DispatchThreadID" + OpDecorate %CommonConstants DescriptorSet 0 + OpDecorate %CommonConstants Binding 0 + OpDecorate %g_data DescriptorSet 0 + OpDecorate %g_data Binding 1 + 
OpMemberDecorate %type_CommonConstants 0 Offset 0 + OpMemberDecorate %type_CommonConstants 1 Offset 4 + OpDecorate %type_CommonConstants Block + OpDecorateString %CommonConstants UserTypeGOOGLE "cbuffer" + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %MyStruct 0 Offset 0 + OpDecorate %_runtimearr_MyStruct ArrayStride 64 + OpMemberDecorate %type_RWStructuredBuffer_MyStruct 0 Offset 0 + OpDecorate %type_RWStructuredBuffer_MyStruct BufferBlock + OpDecorateString %g_data UserTypeGOOGLE "rwstructuredbuffer" + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v3uint = OpTypeVector %uint 3 +%type_CommonConstants = OpTypeStruct %uint %v3uint +%_ptr_Uniform_type_CommonConstants = OpTypePointer Uniform %type_CommonConstants +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %MyStruct = OpTypeStruct %_arr_v4float_uint_4 +%_runtimearr_MyStruct = OpTypeRuntimeArray %MyStruct +%type_RWStructuredBuffer_MyStruct = OpTypeStruct %_runtimearr_MyStruct +%_ptr_Uniform_type_RWStructuredBuffer_MyStruct = OpTypePointer Uniform %type_RWStructuredBuffer_MyStruct +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %21 = OpTypeFunction %void +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool +%_ptr_Uniform_MyStruct = OpTypePointer Uniform %MyStruct +%CommonConstants = OpVariable %_ptr_Uniform_type_CommonConstants Uniform + %g_data = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_MyStruct Uniform +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 + %26 = OpConstantNull %v4float + %27 = OpConstantComposite %_arr_v4float_uint_4 %26 %26 %26 %26 + %28 = OpConstantComposite %MyStruct %27 +%csMainClear = OpFunction %void None %21 + %29 = OpLabel + %30 = OpLoad %v3uint %gl_GlobalInvocationID + OpSelectionMerge %31 None + OpSwitch %uint_0 %32 + %32 = OpLabel + %33 = 
OpCompositeExtract %uint %30 0 + %34 = OpAccessChain %_ptr_Uniform_uint %CommonConstants %int_0 + %35 = OpLoad %uint %34 + %36 = OpUGreaterThanEqual %bool %33 %35 + OpSelectionMerge %37 DontFlatten + OpBranchConditional %36 %38 %37 + %38 = OpLabel + OpBranch %31 + %37 = OpLabel + %39 = OpAccessChain %_ptr_Uniform_MyStruct %g_data %int_0 %33 + OpStore %39 %28 + OpBranch %31 + %31 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/block-like-array-type-construct.asm.comp b/shaders-msl-no-opt/asm/comp/block-like-array-type-construct.asm.comp new file mode 100644 index 00000000000..8aaa9500afb --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/block-like-array-type-construct.asm.comp @@ -0,0 +1,80 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 32 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %foo2 "foo2" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %_arr_float_uint_4 ArrayStride 4 + OpDecorate %struct_arr ArrayStride 32 + OpMemberDecorate %struct 0 Offset 0 + OpMemberDecorate %struct 1 Offset 16 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Private__arr_float_uint_4 = OpTypePointer Private %_arr_float_uint_4 + %foo = OpVariable %_ptr_Private__arr_float_uint_4 Private + %foo2 = OpVariable %_ptr_Private__arr_float_uint_4 Private + %int = OpTypeInt 32 1 + %int_0 
= OpConstant %int 0 + %float_1 = OpConstant %float 1 + %struct = OpTypeStruct %_arr_float_uint_4 %_arr_float_uint_4 + %struct_arr = OpTypeArray %struct %uint_2 + %ptr_struct = OpTypePointer Function %struct +%_ptr_Private_float = OpTypePointer Private %float + %int_1 = OpConstant %int 1 + %float_2 = OpConstant %float 2 + %int_2 = OpConstant %int 2 + %float_3 = OpConstant %float 3 + %int_3 = OpConstant %int 3 + %float_4 = OpConstant %float 4 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %carr = OpConstantComposite %_arr_float_uint_4 %float_1 %float_2 %float_3 %float_4 + %struct_constant_0 = OpConstantComposite %struct %carr %carr + %struct_constant_1 = OpConstantComposite %struct %carr %carr + %struct_arr_constant = OpConstantComposite %struct_arr %struct_constant_0 %struct_constant_1 + %SSBO = OpTypeStruct %uint %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %struct_var = OpVariable %ptr_struct Function + %16 = OpAccessChain %_ptr_Private_float %foo %int_0 + OpStore %16 %float_1 + OpStore %foo %carr + %19 = OpAccessChain %_ptr_Private_float %foo %int_1 + OpStore %19 %float_2 + %22 = OpAccessChain %_ptr_Private_float %foo %int_2 + OpStore %22 %float_3 + %25 = OpAccessChain %_ptr_Private_float %foo %int_3 + OpStore %25 %float_4 + OpCopyMemory %foo2 %foo + %l0 = OpLoad %_arr_float_uint_4 %foo + %l1 = OpLoad %_arr_float_uint_4 %foo2 + %struct0 = OpCompositeConstruct %struct %l0 %l1 + OpStore %struct_var %struct0 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/buffer-device-address-ptr-casting.msl24.asm.comp b/shaders-msl-no-opt/asm/comp/buffer-device-address-ptr-casting.msl24.asm.comp new file mode 100644 index 00000000000..ed8d0ba6f5e --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/buffer-device-address-ptr-casting.msl24.asm.comp @@ -0,0 +1,106 @@ +; 
SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 62 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_KHR_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_ARB_gpu_shader_int64" + OpSourceExtension "GL_EXT_buffer_reference" + OpSourceExtension "GL_EXT_buffer_reference_uvec2" + OpName %main "main" + OpName %SomeBuffer "SomeBuffer" + OpMemberName %SomeBuffer 0 "v" + OpMemberName %SomeBuffer 1 "a" + OpMemberName %SomeBuffer 2 "b" + OpName %Registers "Registers" + OpMemberName %Registers 0 "address" + OpMemberName %Registers 1 "address2" + OpName %registers "registers" + OpName %a "a" + OpName %b "b" + OpMemberDecorate %SomeBuffer 0 Offset 0 + OpMemberDecorate %SomeBuffer 1 Offset 16 + OpMemberDecorate %SomeBuffer 2 Offset 24 + OpDecorate %SomeBuffer Block + OpMemberDecorate %Registers 0 Offset 0 + OpMemberDecorate %Registers 1 Offset 8 + OpDecorate %Registers Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_SomeBuffer PhysicalStorageBuffer + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %ulong = OpTypeInt 64 0 + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %SomeBuffer = OpTypeStruct %v4float %ulong %v2uint +%_ptr_PhysicalStorageBuffer_SomeBuffer = OpTypePointer PhysicalStorageBuffer %SomeBuffer +%_ptr_Function__ptr_PhysicalStorageBuffer_SomeBuffer = OpTypePointer Function %_ptr_PhysicalStorageBuffer_SomeBuffer + %Registers = OpTypeStruct %ulong %v2uint +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %registers = OpVariable %_ptr_PushConstant_Registers PushConstant + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_PushConstant_ulong = OpTypePointer PushConstant 
%ulong + %int_1 = OpConstant %int 1 +%_ptr_PushConstant_v2uint = OpTypePointer PushConstant %v2uint + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %35 = OpConstantComposite %v4float %float_1 %float_2 %float_3 %float_4 +%_ptr_PhysicalStorageBuffer_v4float = OpTypePointer PhysicalStorageBuffer %v4float + %float_5 = OpConstant %float 5 + %float_6 = OpConstant %float 6 + %float_7 = OpConstant %float 7 + %float_8 = OpConstant %float 8 + %43 = OpConstantComposite %v4float %float_5 %float_6 %float_7 %float_8 +%_ptr_Function_ulong = OpTypePointer Function %ulong +%_ptr_Function_v2uint = OpTypePointer Function %v2uint +%_ptr_PhysicalStorageBuffer_ulong = OpTypePointer PhysicalStorageBuffer %ulong + %int_2 = OpConstant %int 2 +%_ptr_PhysicalStorageBuffer_v2uint = OpTypePointer PhysicalStorageBuffer %v2uint + %main = OpFunction %void None %3 + %5 = OpLabel + %a = OpVariable %_ptr_Function_ulong Function + %b = OpVariable %_ptr_Function_v2uint Function + %21 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_0 + %27 = OpAccessChain %_ptr_PushConstant_v2uint %registers %int_1 + %uint_ptr0 = OpLoad %ulong %21 + %uint_ptr1 = OpLoad %v2uint %27 + + ; ConvertUToPtr and vice versa do not accept vectors. 
+ %ulong_ptr0 = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr0 + %ulong_ptr1 = OpBitcast %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr0 + %uvec2_ptr0 = OpBitcast %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr1 + + %vec4_write0 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %ulong_ptr0 %int_0 + %vec4_write1 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %ulong_ptr1 %int_0 + %vec4_write2 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %uvec2_ptr0 %int_0 + + OpStore %vec4_write0 %35 Aligned 16 + OpStore %vec4_write1 %35 Aligned 16 + OpStore %vec4_write2 %35 Aligned 16 + + %ulong_from_ptr0 = OpConvertPtrToU %ulong %ulong_ptr0 + %ulong_from_ptr1 = OpBitcast %ulong %ulong_ptr1 + %uvec2_from_ptr0 = OpBitcast %v2uint %uvec2_ptr0 + + %ptr0 = OpAccessChain %_ptr_PhysicalStorageBuffer_ulong %ulong_ptr0 %int_1 + %ptr1 = OpAccessChain %_ptr_PhysicalStorageBuffer_ulong %ulong_ptr1 %int_1 + %ptr2 = OpAccessChain %_ptr_PhysicalStorageBuffer_v2uint %uvec2_ptr0 %int_2 + + OpStore %ptr0 %ulong_from_ptr0 Aligned 8 + OpStore %ptr1 %ulong_from_ptr1 Aligned 8 + OpStore %ptr2 %uvec2_from_ptr0 Aligned 8 + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/composite-construct-buffer-struct.asm.comp b/shaders-msl-no-opt/asm/comp/composite-construct-buffer-struct.asm.comp new file mode 100644 index 00000000000..c7b76a8c064 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/composite-construct-buffer-struct.asm.comp @@ -0,0 +1,54 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 
32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 + %Block_ptr = OpTypePointer StorageBuffer %Block +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; Copy device array to temporary. 
+ %ptr = OpAccessChain %Block_ptr %SSBO_Var %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + %constructed = OpCompositeConstruct %Block %loaded_array %loaded_array + OpStore %ptr %constructed + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp b/shaders-msl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 00000000000..e1dcb0ef8e2 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,81 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 49 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %gl_LocalInvocationID + OpExecutionMode %main LocalSize 4 4 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %gl_LocalInvocationID "gl_LocalInvocationID" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %25 "indexable" + OpName %38 "indexable" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input 
%v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_4 = OpConstant %uint 4 +%_arr_int_uint_4 = OpTypeArray %int %uint_4 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %25 = OpConstantComposite %_arr_int_uint_4 %int_0 %int_1 %int_2 %int_3 +%gl_LocalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Function__arr_int_uint_4 = OpTypePointer Function %_arr_int_uint_4 +%_ptr_Function_int = OpTypePointer Function %int + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %38 = OpConstantComposite %_arr_int_uint_4 %int_4 %int_5 %int_6 %int_7 + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_int = OpTypePointer Uniform %int +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_4 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_int_uint_4 Function +%indexable_0 = OpVariable %_ptr_Function__arr_int_uint_4 Function + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %27 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0 + %28 = OpLoad %uint %27 + OpStore %indexable %25 + %32 = OpAccessChain %_ptr_Function_int %indexable %28 + %33 = OpLoad %int %32 + %40 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_1 + %41 = OpLoad %uint %40 + OpStore %indexable_0 %38 + %43 = OpAccessChain %_ptr_Function_int %indexable_0 %41 + %44 = OpLoad %int %43 + %45 = OpIAdd %int %33 %44 + %47 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %19 + OpStore %47 %45 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp new file mode 100644 index 00000000000..6a7065a6fb8 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp @@ -0,0 +1,81 @@ +; 
SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 48 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %ssbo + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %B1 "B1" + OpName %A "A" + OpName %C "C" + OpName %B2 "B2" + OpMemberName %A 0 "a" + OpMemberName %A 1 "b1" + OpMemberName %A 2 "b1_array" + OpMemberName %C 0 "c" + OpMemberName %C 1 "b2" + OpMemberName %C 2 "b2_array" + OpMemberName %B1 0 "elem1" + OpMemberName %B2 0 "elem2" + OpMemberName %SSBO 0 "a_block" + OpMemberName %SSBO 1 "c_block" + OpDecorate %B1Array ArrayStride 16 + OpDecorate %B2Array ArrayStride 16 + OpMemberDecorate %B1 0 Offset 0 + OpMemberDecorate %A 0 Offset 0 + OpMemberDecorate %A 1 Offset 16 + OpMemberDecorate %A 2 Offset 32 + OpMemberDecorate %A 3 Offset 96 + OpMemberDecorate %B2 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %C 1 Offset 16 + OpMemberDecorate %C 2 Offset 32 + OpMemberDecorate %C 3 Offset 96 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 112 + OpMemberDecorate %A0 0 Offset 0 + OpMemberDecorate %C0 0 Offset 0 + OpMemberDecorate %A0 0 RowMajor + OpMemberDecorate %A0 0 MatrixStride 8 + OpMemberDecorate %C0 0 ColMajor + OpMemberDecorate %C0 0 MatrixStride 16 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %v4float = OpTypeVector %float 4 + %v2float = OpTypeVector %float 2 + %m2float = OpTypeMatrix %v2float 2 + %A0 = OpTypeStruct %m2float + %C0 = OpTypeStruct %m2float + %B2 = OpTypeStruct %v4float + %B2Array = OpTypeArray %B2 %uint_4 + %C = OpTypeStruct %v4float %B2 %B2Array %C0 + %B1 = OpTypeStruct %v4float + %B1Array = OpTypeArray %B1 %uint_4 + %A = OpTypeStruct %v4float %B1 %B1Array %A0 + %SSBO = OpTypeStruct %A %C 
+%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_C = OpTypePointer StorageBuffer %C + %int_0 = OpConstant %int 0 +%_ptr_Uniform_A = OpTypePointer StorageBuffer %A + %main = OpFunction %void None %3 + %5 = OpLabel + %22 = OpAccessChain %_ptr_Uniform_C %ssbo %int_1 + %39 = OpAccessChain %_ptr_Uniform_A %ssbo %int_0 + %23 = OpLoad %C %22 + %24 = OpCopyLogical %A %23 + OpStore %39 %24 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp new file mode 100644 index 00000000000..026bd113172 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 24 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %2 "main" %3 %4 + OpExecutionMode %2 LocalSize 1 1 1 + OpDecorate %5 Block + OpMemberDecorate %5 0 Offset 0 + OpMemberDecorate %5 1 Offset 16 + OpMemberDecorate %5 2 Offset 48 + OpMemberDecorate %5 3 Offset 64 + OpMemberDecorate %5 4 Offset 80 + OpMemberDecorate %5 5 Offset 96 + OpMemberDecorate %5 6 Offset 112 + OpDecorate %6 Block + OpMemberDecorate %6 0 Offset 0 + OpMemberDecorate %6 1 Offset 4 + OpMemberDecorate %6 2 Offset 12 + OpMemberDecorate %6 3 Offset 16 + OpMemberDecorate %6 4 Offset 32 + OpMemberDecorate %6 5 Offset 48 + OpMemberDecorate %6 6 Offset 64 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 0 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 1 + OpDecorate %7 ArrayStride 4 + OpDecorate %8 ArrayStride 16 + OpMemberDecorate %9 0 Offset 4 + OpMemberDecorate %10 0 Offset 8 + %11 = OpTypeVoid + %12 = OpTypeFloat 32 + %13 = OpTypeVector %12 2 + %14 = OpTypeVector %12 3 + %15 = 
OpTypeVector %12 4 + %16 = OpTypeMatrix %15 4 + %17 = OpTypeInt 32 0 + %18 = OpConstant %17 2 + %7 = OpTypeArray %17 %18 + %8 = OpTypeArray %17 %18 + %9 = OpTypeStruct %17 + %10 = OpTypeStruct %17 + %5 = OpTypeStruct %17 %8 %17 %9 %15 %14 %13 + %19 = OpTypePointer StorageBuffer %5 + %6 = OpTypeStruct %17 %7 %17 %10 %15 %14 %13 + %20 = OpTypePointer StorageBuffer %6 + %3 = OpVariable %20 StorageBuffer + %4 = OpVariable %19 StorageBuffer + %21 = OpTypeFunction %11 + %2 = OpFunction %11 None %21 + %1 = OpLabel + %22 = OpLoad %6 %3 + %23 = OpCopyLogical %5 %22 + OpStore %4 %23 + OpReturn + OpFunctionEnd + diff --git a/shaders-msl-no-opt/asm/comp/copy-logical.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/copy-logical.spv14.asm.comp new file mode 100644 index 00000000000..20fa0b099b8 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/copy-logical.spv14.asm.comp @@ -0,0 +1,69 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 48 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %ssbo + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %B1 "B1" + OpName %A "A" + OpName %C "C" + OpName %B2 "B2" + OpMemberName %A 0 "a" + OpMemberName %A 1 "b1" + OpMemberName %A 2 "b1_array" + OpMemberName %C 0 "c" + OpMemberName %C 1 "b2" + OpMemberName %C 2 "b2_array" + OpMemberName %B1 0 "elem1" + OpMemberName %B2 0 "elem2" + OpMemberName %SSBO 0 "a_block" + OpMemberName %SSBO 1 "c_block" + OpDecorate %B1Array ArrayStride 16 + OpDecorate %B2Array ArrayStride 16 + OpMemberDecorate %B1 0 Offset 0 + OpMemberDecorate %A 0 Offset 0 + OpMemberDecorate %A 1 Offset 16 + OpMemberDecorate %A 2 Offset 32 + OpMemberDecorate %B2 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %C 1 Offset 16 + OpMemberDecorate %C 2 Offset 32 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 96 + OpDecorate %SSBO Block + OpDecorate %ssbo 
DescriptorSet 0 + OpDecorate %ssbo Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %v4float = OpTypeVector %float 4 + %B2 = OpTypeStruct %v4float + %B2Array = OpTypeArray %B2 %uint_4 + %C = OpTypeStruct %v4float %B2 %B2Array + %B1 = OpTypeStruct %v4float + %B1Array = OpTypeArray %B1 %uint_4 + %A = OpTypeStruct %v4float %B1 %B1Array + %SSBO = OpTypeStruct %A %C +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_C = OpTypePointer StorageBuffer %C + %int_0 = OpConstant %int 0 +%_ptr_Uniform_A = OpTypePointer StorageBuffer %A + %main = OpFunction %void None %3 + %5 = OpLabel + %22 = OpAccessChain %_ptr_Uniform_C %ssbo %int_1 + %39 = OpAccessChain %_ptr_Uniform_A %ssbo %int_0 + %23 = OpLoad %C %22 + %24 = OpCopyLogical %A %23 + OpStore %39 %24 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/device-array-load-temporary.asm.comp b/shaders-msl-no-opt/asm/comp/device-array-load-temporary.asm.comp new file mode 100644 index 00000000000..d59aad3cef1 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/device-array-load-temporary.asm.comp @@ -0,0 +1,53 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 
2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; Copy device array to temporary. + %ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %ptr_arr_0 %loaded_array + OpStore %ptr_arr_0 %loaded_array + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.comp b/shaders-msl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.comp new file mode 100644 index 00000000000..d59aad3cef1 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.comp @@ -0,0 +1,53 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + 
OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; Copy device array to temporary. 
+ %ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %ptr_arr_0 %loaded_array + OpStore %ptr_arr_0 %loaded_array + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.asm.comp b/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.asm.comp new file mode 100644 index 00000000000..d9d0d51c39c --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.asm.comp @@ -0,0 +1,81 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform 
%SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; DeviceToDevice + %ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %ptr_arr_0 %loaded_array + + ; ConstantToDevice + %ptr_arr_1_const = OpAccessChain %arr_uvec2_2_ptr_const %UBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array_const = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %ptr_arr_0 %loaded_array_const + + ; StackToDevice + %loaded_array_func = OpLoad %arr_uvec2_2 %func + OpStore %ptr_arr_0 %loaded_array_func + + ; ThreadGroupToDevice + %loaded_array_workgroup = OpLoad %arr_uvec2_2 %wg + OpStore %ptr_arr_0 %loaded_array_workgroup + + ; DeviceToThreadGroup + %loaded_array_2 = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %wg %loaded_array_2 + + ; DeviceToStack + %loaded_array_3 = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %func %loaded_array_3 + + ; ConstantToThreadGroup + %loaded_array_const_2 = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %wg %loaded_array_const_2 + + ; ConstantToStack + %loaded_array_const_3 = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %func %loaded_array_const_3 + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.comp b/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.comp new file mode 100644 index 00000000000..d9d0d51c39c --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.comp @@ -0,0 +1,81 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName 
%UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; DeviceToDevice + %ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %ptr_arr_0 %loaded_array + + ; ConstantToDevice + %ptr_arr_1_const = OpAccessChain %arr_uvec2_2_ptr_const %UBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array_const = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %ptr_arr_0 %loaded_array_const + + ; StackToDevice + %loaded_array_func = OpLoad %arr_uvec2_2 %func + OpStore %ptr_arr_0 %loaded_array_func + + ; ThreadGroupToDevice + 
%loaded_array_workgroup = OpLoad %arr_uvec2_2 %wg + OpStore %ptr_arr_0 %loaded_array_workgroup + + ; DeviceToThreadGroup + %loaded_array_2 = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %wg %loaded_array_2 + + ; DeviceToStack + %loaded_array_3 = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %func %loaded_array_3 + + ; ConstantToThreadGroup + %loaded_array_const_2 = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %wg %loaded_array_const_2 + + ; ConstantToStack + %loaded_array_const_3 = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %func %loaded_array_const_3 + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 00000000000..73f3ceee1ad --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 26 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + ;OpEntryPoint GLCompute %main "main" %Samp %ubo %ssbo %v %w + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Samp "Samp" + OpName %UBO "UBO" + OpMemberName %UBO 0 "v" + OpName %ubo "ubo" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbo "ssbo" + OpName %v "v" + OpName %w "w" + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpDecorate %Samp DescriptorSet 0 + OpDecorate %Samp Binding 0 + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %ubo DescriptorSet 0 + OpDecorate %ubo Binding 1 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector 
%uint 3 + %uint_64 = OpConstant %uint 64 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1 + %float = OpTypeFloat 32 + %12 = OpTypeImage %float 2D 0 0 0 1 Unknown + %13 = OpTypeSampledImage %12 +%_ptr_UniformConstant_13 = OpTypePointer UniformConstant %13 + %Samp = OpVariable %_ptr_UniformConstant_13 UniformConstant + %UBO = OpTypeStruct %float +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %ubo = OpVariable %_ptr_Uniform_UBO Uniform + %SSBO = OpTypeStruct %float +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer +%_ptr_Private_float = OpTypePointer Private %float + %v = OpVariable %_ptr_Private_float Private +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %w = OpVariable %_ptr_Workgroup_float Workgroup + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp b/shaders-msl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp new file mode 100644 index 00000000000..30db11d45bc --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %ResTypeMod = OpTypeStruct %float %float +%_ptr_Function_ResTypeMod = OpTypePointer Function %ResTypeMod + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_20 = OpConstant %float 20 + %int_1 = OpConstant %int 1 
+%_ptr_Function_float = OpTypePointer Function %float +%ResTypeFrexp = OpTypeStruct %float %int +%_ptr_Function_ResTypeFrexp = OpTypePointer Function %ResTypeFrexp + %float_40 = OpConstant %float 40 +%_ptr_Function_int = OpTypePointer Function %int + %SSBO = OpTypeStruct %float %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_int = OpTypePointer Uniform %int + %main = OpFunction %void None %3 + %5 = OpLabel + %modres = OpExtInst %ResTypeMod %1 ModfStruct %float_20 + %frexpres = OpExtInst %ResTypeFrexp %1 FrexpStruct %float_40 + + %modres_f = OpCompositeExtract %float %modres 0 + %modres_i = OpCompositeExtract %float %modres 1 + %frexpres_f = OpCompositeExtract %float %frexpres 0 + %frexpres_i = OpCompositeExtract %int %frexpres 1 + + %float_ptr = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %int_ptr = OpAccessChain %_ptr_Uniform_int %_ %int_1 + + OpStore %float_ptr %modres_f + OpStore %float_ptr %modres_i + OpStore %float_ptr %frexpres_f + OpStore %int_ptr %frexpres_i + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/local-size-id-override.asm.comp b/shaders-msl-no-opt/asm/comp/local-size-id-override.asm.comp new file mode 100644 index 00000000000..2eaef4bdbee --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/local-size-id-override.asm.comp @@ -0,0 +1,60 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate 
%gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + %spec_1 = OpSpecConstant %uint 11 + %spec_2 = OpSpecConstant %uint 12 + %spec_3 = OpSpecConstant %uint 13 + %spec_4 = OpSpecConstant %uint 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %uint_3 %spec_1 %spec_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %27 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/local-size-id.asm.comp b/shaders-msl-no-opt/asm/comp/local-size-id.asm.comp new file mode 100644 index 00000000000..3031f4bb8af --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/local-size-id.asm.comp @@ -0,0 +1,76 @@ + OpCapability Shader + %1 = OpExtInstImport 
"GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + ; Test that we can declare the spec constant as signed. + ; Needs implicit bitcast since WorkGroupSize is uint. + %spec_1 = OpSpecConstant %int 11 + %spec_2 = OpSpecConstant %int 12 + %spec_3 = OpSpecConstant %int 13 + %spec_4 = OpSpecConstant %int 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + ; Test that we can build spec constant composites out of local size id values. + ; Needs special case handling. 
+ %spec_3_op = OpSpecConstantOp %uint IAdd %spec_3 %uint_3 +%WorkGroupSize = OpSpecConstantComposite %v3uint %spec_3_op %spec_4 %uint_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %wg_f = OpConvertUToF %v3float %WorkGroupSize + %wg_f4 = OpVectorShuffle %v4float %wg_f %wg_f 0 1 2 2 + ; Test that we can use the spec constants directly which needs to translate to gl_WorkGroupSize.elem. + ; Needs special case handling. + %res = OpFAdd %v4float %27 %wg_f4 + %f0 = OpConvertSToF %float %spec_3 + %f1 = OpConvertSToF %float %spec_4 + %f2 = OpConvertSToF %float %uint_2 + %res1 = OpVectorTimesScalar %v4float %res %f0 + %res2 = OpVectorTimesScalar %v4float %res1 %f1 + %res3 = OpVectorTimesScalar %v4float %res2 %f2 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %res3 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/modf-storage-class.asm.comp b/shaders-msl-no-opt/asm/comp/modf-storage-class.asm.comp new file mode 100644 index 00000000000..126b01e4616 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/modf-storage-class.asm.comp @@ -0,0 +1,116 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 91 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %2 "main" + OpExecutionMode %2 LocalSize 1 1 1 + OpDecorate %_arr_v2uint_uint_324 ArrayStride 8 + OpMemberDecorate %_struct_6 0 NonWritable + OpMemberDecorate %_struct_6 0 Offset 0 + OpDecorate %_struct_6 BufferBlock + OpDecorate %7 DescriptorSet 0 + OpDecorate %7 Binding 0 + OpDecorate %_arr_v2float_uint_648 ArrayStride 8 + OpMemberDecorate %_struct_9 0 Offset 0 + OpDecorate %_struct_9 
BufferBlock + OpDecorate %11 DescriptorSet 0 + OpDecorate %11 Binding 1 + OpDecorate %_arr_v2float_uint_648_0 ArrayStride 8 + OpMemberDecorate %_struct_13 0 Offset 0 + OpDecorate %_struct_13 BufferBlock + OpDecorate %14 DescriptorSet 0 + OpDecorate %14 Binding 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%_ptr_Function_v2float = OpTypePointer Function %v2float + %_struct_19 = OpTypeStruct %v2float %v2float + %10 = OpTypeFunction %_struct_19 %_ptr_Function_v2float +%_ptr_Function__struct_19 = OpTypePointer Function %_struct_19 + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_0 = OpConstant %uint 0 + %uint_648 = OpConstant %uint 648 + %bool = OpTypeBool + %v2uint = OpTypeVector %uint 2 +%_ptr_Function_v2uint = OpTypePointer Function %v2uint + %uint_324 = OpConstant %uint 324 +%_arr_v2uint_uint_324 = OpTypeArray %v2uint %uint_324 + %_struct_6 = OpTypeStruct %_arr_v2uint_uint_324 +%_ptr_Uniform__struct_6 = OpTypePointer Uniform %_struct_6 + %7 = OpVariable %_ptr_Uniform__struct_6 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_2 = OpConstant %uint 2 +%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint +%_arr_v2float_uint_648 = OpTypeArray %v2float %uint_648 + %_struct_9 = OpTypeStruct %_arr_v2float_uint_648 +%_ptr_Uniform__struct_9 = OpTypePointer Uniform %_struct_9 + %11 = OpVariable %_ptr_Uniform__struct_9 Uniform + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_arr_v2float_uint_648_0 = OpTypeArray %v2float %uint_648 + %_struct_13 = OpTypeStruct %_arr_v2float_uint_648_0 +%_ptr_Uniform__struct_13 = OpTypePointer Uniform %_struct_13 + %14 = OpVariable %_ptr_Uniform__struct_13 Uniform + %int_1 = OpConstant %int 1 + %2 = OpFunction %void None %3 + %5 = OpLabel + %46 = OpVariable %_ptr_Function_uint Function + %47 = OpVariable %_ptr_Function_v2uint Function + %48 = OpVariable %_ptr_Function_v2float Function 
+ %50 = OpVariable %_ptr_Function__struct_19 Function + OpStore %46 %uint_0 + OpBranch %30 + %30 = OpLabel + OpLoopMerge %32 %33 None + OpBranch %34 + %34 = OpLabel + %35 = OpLoad %uint %46 + %38 = OpULessThan %bool %35 %uint_648 + OpBranchConditional %38 %31 %32 + %31 = OpLabel + %49 = OpLoad %uint %46 + %51 = OpUDiv %uint %49 %uint_2 + %53 = OpAccessChain %_ptr_Uniform_v2uint %7 %int_0 %51 + %54 = OpLoad %v2uint %53 + OpStore %47 %54 + %56 = OpLoad %v2uint %47 + %57 = OpBitcast %v2float %56 + OpStore %48 %57 + %62 = OpLoad %uint %46 + %64 = OpIAdd %uint %62 %uint_1 + %65 = OpLoad %v2float %48 + %66 = OpLoad %uint %46 + %68 = OpAccessChain %_ptr_Uniform_v2float %11 %int_0 %66 + %69 = OpExtInst %v2float %1 Modf %65 %68 + %70 = OpAccessChain %_ptr_Uniform_v2float %11 %int_0 %64 + OpStore %70 %69 + %73 = OpLoad %v2float %48 + %74 = OpExtInst %_struct_19 %1 ModfStruct %73 + OpStore %50 %74 + %79 = OpLoad %uint %46 + %81 = OpAccessChain %_ptr_Function_v2float %50 %int_1 + %82 = OpLoad %v2float %81 + %83 = OpAccessChain %_ptr_Uniform_v2float %14 %int_0 %79 + OpStore %83 %82 + %84 = OpLoad %uint %46 + %85 = OpIAdd %uint %84 %uint_1 + %86 = OpAccessChain %_ptr_Function_v2float %50 %int_0 + %87 = OpLoad %v2float %86 + %88 = OpAccessChain %_ptr_Uniform_v2float %14 %int_0 %85 + OpStore %88 %87 + OpBranch %33 + %33 = OpLabel + %89 = OpLoad %uint %46 + %90 = OpIAdd %uint %89 %uint_2 + OpStore %46 %90 + OpBranch %30 + %32 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/opptrdiff-basic.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/opptrdiff-basic.spv14.asm.comp new file mode 100644 index 00000000000..8319dfdb607 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/opptrdiff-basic.spv14.asm.comp @@ -0,0 +1,98 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 59 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointersStorageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 %5 %6 
+ OpExecutionMode %1 LocalSize 4 1 1 + OpDecorate %7 Block + OpMemberDecorate %7 0 Offset 0 + OpDecorate %8 ArrayStride 16 + OpDecorate %9 Block + OpMemberDecorate %9 0 Offset 0 + OpDecorate %10 ArrayStride 68 + OpDecorate %11 Block + OpMemberDecorate %11 0 Offset 0 + OpDecorate %12 ArrayStride 4 + OpDecorate %13 ArrayStride 4 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 2 + OpDecorate %5 BuiltIn LocalInvocationId + OpDecorate %6 BuiltIn WorkgroupId + %14 = OpTypeVoid + %15 = OpTypeBool + %16 = OpTypeInt 32 1 + %17 = OpConstant %16 0 + %18 = OpConstant %16 1 + %19 = OpConstant %16 4 + %20 = OpConstant %16 16 + %21 = OpConstant %16 17 + %22 = OpTypeVector %16 3 + %23 = OpTypePointer Input %22 + %12 = OpTypeArray %16 %19 + %8 = OpTypeRuntimeArray %12 + %7 = OpTypeStruct %8 + %24 = OpTypePointer StorageBuffer %7 + %25 = OpTypePointer StorageBuffer %12 + %13 = OpTypeArray %16 %21 + %10 = OpTypeRuntimeArray %13 + %9 = OpTypeStruct %10 + %26 = OpTypePointer StorageBuffer %9 + %27 = OpTypePointer StorageBuffer %13 + %28 = OpTypePointer StorageBuffer %16 + %11 = OpTypeStruct %16 + %29 = OpTypePointer Uniform %11 + %30 = OpTypePointer Uniform %16 + %2 = OpVariable %24 StorageBuffer + %3 = OpVariable %26 StorageBuffer + %4 = OpVariable %29 Uniform + %5 = OpVariable %23 Input + %6 = OpVariable %23 Input + %31 = OpTypeFunction %14 + %1 = OpFunction %14 None %31 + %32 = OpLabel + %33 = OpAccessChain %30 %4 %17 + %34 = OpLoad %16 %33 + %35 = OpLoad %22 %6 + %36 = OpCompositeExtract %16 %35 0 + %37 = OpLoad %22 %5 + %38 = OpCompositeExtract %16 %37 0 + %39 = OpAccessChain %25 %2 %17 %17 + %40 = OpAccessChain %25 %2 %17 %36 + %41 = OpSGreaterThanEqual %15 %36 %34 + OpSelectionMerge %42 None + OpBranchConditional %41 %43 %42 + %43 = OpLabel + OpReturn + %42 = OpLabel + %44 = OpIEqual %15 %38 %18 + OpSelectionMerge %45 None + OpBranchConditional %44 %46 %45 
+ %46 = OpLabel + %47 = OpPtrDiff %16 %40 %39 + %48 = OpAccessChain %28 %3 %17 %36 %20 + OpStore %48 %47 + OpBranch %45 + %45 = OpLabel + %49 = OpPhi %16 %17 %42 %17 %46 %50 %45 + %50 = OpIAdd %16 %49 %18 + %51 = OpIEqual %15 %50 %19 + %52 = OpIMul %16 %38 %19 + %53 = OpIAdd %16 %52 %49 + %54 = OpAccessChain %28 %40 %38 + %55 = OpAccessChain %28 %40 %49 + %56 = OpPtrDiff %16 %54 %55 + %57 = OpAccessChain %28 %3 %17 %36 %53 + OpStore %57 %56 + OpLoopMerge %58 %45 None + OpBranchConditional %51 %58 %45 + %58 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp new file mode 100644 index 00000000000..8566491955e --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp @@ -0,0 +1,79 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 46 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointersStorageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 %5 + OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %6 ArrayStride 4 + OpDecorate %7 Block + OpMemberDecorate %7 0 Offset 0 + OpMemberDecorate %7 1 Offset 4 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %8 ArrayStride 8 + OpDecorate %9 Block + OpMemberDecorate %9 0 Offset 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %10 ArrayStride 4 + %11 = OpTypeVoid + %12 = OpTypeBool + %13 = OpTypeInt 32 1 + %14 = OpConstant %13 -1 + %15 = OpConstant %13 0 + %16 = OpConstant %13 1 + %17 = OpConstant %13 2 + %18 = OpConstant %13 3 + %19 = OpTypeVector %13 2 + %6 = OpTypeRuntimeArray %13 + %7 = OpTypeStruct %13 %6 + %20 = OpTypePointer StorageBuffer %7 + %2 = OpVariable %20 StorageBuffer + %8 = OpTypeRuntimeArray %19 + %9 = OpTypeStruct %8 + %21 = OpTypePointer StorageBuffer %9 + %3 = OpVariable %21 
StorageBuffer + %10 = OpTypePointer StorageBuffer %13 + %22 = OpTypePointer Private %10 + %4 = OpVariable %22 Private + %5 = OpVariable %22 Private + %23 = OpTypePointer StorageBuffer %13 + %24 = OpTypePointer StorageBuffer %19 + %25 = OpTypeFunction %11 + %1 = OpFunction %11 None %25 + %26 = OpLabel + %27 = OpAccessChain %23 %2 %15 + %28 = OpLoad %13 %27 + %29 = OpAccessChain %10 %2 %16 %15 + OpStore %4 %29 + %30 = OpPtrAccessChain %10 %29 %28 + OpStore %5 %30 + %31 = OpSLessThanEqual %12 %28 %15 + OpSelectionMerge %32 None + OpBranchConditional %31 %32 %33 + %33 = OpLabel + %34 = OpPhi %13 %15 %26 %35 %33 + %36 = OpLoad %10 %4 + %37 = OpLoad %10 %5 + %38 = OpPtrAccessChain %10 %36 %16 + %39 = OpPtrAccessChain %10 %37 %14 + %35 = OpIAdd %13 %34 %16 + OpStore %4 %38 + OpStore %5 %39 + %40 = OpPtrDiff %13 %36 %37 + %41 = OpPtrDiff %13 %37 %36 + %42 = OpCompositeConstruct %19 %40 %41 + %43 = OpAccessChain %24 %3 %15 %34 + OpStore %43 %42 + %44 = OpSGreaterThanEqual %12 %34 %28 + OpLoopMerge %45 %33 None + OpBranchConditional %44 %45 %33 + %45 = OpLabel + OpBranch %32 + %32 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp new file mode 100644 index 00000000000..5a97976ce8e --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp @@ -0,0 +1,96 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 64 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 %5 + OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %6 ArrayStride 4 + OpDecorate %7 Block + OpMemberDecorate %7 0 Offset 0 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 2 + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 3 + 
%8 = OpTypeVoid + %9 = OpTypeBool + %10 = OpTypeInt 32 0 + %11 = OpConstant %10 0 + %12 = OpConstant %10 1 + %6 = OpTypeRuntimeArray %10 + %7 = OpTypeStruct %6 + %13 = OpTypePointer StorageBuffer %7 + %14 = OpTypePointer StorageBuffer %6 + %15 = OpTypePointer StorageBuffer %10 + %2 = OpVariable %13 StorageBuffer + %3 = OpVariable %13 StorageBuffer + %4 = OpVariable %13 StorageBuffer + %5 = OpVariable %13 StorageBuffer + %16 = OpTypeFunction %8 + %1 = OpFunction %8 None %16 + %17 = OpLabel + %18 = OpCopyObject %10 %11 + %19 = OpAccessChain %14 %2 %11 + %20 = OpAccessChain %15 %2 %11 %11 + %21 = OpAccessChain %14 %3 %11 + %22 = OpAccessChain %15 %3 %11 %11 + %23 = OpAccessChain %14 %4 %11 + %24 = OpAccessChain %15 %4 %11 %11 + %25 = OpPtrEqual %9 %2 %3 + %26 = OpSelect %10 %25 %12 %11 + %27 = OpAccessChain %15 %5 %11 %18 + %28 = OpIAdd %10 %18 %12 + OpStore %27 %26 + %29 = OpPtrEqual %9 %19 %21 + %30 = OpSelect %10 %29 %12 %11 + %31 = OpAccessChain %15 %5 %11 %28 + %32 = OpIAdd %10 %28 %12 + OpStore %31 %30 + %33 = OpPtrEqual %9 %20 %22 + %34 = OpSelect %10 %33 %12 %11 + %35 = OpAccessChain %15 %5 %11 %32 + %36 = OpIAdd %10 %32 %12 + OpStore %35 %34 + %37 = OpPtrEqual %9 %2 %4 + %38 = OpSelect %10 %37 %12 %11 + %39 = OpAccessChain %15 %5 %11 %36 + %40 = OpIAdd %10 %36 %12 + OpStore %39 %38 + %41 = OpPtrEqual %9 %19 %23 + %42 = OpSelect %10 %41 %12 %11 + %43 = OpAccessChain %15 %5 %11 %40 + %44 = OpIAdd %10 %40 %12 + OpStore %43 %42 + %45 = OpPtrEqual %9 %20 %24 + %46 = OpSelect %10 %45 %12 %11 + %47 = OpAccessChain %15 %5 %11 %44 + %48 = OpIAdd %10 %44 %12 + OpStore %47 %46 + %49 = OpPtrEqual %9 %3 %4 + %50 = OpSelect %10 %49 %12 %11 + %51 = OpAccessChain %15 %5 %11 %48 + %52 = OpIAdd %10 %48 %12 + OpStore %51 %50 + %53 = OpPtrEqual %9 %21 %23 + %54 = OpSelect %10 %53 %12 %11 + %55 = OpAccessChain %15 %5 %11 %52 + %56 = OpIAdd %10 %52 %12 + OpStore %55 %54 + %57 = OpPtrEqual %9 %22 %24 + %58 = OpSelect %10 %57 %12 %11 + %59 = OpAccessChain %15 %5 %11 %56 + %60 = 
OpIAdd %10 %56 %12 + OpStore %59 %58 + %61 = OpPtrEqual %9 %2 %2 + %62 = OpSelect %10 %61 %12 %11 + %63 = OpAccessChain %15 %5 %11 %60 + OpStore %63 %62 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp new file mode 100644 index 00000000000..89813b22654 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp @@ -0,0 +1,98 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 63 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointersStorageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 + OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %5 ArrayStride 4 + OpDecorate %6 Block + OpDecorate %7 Block + OpMemberDecorate %6 0 ColMajor + OpMemberDecorate %6 0 Offset 0 + OpMemberDecorate %6 0 MatrixStride 16 + OpMemberDecorate %6 1 RowMajor + OpMemberDecorate %6 1 Offset 64 + OpMemberDecorate %6 1 MatrixStride 16 + OpMemberDecorate %6 2 Offset 128 + OpMemberDecorate %6 3 Offset 132 + OpMemberDecorate %7 0 Offset 0 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 2 + %8 = OpTypeVoid + %9 = OpTypeBool + %10 = OpTypeInt 32 0 + %11 = OpConstant %10 0 + %12 = OpConstant %10 1 + %13 = OpConstant %10 2 + %14 = OpConstant %10 3 + %15 = OpTypeFloat 32 + %5 = OpTypeRuntimeArray %10 + %16 = OpTypeVector %15 4 + %17 = OpTypeMatrix %16 4 + %6 = OpTypeStruct %17 %17 %15 %15 + %7 = OpTypeStruct %5 + %18 = OpTypePointer StorageBuffer %6 + %19 = OpTypePointer StorageBuffer %7 + %20 = OpTypePointer StorageBuffer %17 + %21 = OpTypePointer StorageBuffer %10 + %22 = OpTypePointer StorageBuffer %15 + %23 = OpTypePointer StorageBuffer %16 + %2 = OpVariable %18 StorageBuffer + %3 = OpVariable %18 
StorageBuffer + %4 = OpVariable %19 StorageBuffer + %24 = OpTypeFunction %8 + %1 = OpFunction %8 None %24 + %25 = OpLabel + %26 = OpCopyObject %10 %11 + %27 = OpAccessChain %22 %2 %13 + %28 = OpAccessChain %22 %2 %14 + %29 = OpAccessChain %22 %3 %13 + %30 = OpAccessChain %22 %3 %14 + %31 = OpAccessChain %20 %2 %11 + %32 = OpAccessChain %20 %2 %12 + %33 = OpAccessChain %23 %2 %11 %11 + %34 = OpAccessChain %23 %2 %11 %12 + %35 = OpAccessChain %22 %2 %11 %11 %11 + %36 = OpPtrEqual %9 %27 %28 + %37 = OpSelect %10 %36 %11 %12 + %38 = OpAccessChain %21 %4 %11 %26 + %39 = OpIAdd %10 %26 %12 + OpStore %38 %37 + %40 = OpPtrEqual %9 %27 %29 + %41 = OpSelect %10 %40 %11 %12 + %42 = OpAccessChain %21 %4 %11 %39 + %43 = OpIAdd %10 %39 %12 + OpStore %42 %41 + %44 = OpSelect %22 %40 %27 %28 + %45 = OpSelect %22 %40 %29 %30 + %46 = OpPtrEqual %9 %44 %45 + %47 = OpSelect %10 %46 %11 %12 + %48 = OpAccessChain %21 %4 %11 %43 + %49 = OpIAdd %10 %43 %12 + OpStore %48 %47 + %50 = OpSelect %22 %46 %27 %28 + %51 = OpPtrEqual %9 %50 %35 + %52 = OpSelect %10 %51 %11 %12 + %53 = OpAccessChain %21 %4 %11 %49 + %54 = OpIAdd %10 %49 %12 + OpStore %53 %52 + %55 = OpPtrEqual %9 %31 %32 + %56 = OpSelect %10 %55 %11 %12 + %57 = OpAccessChain %21 %4 %11 %54 + %58 = OpIAdd %10 %54 %12 + OpStore %57 %56 + %59 = OpPtrEqual %9 %33 %34 + %60 = OpSelect %10 %59 %11 %12 + %61 = OpAccessChain %21 %4 %11 %58 + %62 = OpIAdd %10 %58 %12 + OpStore %61 %56 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp new file mode 100644 index 00000000000..1cbf8045c55 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp @@ -0,0 +1,96 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 64 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 %5 + 
OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %6 ArrayStride 4 + OpDecorate %7 Block + OpMemberDecorate %7 0 Offset 0 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 2 + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 3 + %8 = OpTypeVoid + %9 = OpTypeBool + %10 = OpTypeInt 32 0 + %11 = OpConstant %10 0 + %12 = OpConstant %10 1 + %6 = OpTypeRuntimeArray %10 + %7 = OpTypeStruct %6 + %13 = OpTypePointer StorageBuffer %7 + %14 = OpTypePointer StorageBuffer %6 + %15 = OpTypePointer StorageBuffer %10 + %2 = OpVariable %13 StorageBuffer + %3 = OpVariable %13 StorageBuffer + %4 = OpVariable %13 StorageBuffer + %5 = OpVariable %13 StorageBuffer + %16 = OpTypeFunction %8 + %1 = OpFunction %8 None %16 + %17 = OpLabel + %18 = OpCopyObject %10 %11 + %19 = OpAccessChain %14 %2 %11 + %20 = OpAccessChain %15 %2 %11 %11 + %21 = OpAccessChain %14 %3 %11 + %22 = OpAccessChain %15 %3 %11 %11 + %23 = OpAccessChain %14 %4 %11 + %24 = OpAccessChain %15 %4 %11 %11 + %25 = OpPtrNotEqual %9 %2 %3 + %26 = OpSelect %10 %25 %12 %11 + %27 = OpAccessChain %15 %5 %11 %18 + %28 = OpIAdd %10 %18 %12 + OpStore %27 %26 + %29 = OpPtrNotEqual %9 %19 %21 + %30 = OpSelect %10 %29 %12 %11 + %31 = OpAccessChain %15 %5 %11 %28 + %32 = OpIAdd %10 %28 %12 + OpStore %31 %30 + %33 = OpPtrNotEqual %9 %20 %22 + %34 = OpSelect %10 %33 %12 %11 + %35 = OpAccessChain %15 %5 %11 %32 + %36 = OpIAdd %10 %32 %12 + OpStore %35 %34 + %37 = OpPtrNotEqual %9 %2 %4 + %38 = OpSelect %10 %37 %12 %11 + %39 = OpAccessChain %15 %5 %11 %36 + %40 = OpIAdd %10 %36 %12 + OpStore %39 %38 + %41 = OpPtrNotEqual %9 %19 %23 + %42 = OpSelect %10 %41 %12 %11 + %43 = OpAccessChain %15 %5 %11 %40 + %44 = OpIAdd %10 %40 %12 + OpStore %43 %42 + %45 = OpPtrNotEqual %9 %20 %24 + %46 = OpSelect %10 %45 %12 %11 + %47 = OpAccessChain %15 %5 %11 %44 + %48 = OpIAdd %10 %44 %12 + OpStore %47 %46 + %49 = OpPtrNotEqual %9 %3 
%4 + %50 = OpSelect %10 %49 %12 %11 + %51 = OpAccessChain %15 %5 %11 %48 + %52 = OpIAdd %10 %48 %12 + OpStore %51 %50 + %53 = OpPtrNotEqual %9 %21 %23 + %54 = OpSelect %10 %53 %12 %11 + %55 = OpAccessChain %15 %5 %11 %52 + %56 = OpIAdd %10 %52 %12 + OpStore %55 %54 + %57 = OpPtrNotEqual %9 %22 %24 + %58 = OpSelect %10 %57 %12 %11 + %59 = OpAccessChain %15 %5 %11 %56 + %60 = OpIAdd %10 %56 %12 + OpStore %59 %58 + %61 = OpPtrNotEqual %9 %2 %2 + %62 = OpSelect %10 %61 %12 %11 + %63 = OpAccessChain %15 %5 %11 %60 + OpStore %63 %62 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp b/shaders-msl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp new file mode 100644 index 00000000000..b4e622baced --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp @@ -0,0 +1,78 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %A "A" + OpName %B "A" + OpName %C "A" + OpName %D "A" + OpName %E "A" + OpName %F "A" + OpName %G "A" + OpName %H "A" + OpName %I "A" + OpName %J "A" + OpName %K "A" + OpName %L "A" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %A SpecId 0 + OpDecorate %B SpecId 1 + OpDecorate %C SpecId 2 + OpDecorate %D SpecId 3 + OpDecorate %E SpecId 4 + OpDecorate %F SpecId 5 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction 
%void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %A = OpSpecConstant %int 0 + %B = OpSpecConstant %int 1 + %C = OpSpecConstant %int 2 + %D = OpSpecConstant %int 3 + %E = OpSpecConstant %int 4 + %F = OpSpecConstant %int 5 + %G = OpSpecConstantOp %int ISub %A %B + %H = OpSpecConstantOp %int ISub %G %C + %I = OpSpecConstantOp %int ISub %H %D + %J = OpSpecConstantOp %int ISub %I %E + %K = OpSpecConstantOp %int ISub %J %F + %L = OpSpecConstantOp %int IAdd %K %F +%_ptr_Uniform_int = OpTypePointer Uniform %int + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %32 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %19 + OpStore %32 %L + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp b/shaders-msl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp similarity index 100% rename from shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp rename to shaders-msl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp diff --git a/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.argument.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.argument.msl2.asm.frag new file mode 100644 index 00000000000..1a268acb2fa --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.argument.msl2.asm.frag @@ -0,0 +1,50 @@ +; SPIR-V +; Version: 1.0 +; Generator: 
Khronos Glslang Reference Front End; 10 +; Bound: 25 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %UBOs "UBOs" + OpMemberName %UBOs 0 "v" + OpName %ubos "ubos" + OpDecorate %FragColor Location 0 + OpMemberDecorate %UBOs 0 Offset 0 + OpDecorate %UBOs Block + OpDecorate %ubos DescriptorSet 0 + OpDecorate %ubos Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %UBOs = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_UBOs_uint_2 = OpTypeArray %UBOs %uint_2 +%_ptr_Uniform__arr_UBOs_uint_2 = OpTypePointer Uniform %_arr_UBOs_uint_2 +%_ptr_Uniform_UBOs = OpTypePointer Uniform %UBOs + %ubos = OpVariable %_ptr_Uniform__arr_UBOs_uint_2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %ptr0_partial = OpAccessChain %_ptr_Uniform_UBOs %ubos %int_0 + %ptr0 = OpAccessChain %_ptr_Uniform_v4float %ptr0_partial %int_0 + %ptr1_partial = OpAccessChain %_ptr_Uniform_UBOs %ubos %int_1 + %ptr1 = OpAccessChain %_ptr_Uniform_v4float %ptr1_partial %int_0 + %20 = OpLoad %v4float %ptr0 + %23 = OpLoad %v4float %ptr1 + %24 = OpFAdd %v4float %20 %23 + OpStore %FragColor %24 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.asm.frag b/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.asm.frag new file mode 100644 index 00000000000..1a268acb2fa --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.asm.frag @@ -0,0 +1,50 @@ +; SPIR-V 
+; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 25 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %UBOs "UBOs" + OpMemberName %UBOs 0 "v" + OpName %ubos "ubos" + OpDecorate %FragColor Location 0 + OpMemberDecorate %UBOs 0 Offset 0 + OpDecorate %UBOs Block + OpDecorate %ubos DescriptorSet 0 + OpDecorate %ubos Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %UBOs = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_UBOs_uint_2 = OpTypeArray %UBOs %uint_2 +%_ptr_Uniform__arr_UBOs_uint_2 = OpTypePointer Uniform %_arr_UBOs_uint_2 +%_ptr_Uniform_UBOs = OpTypePointer Uniform %UBOs + %ubos = OpVariable %_ptr_Uniform__arr_UBOs_uint_2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %ptr0_partial = OpAccessChain %_ptr_Uniform_UBOs %ubos %int_0 + %ptr0 = OpAccessChain %_ptr_Uniform_v4float %ptr0_partial %int_0 + %ptr1_partial = OpAccessChain %_ptr_Uniform_UBOs %ubos %int_1 + %ptr1 = OpAccessChain %_ptr_Uniform_v4float %ptr1_partial %int_0 + %20 = OpLoad %v4float %ptr0 + %23 = OpLoad %v4float %ptr1 + %24 = OpFAdd %v4float %20 %23 + OpStore %FragColor %24 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag b/shaders-msl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag new file mode 100644 index 00000000000..6782b124730 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag @@ 
-0,0 +1,83 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 27 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %_ + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpMemberName %AA 0 "foo" + OpMemberName %AB 0 "foo" + OpMemberName %A 0 "_aa" + OpMemberName %A 1 "ab" + OpMemberName %BA 0 "foo" + OpMemberName %BB 0 "foo" + OpMemberName %B 0 "_ba" + OpMemberName %B 1 "bb" + OpName %VertexData "VertexData" + OpMemberName %VertexData 0 "_a" + OpMemberName %VertexData 1 "b" + OpName %_ "" + OpMemberName %CA 0 "foo" + OpMemberName %C 0 "_ca" + OpMemberName %DA 0 "foo" + OpMemberName %D 0 "da" + OpName %UBO "UBO" + OpMemberName %UBO 0 "_c" + OpMemberName %UBO 1 "d" + OpName %__0 "" + OpMemberName %E 0 "a" + OpName %SSBO "SSBO" + ;OpMemberName %SSBO 0 "e" Test that we don't try to assign bogus aliases. + OpMemberName %SSBO 1 "_e" + OpMemberName %SSBO 2 "f" + OpName %__1 "" + OpDecorate %VertexData Block + OpDecorate %_ Location 0 + OpMemberDecorate %CA 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %DA 0 Offset 0 + OpMemberDecorate %D 0 Offset 0 + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 1 Offset 16 + OpDecorate %UBO Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpMemberDecorate %E 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpMemberDecorate %SSBO 2 Offset 8 + OpDecorate %SSBO BufferBlock + OpDecorate %__1 DescriptorSet 0 + OpDecorate %__1 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %AA = OpTypeStruct %int + %AB = OpTypeStruct %int + %A = OpTypeStruct %AA %AB + %BA = OpTypeStruct %int + %BB = OpTypeStruct %int + %B = OpTypeStruct %BA %BB + %VertexData = OpTypeStruct %A %B +%_ptr_Input_VertexData = OpTypePointer Input %VertexData + %_ = OpVariable %_ptr_Input_VertexData 
Input + %CA = OpTypeStruct %int + %C = OpTypeStruct %CA + %DA = OpTypeStruct %int + %D = OpTypeStruct %DA + %UBO = OpTypeStruct %C %D +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %__0 = OpVariable %_ptr_Uniform_UBO Uniform + %E = OpTypeStruct %int + %SSBO = OpTypeStruct %E %E %E +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %__1 = OpVariable %_ptr_Uniform_SSBO Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/component-insert-packed-expression.asm.frag b/shaders-msl-no-opt/asm/frag/component-insert-packed-expression.asm.frag new file mode 100644 index 00000000000..fb7cdb07184 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/component-insert-packed-expression.asm.frag @@ -0,0 +1,70 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 43 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %frag "main" %gl_FragCoord %out_var_SV_Target + OpExecutionMode %frag OriginUpperLeft + OpSource HLSL 600 + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "_BorderWidths" + OpName %_Globals "$Globals" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %frag "frag" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 0 + OpDecorate %_arr_float_uint_4 ArrayStride 16 + OpMemberDecorate %type__Globals 0 Offset 0 + OpDecorate %type__Globals Block + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %float_0 = OpConstant %float 0 + %int_2 = OpConstant %int 2 + %uint = OpTypeInt 32 0 + %float_1 = OpConstant %float 1 + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%type__Globals = OpTypeStruct %_arr_float_uint_4 +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals + %v4float = OpTypeVector %float 4 
+%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %21 = OpTypeFunction %void + %v2float = OpTypeVector %float 2 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %bool = OpTypeBool + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_Target = OpVariable %_ptr_Output_v4float Output + %frag = OpFunction %void None %21 + %25 = OpLabel + %26 = OpLoad %v4float %gl_FragCoord + %27 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_0 + %28 = OpLoad %float %27 + %29 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_1 + %30 = OpLoad %float %29 + %31 = OpCompositeConstruct %v2float %28 %30 + %32 = OpCompositeExtract %float %26 0 + %33 = OpFOrdGreaterThan %bool %32 %float_0 + OpSelectionMerge %34 None + OpBranchConditional %33 %35 %34 + %35 = OpLabel + %36 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_2 + %37 = OpLoad %float %36 + %38 = OpCompositeInsert %v2float %37 %31 0 + OpBranch %34 + %34 = OpLabel + %39 = OpPhi %v2float %31 %25 %38 %35 + %40 = OpCompositeExtract %float %39 0 + %41 = OpCompositeExtract %float %39 1 + %42 = OpCompositeConstruct %v4float %40 %41 %float_0 %float_1 + OpStore %out_var_SV_Target %42 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/composite-insert-inheritance.asm.frag b/shaders-msl-no-opt/asm/frag/composite-insert-inheritance.asm.frag new file mode 100644 index 00000000000..9408e69ac09 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/composite-insert-inheritance.asm.frag @@ -0,0 +1,127 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vInput %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %vInput "vInput" 
+ OpName %FragColor "FragColor" + OpName %phi "PHI" + OpDecorate %vInput RelaxedPrecision + OpDecorate %vInput Location 0 + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %b0 RelaxedPrecision + OpDecorate %b1 RelaxedPrecision + OpDecorate %b2 RelaxedPrecision + OpDecorate %b3 RelaxedPrecision + OpDecorate %c1 RelaxedPrecision + OpDecorate %c3 RelaxedPrecision + OpDecorate %d4_mp RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vInput = OpVariable %_ptr_Input_v4float Input + %float_1 = OpConstant %float 1 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %float_2 = OpConstant %float 2 + %uint_1 = OpConstant %uint 1 + %float_3 = OpConstant %float 3 + %uint_2 = OpConstant %uint 2 + %float_4 = OpConstant %float 4 + %uint_3 = OpConstant %uint 3 + %v4float_arr2 = OpTypeArray %v4float %uint_2 + %v44float = OpTypeMatrix %v4float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v4undef = OpUndef %v4float + %v4const = OpConstantNull %v4float + %v4arrconst = OpConstantNull %v4float_arr2 + %v44const = OpConstantNull %v44float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + + %loaded0 = OpLoad %v4float %vInput + + ; Basic case (highp). + %a0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %a1 = OpCompositeInsert %v4float %float_2 %a0 1 + %a2 = OpCompositeInsert %v4float %float_3 %a1 2 + %a3 = OpCompositeInsert %v4float %float_4 %a2 3 + OpStore %FragColor %a3 + + ; Basic case (mediump). 
+ %b0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %b1 = OpCompositeInsert %v4float %float_2 %b0 1 + %b2 = OpCompositeInsert %v4float %float_3 %b1 2 + %b3 = OpCompositeInsert %v4float %float_4 %b2 3 + OpStore %FragColor %b3 + + ; Mix relaxed precision. + %c0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %c1 = OpCompositeInsert %v4float %float_2 %c0 1 + %c2 = OpCompositeInsert %v4float %float_3 %c1 2 + %c3 = OpCompositeInsert %v4float %float_4 %c2 3 + OpStore %FragColor %c3 + + ; SSA use after insert + %d0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %d1 = OpCompositeInsert %v4float %float_2 %d0 1 + %d2 = OpCompositeInsert %v4float %float_3 %d1 2 + %d3 = OpCompositeInsert %v4float %float_4 %d2 3 + %d4 = OpFAdd %v4float %d3 %d0 + OpStore %FragColor %d4 + %d4_mp = OpFAdd %v4float %d3 %d1 + OpStore %FragColor %d4_mp + + ; Verify Insert behavior on Undef. + %e0 = OpCompositeInsert %v4float %float_1 %v4undef 0 + %e1 = OpCompositeInsert %v4float %float_2 %e0 1 + %e2 = OpCompositeInsert %v4float %float_3 %e1 2 + %e3 = OpCompositeInsert %v4float %float_4 %e2 3 + OpStore %FragColor %e3 + + ; Verify Insert behavior on Constant. + %f0 = OpCompositeInsert %v4float %float_1 %v4const 0 + OpStore %FragColor %f0 + + ; Verify Insert behavior on Array. + %g0 = OpCompositeInsert %v4float_arr2 %float_1 %v4arrconst 1 2 + %g1 = OpCompositeInsert %v4float_arr2 %float_2 %g0 0 3 + %g2 = OpCompositeExtract %v4float %g1 0 + OpStore %FragColor %g2 + %g3 = OpCompositeExtract %v4float %g1 1 + OpStore %FragColor %g3 + + ; Verify Insert behavior on Matrix. + %h0 = OpCompositeInsert %v44float %float_1 %v44const 1 2 + %h1 = OpCompositeInsert %v44float %float_2 %h0 2 3 + %h2 = OpCompositeExtract %v4float %h1 0 + OpStore %FragColor %h2 + %h3 = OpCompositeExtract %v4float %h1 1 + OpStore %FragColor %h3 + %h4 = OpCompositeExtract %v4float %h1 2 + OpStore %FragColor %h4 + %h5 = OpCompositeExtract %v4float %h1 3 + OpStore %FragColor %h5 + + ; Verify that we cannot RMW PHI variables. 
+ OpBranch %next + %next = OpLabel + %phi = OpPhi %v4float %d2 %5 + %i0 = OpCompositeInsert %v4float %float_4 %phi 3 + OpStore %FragColor %i0 + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/constant-composite-block-no-array-stride-2.asm.frag b/shaders-msl-no-opt/asm/frag/constant-composite-block-no-array-stride-2.asm.frag new file mode 100644 index 00000000000..d4bf014bbfa --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/constant-composite-block-no-array-stride-2.asm.frag @@ -0,0 +1,33 @@ +OpCapability Shader +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %frag_out +OpExecutionMode %main OriginUpperLeft +OpDecorate %frag_out Location 0 +OpMemberDecorate %type 1 Offset 0 +%void = OpTypeVoid +%float = OpTypeFloat 32 +%uint = OpTypeInt 32 0 +%uint_2 = OpConstant %uint 2 +%const_1 = OpConstant %float 1.0 +%const_2 = OpConstant %float 2.0 +%const_3 = OpConstant %float 3.0 +%const_4 = OpConstant %float 4.0 +%const_5 = OpConstant %float 5.0 +%const_6 = OpConstant %float 6.0 +%arr_float_2 = OpTypeArray %float %uint_2 +%const_arr0 = OpConstantComposite %arr_float_2 %const_1 %const_2 +%const_arr1 = OpConstantComposite %arr_float_2 %const_3 %const_4 +%const_arr2 = OpConstantComposite %arr_float_2 %const_5 %const_6 +%type = OpTypeStruct %arr_float_2 %arr_float_2 %arr_float_2 +%float_ptr = OpTypePointer Output %float +%const_var = OpConstantComposite %type %const_arr0 %const_arr1 %const_arr2 +%type_ptr = OpTypePointer Function %type +%frag_out = OpVariable %float_ptr Output +%main_func = OpTypeFunction %void +%main = OpFunction %void None %main_func +%label = OpLabel +%var = OpVariable %type_ptr Function +OpStore %var %const_var +OpStore %frag_out %const_1 +OpReturn +OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag b/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag new file mode 100644 index 00000000000..ebab7fd0c97 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag @@ -0,0 +1,50 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google rspirv; 0 +; Bound: 43 +; Schema: 0 + OpCapability ImageQuery + OpCapability Int8 + OpCapability RuntimeDescriptorArray + OpCapability StorageImageWriteWithoutFormat + OpCapability Shader + OpCapability VulkanMemoryModel + OpExtension "SPV_EXT_descriptor_indexing" + OpExtension "SPV_KHR_vulkan_memory_model" + OpMemoryModel Logical Vulkan + OpEntryPoint Fragment %1 "main" + OpExecutionMode %1 OriginUpperLeft + OpDecorate %2 ArrayStride 4 + OpMemberDecorate %3 0 Offset 0 + %4 = OpTypeInt 32 0 + %5 = OpTypeFloat 32 + %6 = OpTypePointer Function %5 + %7 = OpTypeVoid + %8 = OpTypeFunction %7 + %9 = OpConstant %4 0 + %10 = OpConstant %4 1 + %11 = OpConstant %4 2 + %12 = OpConstant %4 4 + %13 = OpConstant %4 3 + %14 = OpConstant %5 0 + %2 = OpTypeArray %5 %12 + %15 = OpTypePointer Function %2 + %16 = OpTypeFunction %7 %15 + %3 = OpTypeStruct %2 + %17 = OpTypePointer Function %3 + %1 = OpFunction %7 None %8 + %31 = OpLabel + %33 = OpVariable %17 Function + %34 = OpVariable %15 Function + %39 = OpAccessChain %6 %34 %9 + OpStore %39 %14 + %40 = OpAccessChain %6 %34 %10 + OpStore %40 %14 + %41 = OpAccessChain %6 %34 %11 + OpStore %41 %14 + %42 = OpAccessChain %6 %34 %13 + OpStore %42 %14 + %37 = OpAccessChain %15 %33 %9 + OpCopyMemory %37 %34 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/empty-struct-in-struct.asm.frag b/shaders-msl-no-opt/asm/frag/empty-struct-in-struct.asm.frag new file mode 100644 index 00000000000..a9650ddbb6b --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/empty-struct-in-struct.asm.frag @@ -0,0 +1,61 @@ +; SPIR-V +; Version: 1.2 +; Generator: Khronos; 0 +; Bound: 43 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %EntryPoint_Main "main" + OpExecutionMode %EntryPoint_Main OriginUpperLeft + OpSource Unknown 100 + OpName %EmptyStructTest "EmptyStructTest" 
+ OpName %EmptyStruct2Test "EmptyStruct2Test" + OpName %GetValue "GetValue" + OpName %GetValue2 "GetValue" + OpName %self "self" + OpName %self2 "self" + OpName %emptyStruct "emptyStruct" + OpName %value "value" + OpName %EntryPoint_Main "EntryPoint_Main" + +%EmptyStructTest = OpTypeStruct +%EmptyStruct2Test = OpTypeStruct %EmptyStructTest +%_ptr_Function_EmptyStruct2Test = OpTypePointer Function %EmptyStruct2Test + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %5 = OpTypeFunction %float %_ptr_Function_EmptyStruct2Test + %6 = OpTypeFunction %float %EmptyStruct2Test + %void = OpTypeVoid +%_ptr_Function_void = OpTypePointer Function %void + %8 = OpTypeFunction %void %_ptr_Function_EmptyStruct2Test + %9 = OpTypeFunction %void + %float_0 = OpConstant %float 0 + %value4 = OpConstantNull %EmptyStruct2Test + + %GetValue = OpFunction %float None %5 + %self = OpFunctionParameter %_ptr_Function_EmptyStruct2Test + %13 = OpLabel + OpReturnValue %float_0 + OpFunctionEnd + + %GetValue2 = OpFunction %float None %6 + %self2 = OpFunctionParameter %EmptyStruct2Test + %14 = OpLabel + OpReturnValue %float_0 + OpFunctionEnd + +%EntryPoint_Main = OpFunction %void None %9 + %37 = OpLabel + %emptyStruct = OpVariable %_ptr_Function_EmptyStruct2Test Function + %18 = OpVariable %_ptr_Function_EmptyStruct2Test Function + %value = OpVariable %_ptr_Function_float Function + %value2 = OpCompositeConstruct %EmptyStructTest + %value3 = OpCompositeConstruct %EmptyStruct2Test %value2 + %22 = OpFunctionCall %float %GetValue %emptyStruct + %23 = OpFunctionCall %float %GetValue2 %value3 + %24 = OpFunctionCall %float %GetValue2 %value4 + OpStore %value %22 + OpStore %value %23 + OpStore %value %24 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/fixup-entry-point-identifier.nomain.asm.frag b/shaders-msl-no-opt/asm/frag/fixup-entry-point-identifier.nomain.asm.frag new file mode 100644 index 00000000000..2f522f44b57 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/frag/fixup-entry-point-identifier.nomain.asm.frag @@ -0,0 +1,27 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 12 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %foo "_5ma@@in" %FragColor + OpExecutionMode %foo OriginUpperLeft + OpSource GLSL 450 + OpName %foo "FOO" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %11 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %foo = OpFunction %void None %3 + %5 = OpLabel + OpStore %FragColor %11 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/image-gather.asm.frag b/shaders-msl-no-opt/asm/frag/image-gather.asm.frag new file mode 100644 index 00000000000..f26bb07264e --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/image-gather.asm.frag @@ -0,0 +1,74 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google spiregg; 0 +; Bound: 36 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + OpExtension "SPV_GOOGLE_user_type" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %psMain "main" %gl_FragCoord %in_var_TEXCOORD0 %out_var_SV_Target0 + OpExecutionMode %psMain OriginUpperLeft + OpSource HLSL 500 + OpName %type_2d_image "type.2d.image" + OpName %g_texture "g_texture" + OpName %type_sampler "type.sampler" + OpName %g_sampler "g_sampler" + OpName %g_comp "g_comp" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %psMain "psMain" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_Position" + 
OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %g_texture DescriptorSet 0 + OpDecorate %g_texture Binding 0 + OpDecorate %g_sampler DescriptorSet 0 + OpDecorate %g_sampler Binding 0 + OpDecorate %g_comp DescriptorSet 0 + OpDecorate %g_comp Binding 1 + OpDecorateString %g_texture UserTypeGOOGLE "texture2d" + %float = OpTypeFloat 32 + %float_0_5 = OpConstant %float 0.5 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %v2int = OpTypeVector %int 2 + %16 = OpConstantComposite %v2int %int_0 %int_0 +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %25 = OpTypeFunction %void +%type_sampled_image = OpTypeSampledImage %type_2d_image +%g_texture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant + %g_sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %g_comp = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_v2float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %psMain = OpFunction %void None %25 + %26 = OpLabel + %27 = OpLoad %v2float %in_var_TEXCOORD0 + %28 = OpLoad %type_2d_image %g_texture + %29 = OpLoad %type_sampler %g_comp + %30 = OpSampledImage %type_sampled_image %28 %29 + %32 = OpLoad %type_sampler %g_sampler + %33 = OpSampledImage 
%type_sampled_image %28 %32 + %31 = OpImageGather %v4float %33 %27 %int_1 ConstOffset %16 + %34 = OpImageGather %v4float %33 %27 %int_0 ConstOffset %16 + %35 = OpFMul %v4float %34 %31 + OpStore %out_var_SV_Target0 %35 + OpReturn + OpFunctionEnd + diff --git a/shaders-msl-no-opt/asm/frag/in_block_with_aliased_struct_and_name.asm.frag b/shaders-msl-no-opt/asm/frag/in_block_with_aliased_struct_and_name.asm.frag new file mode 100644 index 00000000000..1840c9b1370 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/in_block_with_aliased_struct_and_name.asm.frag @@ -0,0 +1,65 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %foos %bars + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpMemberName %Foo 1 "b" + OpName %foos "ALIAS" + OpName %bars "ALIAS" + OpDecorate %FragColor Location 0 + OpDecorate %foos Location 1 + OpDecorate %bars Location 10 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %Foo = OpTypeStruct %float %float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_Foo_uint_4 = OpTypeArray %Foo %uint_4 +%_ptr_Input__arr_Foo_uint_4 = OpTypePointer Input %_arr_Foo_uint_4 + %foos = OpVariable %_ptr_Input__arr_Foo_uint_4 Input + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_0 = OpConstant %uint 0 +%_ptr_Output_float = OpTypePointer Output %float + %int_1 = OpConstant %int 1 + %uint_1 = OpConstant %uint 1 + %int_2 = OpConstant %int 2 + %uint_2 = OpConstant %uint 2 + %bars = OpVariable %_ptr_Input__arr_Foo_uint_4 Input + 
%int_3 = OpConstant %int 3 + %uint_3 = OpConstant %uint 3 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpAccessChain %_ptr_Input_float %foos %int_0 %int_0 + %20 = OpLoad %float %19 + %23 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %23 %20 + %25 = OpAccessChain %_ptr_Input_float %foos %int_1 %int_1 + %26 = OpLoad %float %25 + %28 = OpAccessChain %_ptr_Output_float %FragColor %uint_1 + OpStore %28 %26 + %30 = OpAccessChain %_ptr_Input_float %foos %int_2 %int_0 + %31 = OpLoad %float %30 + %33 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + OpStore %33 %31 + %36 = OpAccessChain %_ptr_Input_float %bars %int_3 %int_1 + %37 = OpLoad %float %36 + %39 = OpAccessChain %_ptr_Output_float %FragColor %uint_3 + OpStore %39 %37 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag b/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag deleted file mode 100644 index 8b09e5b68f8..00000000000 --- a/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag +++ /dev/null @@ -1,646 +0,0 @@ -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 3 -; Bound: 1532 -; Schema: 0 - OpCapability Shader - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Fragment %main "main" %IN_HPosition %IN_Uv_EdgeDistance1 %IN_UvStuds_EdgeDistance2 %IN_Color %IN_LightPosition_Fog %IN_View_Depth %IN_Normal_SpecPower %IN_Tangent %IN_PosLightSpace_Reflectance %IN_studIndex %_entryPointOutput - OpExecutionMode %main OriginUpperLeft - OpSource HLSL 500 - OpName %main "main" - OpName %VertexOutput "VertexOutput" - OpMemberName %VertexOutput 0 "HPosition" - OpMemberName %VertexOutput 1 "Uv_EdgeDistance1" - OpMemberName %VertexOutput 2 "UvStuds_EdgeDistance2" - OpMemberName %VertexOutput 3 "Color" - OpMemberName %VertexOutput 4 "LightPosition_Fog" - OpMemberName %VertexOutput 5 "View_Depth" - OpMemberName %VertexOutput 6 "Normal_SpecPower" - 
OpMemberName %VertexOutput 7 "Tangent" - OpMemberName %VertexOutput 8 "PosLightSpace_Reflectance" - OpMemberName %VertexOutput 9 "studIndex" - OpName %Surface "Surface" - OpMemberName %Surface 0 "albedo" - OpMemberName %Surface 1 "normal" - OpMemberName %Surface 2 "specular" - OpMemberName %Surface 3 "gloss" - OpMemberName %Surface 4 "reflectance" - OpMemberName %Surface 5 "opacity" - OpName %SurfaceInput "SurfaceInput" - OpMemberName %SurfaceInput 0 "Color" - OpMemberName %SurfaceInput 1 "Uv" - OpMemberName %SurfaceInput 2 "UvStuds" - OpName %Globals "Globals" - OpMemberName %Globals 0 "ViewProjection" - OpMemberName %Globals 1 "ViewRight" - OpMemberName %Globals 2 "ViewUp" - OpMemberName %Globals 3 "ViewDir" - OpMemberName %Globals 4 "CameraPosition" - OpMemberName %Globals 5 "AmbientColor" - OpMemberName %Globals 6 "Lamp0Color" - OpMemberName %Globals 7 "Lamp0Dir" - OpMemberName %Globals 8 "Lamp1Color" - OpMemberName %Globals 9 "FogParams" - OpMemberName %Globals 10 "FogColor" - OpMemberName %Globals 11 "LightBorder" - OpMemberName %Globals 12 "LightConfig0" - OpMemberName %Globals 13 "LightConfig1" - OpMemberName %Globals 14 "LightConfig2" - OpMemberName %Globals 15 "LightConfig3" - OpMemberName %Globals 16 "RefractionBias_FadeDistance_GlowFactor" - OpMemberName %Globals 17 "OutlineBrightness_ShadowInfo" - OpMemberName %Globals 18 "ShadowMatrix0" - OpMemberName %Globals 19 "ShadowMatrix1" - OpMemberName %Globals 20 "ShadowMatrix2" - OpName %CB0 "CB0" - OpMemberName %CB0 0 "CB0" - OpName %_ "" - OpName %LightMapTexture "LightMapTexture" - OpName %LightMapSampler "LightMapSampler" - OpName %ShadowMapSampler "ShadowMapSampler" - OpName %ShadowMapTexture "ShadowMapTexture" - OpName %EnvironmentMapTexture "EnvironmentMapTexture" - OpName %EnvironmentMapSampler "EnvironmentMapSampler" - OpName %IN_HPosition "IN.HPosition" - OpName %IN_Uv_EdgeDistance1 "IN.Uv_EdgeDistance1" - OpName %IN_UvStuds_EdgeDistance2 "IN.UvStuds_EdgeDistance2" - OpName %IN_Color "IN.Color" - 
OpName %IN_LightPosition_Fog "IN.LightPosition_Fog" - OpName %IN_View_Depth "IN.View_Depth" - OpName %IN_Normal_SpecPower "IN.Normal_SpecPower" - OpName %IN_Tangent "IN.Tangent" - OpName %IN_PosLightSpace_Reflectance "IN.PosLightSpace_Reflectance" - OpName %IN_studIndex "IN.studIndex" - OpName %_entryPointOutput "@entryPointOutput" - OpName %DiffuseMapSampler "DiffuseMapSampler" - OpName %DiffuseMapTexture "DiffuseMapTexture" - OpName %NormalMapSampler "NormalMapSampler" - OpName %NormalMapTexture "NormalMapTexture" - OpName %NormalDetailMapTexture "NormalDetailMapTexture" - OpName %NormalDetailMapSampler "NormalDetailMapSampler" - OpName %StudsMapTexture "StudsMapTexture" - OpName %StudsMapSampler "StudsMapSampler" - OpName %SpecularMapSampler "SpecularMapSampler" - OpName %SpecularMapTexture "SpecularMapTexture" - OpName %Params "Params" - OpMemberName %Params 0 "LqmatFarTilingFactor" - OpName %CB2 "CB2" - OpMemberName %CB2 0 "CB2" - OpMemberDecorate %Globals 0 ColMajor - OpMemberDecorate %Globals 0 Offset 0 - OpMemberDecorate %Globals 0 MatrixStride 16 - OpMemberDecorate %Globals 1 Offset 64 - OpMemberDecorate %Globals 2 Offset 80 - OpMemberDecorate %Globals 3 Offset 96 - OpMemberDecorate %Globals 4 Offset 112 - OpMemberDecorate %Globals 5 Offset 128 - OpMemberDecorate %Globals 6 Offset 144 - OpMemberDecorate %Globals 7 Offset 160 - OpMemberDecorate %Globals 8 Offset 176 - OpMemberDecorate %Globals 9 Offset 192 - OpMemberDecorate %Globals 10 Offset 208 - OpMemberDecorate %Globals 11 Offset 224 - OpMemberDecorate %Globals 12 Offset 240 - OpMemberDecorate %Globals 13 Offset 256 - OpMemberDecorate %Globals 14 Offset 272 - OpMemberDecorate %Globals 15 Offset 288 - OpMemberDecorate %Globals 16 Offset 304 - OpMemberDecorate %Globals 17 Offset 320 - OpMemberDecorate %Globals 18 Offset 336 - OpMemberDecorate %Globals 19 Offset 352 - OpMemberDecorate %Globals 20 Offset 368 - OpMemberDecorate %CB0 0 Offset 0 - OpDecorate %CB0 Block - OpDecorate %_ DescriptorSet 0 - 
OpDecorate %_ Binding 0 - OpDecorate %LightMapTexture DescriptorSet 1 - OpDecorate %LightMapTexture Binding 6 - OpDecorate %LightMapSampler DescriptorSet 1 - OpDecorate %LightMapSampler Binding 6 - OpDecorate %ShadowMapSampler DescriptorSet 1 - OpDecorate %ShadowMapSampler Binding 1 - OpDecorate %ShadowMapTexture DescriptorSet 1 - OpDecorate %ShadowMapTexture Binding 1 - OpDecorate %EnvironmentMapTexture DescriptorSet 1 - OpDecorate %EnvironmentMapTexture Binding 2 - OpDecorate %EnvironmentMapSampler DescriptorSet 1 - OpDecorate %EnvironmentMapSampler Binding 2 - OpDecorate %IN_HPosition BuiltIn FragCoord - OpDecorate %IN_Uv_EdgeDistance1 Location 0 - OpDecorate %IN_UvStuds_EdgeDistance2 Location 1 - OpDecorate %IN_Color Location 2 - OpDecorate %IN_LightPosition_Fog Location 3 - OpDecorate %IN_View_Depth Location 4 - OpDecorate %IN_Normal_SpecPower Location 5 - OpDecorate %IN_Tangent Location 6 - OpDecorate %IN_PosLightSpace_Reflectance Location 7 - OpDecorate %IN_studIndex Location 8 - OpDecorate %_entryPointOutput Location 0 - OpDecorate %DiffuseMapSampler DescriptorSet 1 - OpDecorate %DiffuseMapSampler Binding 3 - OpDecorate %DiffuseMapTexture DescriptorSet 1 - OpDecorate %DiffuseMapTexture Binding 3 - OpDecorate %NormalMapSampler DescriptorSet 1 - OpDecorate %NormalMapSampler Binding 4 - OpDecorate %NormalMapTexture DescriptorSet 1 - OpDecorate %NormalMapTexture Binding 4 - OpDecorate %NormalDetailMapTexture DescriptorSet 1 - OpDecorate %NormalDetailMapTexture Binding 8 - OpDecorate %NormalDetailMapSampler DescriptorSet 1 - OpDecorate %NormalDetailMapSampler Binding 8 - OpDecorate %StudsMapTexture DescriptorSet 1 - OpDecorate %StudsMapTexture Binding 0 - OpDecorate %StudsMapSampler DescriptorSet 1 - OpDecorate %StudsMapSampler Binding 0 - OpDecorate %SpecularMapSampler DescriptorSet 1 - OpDecorate %SpecularMapSampler Binding 5 - OpDecorate %SpecularMapTexture DescriptorSet 1 - OpDecorate %SpecularMapTexture Binding 5 - OpMemberDecorate %Params 0 Offset 0 - 
OpMemberDecorate %CB2 0 Offset 0 - OpDecorate %CB2 Block - %void = OpTypeVoid - %3 = OpTypeFunction %void - %float = OpTypeFloat 32 -%_ptr_Function_float = OpTypePointer Function %float - %8 = OpTypeFunction %float %_ptr_Function_float - %v4float = OpTypeVector %float 4 -%_ptr_Function_v4float = OpTypePointer Function %v4float - %v3float = OpTypeVector %float 3 - %18 = OpTypeFunction %v3float %_ptr_Function_v4float -%_ptr_Function_v3float = OpTypePointer Function %v3float - %23 = OpTypeFunction %v4float %_ptr_Function_v3float - %27 = OpTypeFunction %float %_ptr_Function_v3float - %31 = OpTypeFunction %float %_ptr_Function_float %_ptr_Function_float - %36 = OpTypeSampler -%_ptr_Function_36 = OpTypePointer Function %36 - %38 = OpTypeImage %float 2D 0 0 0 1 Unknown -%_ptr_Function_38 = OpTypePointer Function %38 - %40 = OpTypeFunction %float %_ptr_Function_36 %_ptr_Function_38 %_ptr_Function_v3float %_ptr_Function_float -%VertexOutput = OpTypeStruct %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v3float %v4float %float -%_ptr_Function_VertexOutput = OpTypePointer Function %VertexOutput - %Surface = OpTypeStruct %v3float %v3float %float %float %float %float - %50 = OpTypeFunction %Surface %_ptr_Function_VertexOutput - %54 = OpTypeFunction %v4float %_ptr_Function_VertexOutput - %v2float = OpTypeVector %float 2 -%_ptr_Function_v2float = OpTypePointer Function %v2float - %60 = OpTypeFunction %v4float %_ptr_Function_36 %_ptr_Function_38 %_ptr_Function_v2float %_ptr_Function_float %_ptr_Function_float -%SurfaceInput = OpTypeStruct %v4float %v2float %v2float -%_ptr_Function_SurfaceInput = OpTypePointer Function %SurfaceInput - %70 = OpTypeFunction %Surface %_ptr_Function_SurfaceInput %_ptr_Function_v2float - %float_0 = OpConstant %float 0 - %float_1 = OpConstant %float 1 - %float_2 = OpConstant %float 2 -%mat4v4float = OpTypeMatrix %v4float 4 - %Globals = OpTypeStruct %mat4v4float %v4float %v4float %v4float %v3float %v3float %v3float %v3float %v3float 
%v4float %v3float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float - %CB0 = OpTypeStruct %Globals -%_ptr_Uniform_CB0 = OpTypePointer Uniform %CB0 - %_ = OpVariable %_ptr_Uniform_CB0 Uniform - %int = OpTypeInt 32 1 - %int_0 = OpConstant %int 0 - %int_15 = OpConstant %int 15 -%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float - %int_14 = OpConstant %int 14 - %128 = OpConstantComposite %v3float %float_1 %float_1 %float_1 - %133 = OpTypeImage %float 3D 0 0 0 1 Unknown -%_ptr_UniformConstant_133 = OpTypePointer UniformConstant %133 -%LightMapTexture = OpVariable %_ptr_UniformConstant_133 UniformConstant -%_ptr_UniformConstant_36 = OpTypePointer UniformConstant %36 -%LightMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant - %140 = OpTypeSampledImage %133 - %int_11 = OpConstant %int 11 - %uint = OpTypeInt 32 0 - %float_9 = OpConstant %float 9 - %float_20 = OpConstant %float 20 - %float_0_5 = OpConstant %float 0.5 - %183 = OpTypeSampledImage %38 - %uint_0 = OpConstant %uint 0 - %uint_1 = OpConstant %uint 1 - %int_17 = OpConstant %int 17 - %uint_3 = OpConstant %uint 3 -%_ptr_Uniform_float = OpTypePointer Uniform %float - %float_0_25 = OpConstant %float 0.25 - %int_5 = OpConstant %int 5 -%float_0_00333333 = OpConstant %float 0.00333333 - %int_16 = OpConstant %int 16 -%_ptr_Function_Surface = OpTypePointer Function %Surface - %int_6 = OpConstant %int 6 - %int_7 = OpConstant %int 7 -%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float - %int_8 = OpConstant %int 8 -%ShadowMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant -%_ptr_UniformConstant_38 = OpTypePointer UniformConstant %38 -%ShadowMapTexture = OpVariable %_ptr_UniformConstant_38 UniformConstant - %367 = OpTypeImage %float Cube 0 0 0 1 Unknown -%_ptr_UniformConstant_367 = OpTypePointer UniformConstant %367 -%EnvironmentMapTexture = OpVariable %_ptr_UniformConstant_367 UniformConstant -%EnvironmentMapSampler = OpVariable %_ptr_UniformConstant_36 
UniformConstant - %373 = OpTypeSampledImage %367 - %float_1_5 = OpConstant %float 1.5 - %int_10 = OpConstant %int 10 -%_ptr_Input_v4float = OpTypePointer Input %v4float -%IN_HPosition = OpVariable %_ptr_Input_v4float Input -%IN_Uv_EdgeDistance1 = OpVariable %_ptr_Input_v4float Input -%IN_UvStuds_EdgeDistance2 = OpVariable %_ptr_Input_v4float Input - %IN_Color = OpVariable %_ptr_Input_v4float Input -%IN_LightPosition_Fog = OpVariable %_ptr_Input_v4float Input -%IN_View_Depth = OpVariable %_ptr_Input_v4float Input -%IN_Normal_SpecPower = OpVariable %_ptr_Input_v4float Input -%_ptr_Input_v3float = OpTypePointer Input %v3float - %IN_Tangent = OpVariable %_ptr_Input_v3float Input -%IN_PosLightSpace_Reflectance = OpVariable %_ptr_Input_v4float Input -%_ptr_Input_float = OpTypePointer Input %float -%IN_studIndex = OpVariable %_ptr_Input_float Input -%_ptr_Output_v4float = OpTypePointer Output %v4float -%_entryPointOutput = OpVariable %_ptr_Output_v4float Output - %bool = OpTypeBool -%DiffuseMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant -%DiffuseMapTexture = OpVariable %_ptr_UniformConstant_38 UniformConstant -%NormalMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant -%NormalMapTexture = OpVariable %_ptr_UniformConstant_38 UniformConstant -%NormalDetailMapTexture = OpVariable %_ptr_UniformConstant_38 UniformConstant -%NormalDetailMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant - %float_0_3 = OpConstant %float 0.3 -%StudsMapTexture = OpVariable %_ptr_UniformConstant_38 UniformConstant -%StudsMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant -%SpecularMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant -%SpecularMapTexture = OpVariable %_ptr_UniformConstant_38 UniformConstant - %float_0_75 = OpConstant %float 0.75 - %float_256 = OpConstant %float 256 - %689 = OpConstantComposite %v2float %float_2 %float_256 - %float_0_01 = OpConstant %float 0.01 - %692 = OpConstantComposite %v2float %float_0 
%float_0_01 - %float_0_8 = OpConstant %float 0.8 - %float_120 = OpConstant %float 120 - %697 = OpConstantComposite %v2float %float_0_8 %float_120 - %Params = OpTypeStruct %v4float - %CB2 = OpTypeStruct %Params -%_ptr_Uniform_CB2 = OpTypePointer Uniform %CB2 - %false = OpConstantFalse %bool - %1509 = OpUndef %VertexOutput - %1510 = OpUndef %SurfaceInput - %1511 = OpUndef %v2float - %1512 = OpUndef %v4float - %1531 = OpUndef %Surface - %main = OpFunction %void None %3 - %5 = OpLabel - %501 = OpLoad %v4float %IN_HPosition - %1378 = OpCompositeInsert %VertexOutput %501 %1509 0 - %504 = OpLoad %v4float %IN_Uv_EdgeDistance1 - %1380 = OpCompositeInsert %VertexOutput %504 %1378 1 - %507 = OpLoad %v4float %IN_UvStuds_EdgeDistance2 - %1382 = OpCompositeInsert %VertexOutput %507 %1380 2 - %510 = OpLoad %v4float %IN_Color - %1384 = OpCompositeInsert %VertexOutput %510 %1382 3 - %513 = OpLoad %v4float %IN_LightPosition_Fog - %1386 = OpCompositeInsert %VertexOutput %513 %1384 4 - %516 = OpLoad %v4float %IN_View_Depth - %1388 = OpCompositeInsert %VertexOutput %516 %1386 5 - %519 = OpLoad %v4float %IN_Normal_SpecPower - %1390 = OpCompositeInsert %VertexOutput %519 %1388 6 - %523 = OpLoad %v3float %IN_Tangent - %1392 = OpCompositeInsert %VertexOutput %523 %1390 7 - %526 = OpLoad %v4float %IN_PosLightSpace_Reflectance - %1394 = OpCompositeInsert %VertexOutput %526 %1392 8 - %530 = OpLoad %float %IN_studIndex - %1396 = OpCompositeInsert %VertexOutput %530 %1394 9 - %1400 = OpCompositeInsert %SurfaceInput %510 %1510 0 - %954 = OpVectorShuffle %v2float %504 %504 0 1 - %1404 = OpCompositeInsert %SurfaceInput %954 %1400 1 - %958 = OpVectorShuffle %v2float %507 %507 0 1 - %1408 = OpCompositeInsert %SurfaceInput %958 %1404 2 - %1410 = OpCompositeExtract %float %1408 2 1 - %962 = OpExtInst %float %1 Fract %1410 - %965 = OpFAdd %float %962 %530 - %966 = OpFMul %float %965 %float_0_25 - %1414 = OpCompositeInsert %SurfaceInput %966 %1408 2 1 - %1416 = OpCompositeExtract %float %1396 5 3 - %970 
= OpFMul %float %1416 %float_0_00333333 - %971 = OpFSub %float %float_1 %970 - %987 = OpExtInst %float %1 FClamp %971 %float_0 %float_1 - %976 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %int_16 %uint_1 - %977 = OpLoad %float %976 - %978 = OpFMul %float %1416 %977 - %979 = OpFSub %float %float_1 %978 - %990 = OpExtInst %float %1 FClamp %979 %float_0 %float_1 - %1024 = OpVectorTimesScalar %v2float %954 %float_1 - %1029 = OpLoad %36 %DiffuseMapSampler - %1030 = OpLoad %38 %DiffuseMapTexture - OpBranch %1119 - %1119 = OpLabel - OpLoopMerge %1120 %1121 None - OpBranch %1122 - %1122 = OpLabel - %1124 = OpFOrdEqual %bool %float_0 %float_0 - OpSelectionMerge %1125 None - OpBranchConditional %1124 %1126 %1127 - %1126 = OpLabel - %1130 = OpSampledImage %183 %1030 %1029 - %1132 = OpImageSampleImplicitLod %v4float %1130 %1024 - OpBranch %1120 - %1127 = OpLabel - %1134 = OpFSub %float %float_1 %float_0 - %1135 = OpFDiv %float %float_1 %1134 - %1138 = OpSampledImage %183 %1030 %1029 - %1140 = OpVectorTimesScalar %v2float %1024 %float_0_25 - %1141 = OpImageSampleImplicitLod %v4float %1138 %1140 - %1144 = OpSampledImage %183 %1030 %1029 - %1146 = OpImageSampleImplicitLod %v4float %1144 %1024 - %1149 = OpFMul %float %987 %1135 - %1152 = OpFMul %float %float_0 %1135 - %1153 = OpFSub %float %1149 %1152 - %1161 = OpExtInst %float %1 FClamp %1153 %float_0 %float_1 - %1155 = OpCompositeConstruct %v4float %1161 %1161 %1161 %1161 - %1156 = OpExtInst %v4float %1 FMix %1141 %1146 %1155 - OpBranch %1120 - %1125 = OpLabel - %1157 = OpUndef %v4float - OpBranch %1120 - %1121 = OpLabel - OpBranchConditional %false %1119 %1120 - %1120 = OpLabel - %1517 = OpPhi %v4float %1132 %1126 %1156 %1127 %1157 %1125 %1512 %1121 - %1035 = OpVectorTimesScalar %v4float %1517 %float_1 - %1036 = OpLoad %36 %NormalMapSampler - %1037 = OpLoad %38 %NormalMapTexture - OpBranch %1165 - %1165 = OpLabel - OpLoopMerge %1166 %1167 None - OpBranch %1168 - %1168 = OpLabel - OpSelectionMerge %1171 None - 
OpBranchConditional %1124 %1172 %1173 - %1172 = OpLabel - %1176 = OpSampledImage %183 %1037 %1036 - %1178 = OpImageSampleImplicitLod %v4float %1176 %1024 - OpBranch %1166 - %1173 = OpLabel - %1180 = OpFSub %float %float_1 %float_0 - %1181 = OpFDiv %float %float_1 %1180 - %1184 = OpSampledImage %183 %1037 %1036 - %1186 = OpVectorTimesScalar %v2float %1024 %float_0_25 - %1187 = OpImageSampleImplicitLod %v4float %1184 %1186 - %1190 = OpSampledImage %183 %1037 %1036 - %1192 = OpImageSampleImplicitLod %v4float %1190 %1024 - %1195 = OpFMul %float %990 %1181 - %1198 = OpFMul %float %float_0 %1181 - %1199 = OpFSub %float %1195 %1198 - %1206 = OpExtInst %float %1 FClamp %1199 %float_0 %float_1 - %1201 = OpCompositeConstruct %v4float %1206 %1206 %1206 %1206 - %1202 = OpExtInst %v4float %1 FMix %1187 %1192 %1201 - OpBranch %1166 - %1171 = OpLabel - %1203 = OpUndef %v4float - OpBranch %1166 - %1167 = OpLabel - OpBranchConditional %false %1165 %1166 - %1166 = OpLabel - %1523 = OpPhi %v4float %1178 %1172 %1202 %1173 %1203 %1171 %1512 %1167 - %1210 = OpVectorShuffle %v2float %1523 %1523 3 1 - %1211 = OpVectorTimesScalar %v2float %1210 %float_2 - %1212 = OpCompositeConstruct %v2float %float_1 %float_1 - %1213 = OpFSub %v2float %1211 %1212 - %1216 = OpFNegate %v2float %1213 - %1218 = OpDot %float %1216 %1213 - %1219 = OpFAdd %float %float_1 %1218 - %1220 = OpExtInst %float %1 FClamp %1219 %float_0 %float_1 - %1221 = OpExtInst %float %1 Sqrt %1220 - %1222 = OpCompositeExtract %float %1213 0 - %1223 = OpCompositeExtract %float %1213 1 - %1224 = OpCompositeConstruct %v3float %1222 %1223 %1221 - %1042 = OpLoad %38 %NormalDetailMapTexture - %1043 = OpLoad %36 %NormalDetailMapSampler - %1044 = OpSampledImage %183 %1042 %1043 - %1046 = OpVectorTimesScalar %v2float %1024 %float_0 - %1047 = OpImageSampleImplicitLod %v4float %1044 %1046 - %1228 = OpVectorShuffle %v2float %1047 %1047 3 1 - %1229 = OpVectorTimesScalar %v2float %1228 %float_2 - %1231 = OpFSub %v2float %1229 %1212 - %1234 = 
OpFNegate %v2float %1231 - %1236 = OpDot %float %1234 %1231 - %1237 = OpFAdd %float %float_1 %1236 - %1238 = OpExtInst %float %1 FClamp %1237 %float_0 %float_1 - %1239 = OpExtInst %float %1 Sqrt %1238 - %1240 = OpCompositeExtract %float %1231 0 - %1241 = OpCompositeExtract %float %1231 1 - %1242 = OpCompositeConstruct %v3float %1240 %1241 %1239 - %1050 = OpVectorShuffle %v2float %1242 %1242 0 1 - %1051 = OpVectorTimesScalar %v2float %1050 %float_0 - %1053 = OpVectorShuffle %v2float %1224 %1224 0 1 - %1054 = OpFAdd %v2float %1053 %1051 - %1056 = OpVectorShuffle %v3float %1224 %1054 3 4 2 - %1059 = OpVectorShuffle %v2float %1056 %1056 0 1 - %1060 = OpVectorTimesScalar %v2float %1059 %990 - %1062 = OpVectorShuffle %v3float %1056 %1060 3 4 2 - %1430 = OpCompositeExtract %float %1062 0 - %1065 = OpFMul %float %1430 %float_0_3 - %1066 = OpFAdd %float %float_1 %1065 - %1069 = OpVectorShuffle %v3float %510 %510 0 1 2 - %1071 = OpVectorShuffle %v3float %1035 %1035 0 1 2 - %1072 = OpFMul %v3float %1069 %1071 - %1074 = OpVectorTimesScalar %v3float %1072 %1066 - %1075 = OpLoad %38 %StudsMapTexture - %1076 = OpLoad %36 %StudsMapSampler - %1077 = OpSampledImage %183 %1075 %1076 - %1434 = OpCompositeExtract %v2float %1414 2 - %1080 = OpImageSampleImplicitLod %v4float %1077 %1434 - %1436 = OpCompositeExtract %float %1080 0 - %1083 = OpFMul %float %1436 %float_2 - %1085 = OpVectorTimesScalar %v3float %1074 %1083 - %1086 = OpLoad %36 %SpecularMapSampler - %1087 = OpLoad %38 %SpecularMapTexture - OpBranch %1246 - %1246 = OpLabel - OpLoopMerge %1247 %1248 None - OpBranch %1249 - %1249 = OpLabel - %1251 = OpFOrdEqual %bool %float_0_75 %float_0 - OpSelectionMerge %1252 None - OpBranchConditional %1251 %1253 %1254 - %1253 = OpLabel - %1257 = OpSampledImage %183 %1087 %1086 - %1259 = OpImageSampleImplicitLod %v4float %1257 %1024 - OpBranch %1247 - %1254 = OpLabel - %1261 = OpFSub %float %float_1 %float_0_75 - %1262 = OpFDiv %float %float_1 %1261 - %1265 = OpSampledImage %183 %1087 %1086 - 
%1267 = OpVectorTimesScalar %v2float %1024 %float_0_25 - %1268 = OpImageSampleImplicitLod %v4float %1265 %1267 - %1271 = OpSampledImage %183 %1087 %1086 - %1273 = OpImageSampleImplicitLod %v4float %1271 %1024 - %1276 = OpFMul %float %990 %1262 - %1279 = OpFMul %float %float_0_75 %1262 - %1280 = OpFSub %float %1276 %1279 - %1287 = OpExtInst %float %1 FClamp %1280 %float_0 %float_1 - %1282 = OpCompositeConstruct %v4float %1287 %1287 %1287 %1287 - %1283 = OpExtInst %v4float %1 FMix %1268 %1273 %1282 - OpBranch %1247 - %1252 = OpLabel - %1284 = OpUndef %v4float - OpBranch %1247 - %1248 = OpLabel - OpBranchConditional %false %1246 %1247 - %1247 = OpLabel - %1530 = OpPhi %v4float %1259 %1253 %1283 %1254 %1284 %1252 %1512 %1248 - %1091 = OpVectorShuffle %v2float %1530 %1530 0 1 - %1093 = OpFMul %v2float %1091 %689 - %1094 = OpFAdd %v2float %1093 %692 - %1097 = OpCompositeConstruct %v2float %990 %990 - %1098 = OpExtInst %v2float %1 FMix %697 %1094 %1097 - %1438 = OpCompositeInsert %Surface %1085 %1531 0 - %1440 = OpCompositeInsert %Surface %1062 %1438 1 - %1442 = OpCompositeExtract %float %1098 0 - %1444 = OpCompositeInsert %Surface %1442 %1440 2 - %1446 = OpCompositeExtract %float %1098 1 - %1448 = OpCompositeInsert %Surface %1446 %1444 3 - %1450 = OpCompositeExtract %float %1091 1 - %1112 = OpFMul %float %1450 %990 - %1113 = OpFMul %float %1112 %float_0 - %1452 = OpCompositeInsert %Surface %1113 %1448 4 - %1456 = OpCompositeExtract %float %1396 3 3 - %764 = OpCompositeExtract %float %1085 0 - %765 = OpCompositeExtract %float %1085 1 - %766 = OpCompositeExtract %float %1085 2 - %767 = OpCompositeConstruct %v4float %764 %765 %766 %1456 - %770 = OpVectorShuffle %v3float %519 %519 0 1 2 - %773 = OpExtInst %v3float %1 Cross %770 %523 - %1462 = OpCompositeExtract %float %1452 1 0 - %778 = OpVectorTimesScalar %v3float %523 %1462 - %1466 = OpCompositeExtract %float %1452 1 1 - %782 = OpVectorTimesScalar %v3float %773 %1466 - %783 = OpFAdd %v3float %778 %782 - %1468 = 
OpCompositeExtract %float %1452 1 2 - %789 = OpVectorTimesScalar %v3float %770 %1468 - %790 = OpFAdd %v3float %783 %789 - %791 = OpExtInst %v3float %1 Normalize %790 - %793 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 %int_7 - %794 = OpLoad %v3float %793 - %795 = OpFNegate %v3float %794 - %796 = OpDot %float %791 %795 - %1290 = OpExtInst %float %1 FClamp %796 %float_0 %float_1 - %799 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 %int_6 - %800 = OpLoad %v3float %799 - %801 = OpVectorTimesScalar %v3float %800 %1290 - %803 = OpFNegate %float %796 - %804 = OpExtInst %float %1 FMax %803 %float_0 - %805 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 %int_8 - %806 = OpLoad %v3float %805 - %807 = OpVectorTimesScalar %v3float %806 %804 - %808 = OpFAdd %v3float %801 %807 - %810 = OpExtInst %float %1 Step %float_0 %796 - %813 = OpFMul %float %810 %1442 - %820 = OpVectorShuffle %v3float %513 %513 0 1 2 - %1296 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %int_15 - %1297 = OpLoad %v4float %1296 - %1298 = OpVectorShuffle %v3float %1297 %1297 0 1 2 - %1300 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %int_14 - %1301 = OpLoad %v4float %1300 - %1302 = OpVectorShuffle %v3float %1301 %1301 0 1 2 - %1303 = OpFSub %v3float %820 %1302 - %1304 = OpExtInst %v3float %1 FAbs %1303 - %1305 = OpExtInst %v3float %1 Step %1298 %1304 - %1307 = OpDot %float %1305 %128 - %1328 = OpExtInst %float %1 FClamp %1307 %float_0 %float_1 - %1309 = OpLoad %133 %LightMapTexture - %1310 = OpLoad %36 %LightMapSampler - %1311 = OpSampledImage %140 %1309 %1310 - %1313 = OpVectorShuffle %v3float %820 %820 1 2 0 - %1317 = OpVectorTimesScalar %v3float %1313 %1328 - %1318 = OpFSub %v3float %1313 %1317 - %1319 = OpImageSampleImplicitLod %v4float %1311 %1318 - %1321 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %int_11 - %1322 = OpLoad %v4float %1321 - %1324 = OpCompositeConstruct %v4float %1328 %1328 %1328 %1328 - %1325 = OpExtInst %v4float %1 FMix %1319 %1322 %1324 - %822 = OpLoad %36 %ShadowMapSampler 
- %823 = OpLoad %38 %ShadowMapTexture - %826 = OpVectorShuffle %v3float %526 %526 0 1 2 - %1482 = OpCompositeExtract %float %1325 3 - %1337 = OpSampledImage %183 %823 %822 - %1339 = OpVectorShuffle %v2float %826 %826 0 1 - %1340 = OpImageSampleImplicitLod %v4float %1337 %1339 - %1341 = OpVectorShuffle %v2float %1340 %1340 0 1 - %1484 = OpCompositeExtract %float %826 2 - %1486 = OpCompositeExtract %float %1341 0 - %1363 = OpExtInst %float %1 Step %1486 %1484 - %1365 = OpFSub %float %1484 %float_0_5 - %1366 = OpExtInst %float %1 FAbs %1365 - %1367 = OpFMul %float %float_20 %1366 - %1368 = OpFSub %float %float_9 %1367 - %1369 = OpExtInst %float %1 FClamp %1368 %float_0 %float_1 - %1370 = OpFMul %float %1363 %1369 - %1488 = OpCompositeExtract %float %1341 1 - %1350 = OpFMul %float %1370 %1488 - %1351 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %int_17 %uint_3 - %1352 = OpLoad %float %1351 - %1353 = OpFMul %float %1350 %1352 - %1354 = OpFSub %float %float_1 %1353 - %1356 = OpFMul %float %1354 %1482 - %830 = OpLoad %367 %EnvironmentMapTexture - %831 = OpLoad %36 %EnvironmentMapSampler - %832 = OpSampledImage %373 %830 %831 - %835 = OpVectorShuffle %v3float %516 %516 0 1 2 - %836 = OpFNegate %v3float %835 - %838 = OpExtInst %v3float %1 Reflect %836 %791 - %839 = OpImageSampleImplicitLod %v4float %832 %838 - %840 = OpVectorShuffle %v3float %839 %839 0 1 2 - %842 = OpVectorShuffle %v3float %767 %767 0 1 2 - %845 = OpCompositeConstruct %v3float %1113 %1113 %1113 - %846 = OpExtInst %v3float %1 FMix %842 %840 %845 - %848 = OpVectorShuffle %v4float %767 %846 4 5 6 3 - %849 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 %int_5 - %850 = OpLoad %v3float %849 - %853 = OpVectorTimesScalar %v3float %808 %1356 - %854 = OpFAdd %v3float %850 %853 - %856 = OpVectorShuffle %v3float %1325 %1325 0 1 2 - %857 = OpFAdd %v3float %854 %856 - %859 = OpVectorShuffle %v3float %848 %848 0 1 2 - %860 = OpFMul %v3float %857 %859 - %865 = OpFMul %float %813 %1356 - %873 = OpExtInst %v3float %1 
Normalize %835 - %874 = OpFAdd %v3float %795 %873 - %875 = OpExtInst %v3float %1 Normalize %874 - %876 = OpDot %float %791 %875 - %877 = OpExtInst %float %1 FClamp %876 %float_0 %float_1 - %879 = OpExtInst %float %1 Pow %877 %1446 - %880 = OpFMul %float %865 %879 - %881 = OpVectorTimesScalar %v3float %800 %880 - %884 = OpFAdd %v3float %860 %881 - %886 = OpVectorShuffle %v4float %1512 %884 4 5 6 3 - %1494 = OpCompositeExtract %float %848 3 - %1496 = OpCompositeInsert %v4float %1494 %886 3 - %896 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %int_17 %uint_0 - %897 = OpLoad %float %896 - %898 = OpFMul %float %978 %897 - %899 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %int_17 %uint_1 - %900 = OpLoad %float %899 - %901 = OpFAdd %float %898 %900 - %1373 = OpExtInst %float %1 FClamp %901 %float_0 %float_1 - %905 = OpVectorShuffle %v2float %504 %504 3 2 - %908 = OpVectorShuffle %v2float %507 %507 3 2 - %909 = OpExtInst %v2float %1 FMin %905 %908 - %1504 = OpCompositeExtract %float %909 0 - %1506 = OpCompositeExtract %float %909 1 - %914 = OpExtInst %float %1 FMin %1504 %1506 - %916 = OpFDiv %float %914 %978 - %919 = OpFSub %float %float_1_5 %916 - %920 = OpFMul %float %1373 %919 - %922 = OpFAdd %float %920 %916 - %1376 = OpExtInst %float %1 FClamp %922 %float_0 %float_1 - %925 = OpVectorShuffle %v3float %1496 %1496 0 1 2 - %926 = OpVectorTimesScalar %v3float %925 %1376 - %928 = OpVectorShuffle %v4float %1496 %926 4 5 6 3 - %1508 = OpCompositeExtract %float %1396 4 3 - %931 = OpExtInst %float %1 FClamp %1508 %float_0 %float_1 - %932 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 %int_10 - %933 = OpLoad %v3float %932 - %935 = OpVectorShuffle %v3float %928 %928 0 1 2 - %937 = OpCompositeConstruct %v3float %931 %931 %931 - %938 = OpExtInst %v3float %1 FMix %933 %935 %937 - %940 = OpVectorShuffle %v4float %928 %938 4 5 6 3 - OpStore %_entryPointOutput %940 - OpReturn - OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag 
b/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag new file mode 100644 index 00000000000..518dbd81e44 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag @@ -0,0 +1,74 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 35 +; Schema: 0 + OpCapability Shader + OpCapability InputAttachment + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %load_subpasses_IP1_ "load_subpasses(IP1;" + OpName %uInput "uInput" + OpName %FragColor "FragColor" + OpName %uSubpass0 "uSubpass0" + OpName %uSubpass1 "uSubpass1" + OpName %gl_FragCoord "gl_FragCoord" + OpDecorate %load_subpasses_IP1_ RelaxedPrecision + OpDecorate %uInput RelaxedPrecision + OpDecorate %14 RelaxedPrecision + OpDecorate %19 RelaxedPrecision + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %uSubpass0 RelaxedPrecision + OpDecorate %uSubpass0 DescriptorSet 0 + OpDecorate %uSubpass0 Binding 0 + OpDecorate %uSubpass0 InputAttachmentIndex 0 + OpDecorate %25 RelaxedPrecision + OpDecorate %26 RelaxedPrecision + OpDecorate %uSubpass1 RelaxedPrecision + OpDecorate %uSubpass1 DescriptorSet 0 + OpDecorate %uSubpass1 Binding 1 + OpDecorate %uSubpass1 InputAttachmentIndex 1 + OpDecorate %28 RelaxedPrecision + OpDecorate %29 RelaxedPrecision + OpDecorate %gl_FragCoord BuiltIn FragCoord + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %7 = OpTypeImage %float SubpassData 0 0 0 2 Unknown +%_ptr_UniformConstant_7 = OpTypePointer UniformConstant %7 + %v4float = OpTypeVector %float 4 + %10 = OpTypeFunction %v4float %_ptr_UniformConstant_7 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v2int = OpTypeVector %int 2 + %18 = OpConstantComposite %v2int %int_0 %int_0 +%_ptr_Output_v4float = 
OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %uSubpass0 = OpVariable %_ptr_UniformConstant_7 UniformConstant + %uSubpass1 = OpVariable %_ptr_UniformConstant_7 UniformConstant +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %main = OpFunction %void None %3 + %5 = OpLabel + %25 = OpLoad %7 %uSubpass0 + %26 = OpImageRead %v4float %25 %18 + %28 = OpFunctionCall %v4float %load_subpasses_IP1_ %uSubpass1 + %29 = OpFAdd %v4float %26 %28 + ;%32 = OpLoad %v4float %gl_FragCoord + ;%33 = OpVectorShuffle %v4float %32 %32 0 1 0 1 + ;%34 = OpFAdd %v4float %29 %33 + OpStore %FragColor %29 + OpReturn + OpFunctionEnd +%load_subpasses_IP1_ = OpFunction %v4float None %10 + %uInput = OpFunctionParameter %_ptr_UniformConstant_7 + %13 = OpLabel + %14 = OpLoad %7 %uInput + %19 = OpImageRead %v4float %14 %18 + OpReturnValue %19 + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/modf-frexp-scalar-access-chain-output.asm.frag b/shaders-msl-no-opt/asm/frag/modf-frexp-scalar-access-chain-output.asm.frag new file mode 100644 index 00000000000..707fa550b93 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/modf-frexp-scalar-access-chain-output.asm.frag @@ -0,0 +1,36 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 17 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %col "col" + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float +%float_0_150000006 = OpConstant %float 0.150000006 + %v3float = OpTypeVector %float 3 +%_ptr_Function_v3float = OpTypePointer Function %v3float + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %v2int = OpTypeVector %int 2 
+%_ptr_Function_v2int = OpTypePointer Function %v2int +%_ptr_Function_int = OpTypePointer Function %int + %main = OpFunction %void None %3 + %5 = OpLabel + %col = OpVariable %_ptr_Function_v3float Function + %icol = OpVariable %_ptr_Function_v2int Function + %ptr_x = OpAccessChain %_ptr_Function_float %col %int_0 + %ptr_y = OpAccessChain %_ptr_Function_int %icol %int_1 + %16 = OpExtInst %float %1 Modf %float_0_150000006 %ptr_x + %17 = OpExtInst %float %1 Frexp %float_0_150000006 %ptr_y + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag b/shaders-msl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag new file mode 100644 index 00000000000..17aab1d8f77 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag @@ -0,0 +1,25 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 10 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragDepth + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main DepthReplacing + OpSource GLSL 450 + OpName %main "main" + OpName %gl_FragDepth "gl_FragDepth" + OpDecorate %gl_FragDepth BuiltIn FragDepth + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %float_0_5 = OpConstant %float 0.5 +%gl_FragDepth = OpVariable %_ptr_Output_float Output %float_0_5 + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/phi.zero-initialize.asm.frag b/shaders-msl-no-opt/asm/frag/phi.zero-initialize.asm.frag new file mode 100644 index 00000000000..3696660d36d --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/phi.zero-initialize.asm.frag @@ -0,0 +1,69 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 40 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport 
"GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vColor %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %vColor "vColor" + OpName %uninit_function_int "uninit_function_int" + OpName %FragColor "FragColor" + OpName %uninit_int "uninit_int" + OpName %uninit_vector "uninit_vector" + OpName %uninit_matrix "uninit_matrix" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpName %uninit_foo "uninit_foo" + OpDecorate %vColor Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vColor = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_10 = OpConstant %float 10 + %bool = OpTypeBool + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_20 = OpConstant %int 20 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Private_int = OpTypePointer Private %int + %uninit_int = OpUndef %int + %v4int = OpTypeVector %int 4 +%_ptr_Private_v4int = OpTypePointer Private %v4int +%uninit_vector = OpUndef %v4int +%mat4v4float = OpTypeMatrix %v4float 4 +%_ptr_Private_mat4v4float = OpTypePointer Private %mat4v4float +%uninit_matrix = OpUndef %mat4v4float + %Foo = OpTypeStruct %int +%_ptr_Private_Foo = OpTypePointer Private %Foo + %uninit_foo = OpUndef %Foo + %main = OpFunction %void None %3 + %5 = OpLabel +%uninit_function_int = OpVariable %_ptr_Function_int Function + %13 = OpAccessChain %_ptr_Input_float %vColor %uint_0 + %14 = OpLoad %float %13 + %17 = OpFOrdGreaterThan %bool %14 %float_10 + OpSelectionMerge %19 None + OpBranchConditional %17 %18 %24 + %18 = OpLabel + OpBranch %19 + %24 = OpLabel + OpBranch %19 + %19 = OpLabel + %27 = 
OpPhi %int %int_10 %18 %int_20 %24 + %28 = OpLoad %v4float %vColor + OpStore %FragColor %28 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag new file mode 100644 index 00000000000..ebd8d6bab75 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag @@ -0,0 +1,89 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = 
OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + OpReturn + OpFunctionEnd + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + OpBeginInvocationInterlockEXT + %43 = OpFunctionCall %void %callee2_ + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag new file mode 100644 index 00000000000..69b8f911204 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag @@ -0,0 +1,121 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport 
"GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %ssbo2 DescriptorSet 0 + OpDecorate %ssbo2 Binding 2 + + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint + %SSBO2 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %bool = OpTypeBool + %true = OpConstantTrue %bool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = 
OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %callee3_res = OpFunctionCall %void %callee3_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %calle3_block = OpLabel + %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %frag_coord_x = OpLoad %float %frag_coord_x_ptr + %frag_coord_int = OpConvertFToS %int %frag_coord_x + %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int + OpStore %ssbo_ptr %uint_4 + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + + OpSelectionMerge %merged_block None + OpBranchConditional %true %dummy_block %merged_block + %dummy_block = OpLabel + OpBeginInvocationInterlockEXT + OpEndInvocationInterlockEXT + OpBranch %merged_block + + %merged_block = OpLabel + OpReturn + + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag new file mode 100644 index 00000000000..7c0fe9a2b24 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %call3res = OpFunctionCall %void %callee3_ + %call4res = OpFunctionCall %void %callee4_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %begin3 = OpLabel + OpBeginInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee4_ = OpFunction %void None %3 + %begin4 = OpLabel + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/private-initializer-direct-store.asm.frag b/shaders-msl-no-opt/asm/frag/private-initializer-direct-store.asm.frag new file mode 100644 index 00000000000..49ed96094a5 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/private-initializer-direct-store.asm.frag @@ -0,0 +1,32 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 17 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" 
+ OpName %b "b" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Private_float = OpTypePointer Private %float + %float_10 = OpConstant %float 10 + %float_20 = OpConstant %float 20 + %b = OpVariable %_ptr_Private_float Private %float_10 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %main = OpFunction %void None %3 + %5 = OpLabel + OpStore %b %float_20 + %15 = OpLoad %float %b + %16 = OpFAdd %float %15 %15 + OpStore %FragColor %16 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/reserved-function-identifier.asm.frag b/shaders-msl-no-opt/asm/frag/reserved-function-identifier.asm.frag new file mode 100644 index 00000000000..a5a16f2873b --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/reserved-function-identifier.asm.frag @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %ACOS_f1_ "mat3" + OpName %a "a" + OpName %ACOS_i1_ "gl_Foo" + OpName %a_0 "a" + OpName %FragColor "FragColor" + OpName %param "param" + OpName %param_0 "param" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %8 = OpTypeFunction %float %_ptr_Function_float + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %14 = OpTypeFunction %float %_ptr_Function_int + %float_1 = OpConstant %float 1 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %float_2 = OpConstant %float 2 + %int_4 = OpConstant %int 4 + %main = OpFunction %void None %3 + %5 = 
OpLabel + %param = OpVariable %_ptr_Function_float Function + %param_0 = OpVariable %_ptr_Function_int Function + OpStore %param %float_2 + %32 = OpFunctionCall %float %ACOS_f1_ %param + OpStore %param_0 %int_4 + %35 = OpFunctionCall %float %ACOS_i1_ %param_0 + %36 = OpFAdd %float %32 %35 + OpStore %FragColor %36 + OpReturn + OpFunctionEnd + %ACOS_f1_ = OpFunction %float None %8 + %a = OpFunctionParameter %_ptr_Function_float + %11 = OpLabel + %18 = OpLoad %float %a + %20 = OpFAdd %float %18 %float_1 + OpReturnValue %20 + OpFunctionEnd + %ACOS_i1_ = OpFunction %float None %14 + %a_0 = OpFunctionParameter %_ptr_Function_int + %17 = OpLabel + %23 = OpLoad %int %a_0 + %24 = OpConvertSToF %float %23 + %25 = OpFAdd %float %24 %float_1 + OpReturnValue %25 + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/scalar-select.spv14.asm.frag b/shaders-msl-no-opt/asm/frag/scalar-select.spv14.asm.frag new file mode 100644 index 00000000000..07450ee80b6 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/scalar-select.spv14.asm.frag @@ -0,0 +1,62 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %bool = OpTypeBool + %false = OpConstantFalse %bool + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %true = OpConstantTrue %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %s = OpTypeStruct %float + %arr = OpTypeArray %float %uint_2 +%_ptr_Function_s = OpTypePointer Function %s +%_ptr_Function_arr = OpTypePointer Function %arr + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %17 = OpConstantComposite %v4float 
%float_1 %float_1 %float_0 %float_1 + %18 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %19 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %20 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %s0 = OpConstantComposite %s %float_0 + %s1 = OpConstantComposite %s %float_1 + %v4bool = OpTypeVector %bool 4 + %b4 = OpConstantComposite %v4bool %false %true %false %true + %arr1 = OpConstantComposite %arr %float_0 %float_1 + %arr2 = OpConstantComposite %arr %float_1 %float_0 + %main = OpFunction %void None %3 + %5 = OpLabel + %ss = OpVariable %_ptr_Function_s Function + %arrvar = OpVariable %_ptr_Function_arr Function + ; Not trivial + %21 = OpSelect %v4float %false %17 %18 + OpStore %FragColor %21 + ; Trivial + %22 = OpSelect %v4float %false %19 %20 + OpStore %FragColor %22 + ; Vector not trivial + %23 = OpSelect %v4float %b4 %17 %18 + OpStore %FragColor %23 + ; Vector trivial + %24 = OpSelect %v4float %b4 %19 %20 + OpStore %FragColor %24 + ; Struct selection + %sout = OpSelect %s %false %s0 %s1 + OpStore %ss %sout + ; Array selection + %arrout = OpSelect %arr %true %arr1 %arr2 + OpStore %arrvar %arrout + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/subgroup-arithmetic-cast.msl21.asm.frag b/shaders-msl-no-opt/asm/frag/subgroup-arithmetic-cast.msl21.asm.frag new file mode 100644 index 00000000000..5f0734062d6 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/subgroup-arithmetic-cast.msl21.asm.frag @@ -0,0 +1,65 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 78 +; Schema: 0 + OpCapability Shader + OpCapability GroupNonUniform + OpCapability GroupNonUniformArithmetic + OpCapability GroupNonUniformClustered + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %index %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_KHR_shader_subgroup_arithmetic" + 
OpSourceExtension "GL_KHR_shader_subgroup_basic" + OpSourceExtension "GL_KHR_shader_subgroup_clustered" + OpName %main "main" + OpName %index "index" + OpName %FragColor "FragColor" + OpDecorate %index Flat + OpDecorate %index Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int + %index = OpVariable %_ptr_Input_int Input + %uint_3 = OpConstant %uint 3 + %uint_4 = OpConstant %uint 4 +%_ptr_Output_uint = OpTypePointer Output %uint + %FragColor = OpVariable %_ptr_Output_uint Output + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpLoad %int %index + %u = OpBitcast %uint %i + %res0 = OpGroupNonUniformSMin %uint %uint_3 Reduce %i + %res1 = OpGroupNonUniformSMax %uint %uint_3 Reduce %u + %res2 = OpGroupNonUniformUMin %uint %uint_3 Reduce %i + %res3 = OpGroupNonUniformUMax %uint %uint_3 Reduce %u + ;%res4 = OpGroupNonUniformSMax %uint %uint_3 InclusiveScan %i + ;%res5 = OpGroupNonUniformSMin %uint %uint_3 InclusiveScan %u + ;%res6 = OpGroupNonUniformUMax %uint %uint_3 ExclusiveScan %i + ;%res7 = OpGroupNonUniformUMin %uint %uint_3 ExclusiveScan %u + %res8 = OpGroupNonUniformSMin %uint %uint_3 ClusteredReduce %i %uint_4 + %res9 = OpGroupNonUniformSMax %uint %uint_3 ClusteredReduce %u %uint_4 + %res10 = OpGroupNonUniformUMin %uint %uint_3 ClusteredReduce %i %uint_4 + %res11 = OpGroupNonUniformUMax %uint %uint_3 ClusteredReduce %u %uint_4 + OpStore %FragColor %res0 + OpStore %FragColor %res1 + OpStore %FragColor %res2 + OpStore %FragColor %res3 + ;OpStore %FragColor %res4 + ;OpStore %FragColor %res5 + ;OpStore %FragColor %res6 + ;OpStore %FragColor %res7 + OpStore %FragColor %res8 + OpStore %FragColor %res9 + OpStore %FragColor %res10 + OpStore %FragColor %res11 + OpReturn + OpFunctionEnd diff --git 
a/shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag similarity index 100% rename from shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag rename to shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag diff --git a/shaders-msl-no-opt/asm/frag/usage-tracking-modf-io-pointer.asm.frag b/shaders-msl-no-opt/asm/frag/usage-tracking-modf-io-pointer.asm.frag new file mode 100644 index 00000000000..702b826e5bb --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/usage-tracking-modf-io-pointer.asm.frag @@ -0,0 +1,28 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 14 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %_GLF_color "_GLF_color" + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %10 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %13 = OpExtInst %v4float %1 Modf %10 %_GLF_color + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.msl2.asm.tesc new file mode 100644 index 00000000000..a3d489941c8 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.msl2.asm.tesc @@ -0,0 +1,85 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + 
OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 
2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..a3d489941c8 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc @@ -0,0 +1,85 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + 
OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output 
%_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.msl2.asm.tesc new file mode 100644 index 00000000000..a3d489941c8 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.msl2.asm.tesc @@ -0,0 +1,85 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + 
OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = 
OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..a3d489941c8 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc @@ -0,0 +1,85 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + 
%float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git 
a/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.msl2.asm.tesc new file mode 100644 index 00000000000..23424ff7ba3 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.msl2.asm.tesc @@ -0,0 +1,86 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 
1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..23424ff7ba3 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc @@ -0,0 +1,86 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 
%float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.msl2.asm.tesc new file mode 100644 index 00000000000..23424ff7ba3 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.msl2.asm.tesc @@ -0,0 +1,86 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical 
GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant 
%uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..23424ff7ba3 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc @@ -0,0 +1,86 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName 
%p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = 
OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + 
OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float 
%foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = 
OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch 
= OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + 
OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = 
OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate 
%gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore 
%20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull 
%_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + 
%foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + 
OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + 
%zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/composite-extract-row-major.asm.comp b/shaders-msl-no-opt/asm/packing/composite-extract-row-major.asm.comp new file mode 100644 index 00000000000..a37bdd91959 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/composite-extract-row-major.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 21 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBORow "SSBORow" + OpMemberName %SSBORow 0 "v" + OpMemberName %SSBORow 1 "row_major0" + OpName %_ "" + OpMemberDecorate %SSBORow 0 Offset 0 + OpMemberDecorate %SSBORow 1 RowMajor + OpMemberDecorate %SSBORow 1 Offset 16 + OpMemberDecorate %SSBORow 1 MatrixStride 16 + OpDecorate %SSBORow BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %SSBORow = OpTypeStruct %float %mat4v4float +%_ptr_Uniform_SSBORow = OpTypePointer Uniform 
%SSBORow + %_ = OpVariable %_ptr_Uniform_SSBORow Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %row_ptr = OpAccessChain %_ptr_Uniform_v4float %_ %int_1 %int_1 + %vec = OpLoad %v4float %row_ptr + %float_val = OpCompositeExtract %float %vec 2 + + %20 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %20 %float_val + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-2.asm.comp b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-2.asm.comp new file mode 100644 index 00000000000..4c222454447 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-2.asm.comp @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %SSBOScalar "SSBOScalar" + OpMemberName %SSBOScalar 0 "a" + OpMemberName %SSBOScalar 1 "b" + OpMemberName %SSBOScalar 2 "c" + OpName %_ "" + OpMemberDecorate %SSBOScalar 0 Offset 0 + OpMemberDecorate %SSBOScalar 1 RowMajor + OpMemberDecorate %SSBOScalar 1 Offset 16 + OpMemberDecorate %SSBOScalar 1 MatrixStride 16 + OpMemberDecorate %SSBOScalar 2 RowMajor + OpMemberDecorate %SSBOScalar 2 Offset 64 + OpMemberDecorate %SSBOScalar 2 MatrixStride 16 + OpDecorate %SSBOScalar BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%mat3v3float = OpTypeMatrix %v3float 3 + %SSBOScalar = 
OpTypeStruct %v3float %mat3v3float %mat3v3float +%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar + %_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 +%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %main = OpFunction %void None %3 + %5 = OpLabel + %b_ptr = OpAccessChain %_ptr_Uniform_mat3v3float %_ %int_1 + %c_ptr = OpAccessChain %_ptr_Uniform_mat3v3float %_ %int_2 + %b = OpLoad %mat3v3float %b_ptr + %c = OpLoad %mat3v3float %c_ptr + OpStore %b_ptr %c + %19 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 + %20 = OpLoad %v3float %19 + %21 = OpMatrixTimesVector %v3float %b %20 + %22 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 + OpStore %22 %21 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-3.asm.comp b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-3.asm.comp new file mode 100644 index 00000000000..85a220f516c --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-3.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 22 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %SSBOScalar "SSBOScalar" + OpMemberName %SSBOScalar 0 "a" + OpMemberName %SSBOScalar 1 "b" + OpMemberName %SSBOScalar 2 "c" + OpName %_ "" + OpMemberDecorate %SSBOScalar 0 Offset 0 + OpMemberDecorate %SSBOScalar 1 Offset 12 + OpMemberDecorate %SSBOScalar 2 Offset 24 + OpDecorate %SSBOScalar BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + 
%v3float = OpTypeVector %float 3 + %SSBOScalar = OpTypeStruct %v3float %v3float %v3float +%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar + %_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_2 = OpConstant %int 2 + %main = OpFunction %void None %3 + %5 = OpLabel + %15 = OpAccessChain %_ptr_Uniform_v3float %_ %int_1 + %16 = OpLoad %v3float %15 + %18 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + %19 = OpLoad %v3float %18 + OpStore %18 %16 + %20 = OpFMul %v3float %16 %19 + %21 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 + OpStore %21 %20 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-4.asm.comp b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-4.asm.comp new file mode 100644 index 00000000000..bef3fcb766c --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-4.asm.comp @@ -0,0 +1,61 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 29 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %SSBOScalar "SSBOScalar" + OpMemberName %SSBOScalar 0 "a" + OpMemberName %SSBOScalar 1 "b" + OpMemberName %SSBOScalar 2 "c" + OpName %_ "" + OpDecorate %_arr_v2float_uint_16 ArrayStride 16 + OpDecorate %_arr_v2float_uint_16_0 ArrayStride 16 + OpDecorate %_arr_float_uint_16 ArrayStride 16 + OpMemberDecorate %SSBOScalar 0 Offset 0 + OpMemberDecorate %SSBOScalar 1 Offset 256 + OpMemberDecorate %SSBOScalar 2 Offset 512 + OpDecorate %SSBOScalar BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = 
OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %uint = OpTypeInt 32 0 + %uint_16 = OpConstant %uint 16 +%_arr_v2float_uint_16 = OpTypeArray %v2float %uint_16 +%_arr_v2float_uint_16_0 = OpTypeArray %v2float %uint_16 +%_arr_float_uint_16 = OpTypeArray %float %uint_16 + %SSBOScalar = OpTypeStruct %_arr_v2float_uint_16 %_arr_v2float_uint_16_0 %_arr_float_uint_16 +%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar + %_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %int_2 = OpConstant %int 2 + %float_10 = OpConstant %float 10.0 + %float_11 = OpConstant %float 11.0 + %float_const = OpConstantComposite %v2float %float_10 %float_11 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %main = OpFunction %void None %3 + %5 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 %int_10 + %22 = OpLoad %v2float %21 + %25 = OpAccessChain %_ptr_Uniform_float %_ %int_2 %int_10 + %26 = OpLoad %float %25 + OpStore %21 %float_const + %27 = OpVectorTimesScalar %v2float %22 %26 + %28 = OpAccessChain %_ptr_Uniform_v2float %_ %int_0 %int_10 + OpStore %28 %27 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-5.asm.comp b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-5.asm.comp new file mode 100644 index 00000000000..8de22b82851 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-5.asm.comp @@ -0,0 +1,54 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 29 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %SSBOScalar "SSBOScalar" + OpMemberName 
%SSBOScalar 0 "a" + OpMemberName %SSBOScalar 1 "b" + OpMemberName %SSBOScalar 2 "c" + OpName %_ "" + OpMemberDecorate %SSBOScalar 0 Offset 0 + OpMemberDecorate %SSBOScalar 1 Offset 8 + OpMemberDecorate %SSBOScalar 2 Offset 20 + OpDecorate %SSBOScalar BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %SSBOScalar = OpTypeStruct %v2float %v3float %v3float +%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar + %_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_2 = OpConstant %int 2 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %float_1 = OpConstant %float 1 + %27 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Uniform_v3float %_ %int_1 + %17 = OpLoad %v3float %16 + %18 = OpVectorShuffle %v2float %17 %17 0 1 + %20 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + %21 = OpLoad %v3float %20 + %22 = OpVectorShuffle %v2float %21 %21 1 2 + OpStore %16 %27 + %23 = OpFMul %v2float %18 %22 + %25 = OpAccessChain %_ptr_Uniform_v2float %_ %int_0 + OpStore %25 %23 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding.asm.comp b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding.asm.comp new file mode 100644 index 00000000000..0b0ba53e8e1 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding.asm.comp @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource 
GLSL 450 + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %SSBOScalar "SSBOScalar" + OpMemberName %SSBOScalar 0 "a" + OpMemberName %SSBOScalar 1 "b" + OpMemberName %SSBOScalar 2 "c" + OpName %_ "" + OpMemberDecorate %SSBOScalar 0 Offset 0 + OpMemberDecorate %SSBOScalar 1 RowMajor + OpMemberDecorate %SSBOScalar 1 Offset 12 + OpMemberDecorate %SSBOScalar 1 MatrixStride 12 + OpMemberDecorate %SSBOScalar 2 RowMajor + OpMemberDecorate %SSBOScalar 2 Offset 48 + OpMemberDecorate %SSBOScalar 2 MatrixStride 12 + OpDecorate %SSBOScalar BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%mat3v3float = OpTypeMatrix %v3float 3 + %SSBOScalar = OpTypeStruct %v3float %mat3v3float %mat3v3float +%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar + %_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 +%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %main = OpFunction %void None %3 + %5 = OpLabel + %b_ptr = OpAccessChain %_ptr_Uniform_mat3v3float %_ %int_1 + %c_ptr = OpAccessChain %_ptr_Uniform_mat3v3float %_ %int_2 + %b = OpLoad %mat3v3float %b_ptr + %c = OpLoad %mat3v3float %c_ptr + OpStore %b_ptr %c + %19 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 + %20 = OpLoad %v3float %19 + %21 = OpMatrixTimesVector %v3float %b %20 + %22 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 + OpStore %22 %21 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/packed-vector-extract-insert.asm.comp b/shaders-msl-no-opt/asm/packing/packed-vector-extract-insert.asm.comp new file mode 100644 index 00000000000..70b17527919 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/packed-vector-extract-insert.asm.comp @@ -0,0 +1,57 @@ +; SPIR-V +; 
Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 28 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %SSBOScalar "SSBOScalar" + OpMemberName %SSBOScalar 0 "a" + OpMemberName %SSBOScalar 1 "b" + OpMemberName %SSBOScalar 2 "c" + OpMemberName %SSBOScalar 3 "d" + OpName %_ "" + OpMemberDecorate %SSBOScalar 0 Offset 0 + OpMemberDecorate %SSBOScalar 1 Offset 8 + OpMemberDecorate %SSBOScalar 2 Offset 20 + OpMemberDecorate %SSBOScalar 3 Offset 32 + OpDecorate %SSBOScalar BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %SSBOScalar = OpTypeStruct %v2float %v3float %v3float %v3float +%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar + %_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_2 = OpConstant %int 2 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %float_2 = OpConstant %float 2.0 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %main = OpFunction %void None %3 + %5 = OpLabel + %v3_ptr = OpAccessChain %_ptr_Uniform_v3float %_ %int_1 + %v3 = OpLoad %v3float %v3_ptr + %v3_mod = OpCompositeInsert %v3float %float_2 %v3 2 + %v2 = OpVectorShuffle %v2float %v3 %v3 0 1 + %v1 = OpCompositeExtract %float %v3 2 + %v2_mul = OpVectorTimesScalar %v2float %v2 %v1 + %v2_ptr = OpAccessChain %_ptr_Uniform_v2float %_ %int_0 + OpStore %v2_ptr %v2_mul + OpStore %v3_ptr %v3_mod + OpReturn + OpFunctionEnd diff --git 
a/shaders-msl-no-opt/asm/packing/row-major-split-access-chain.asm.comp b/shaders-msl-no-opt/asm/packing/row-major-split-access-chain.asm.comp new file mode 100644 index 00000000000..398c8d135c8 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/row-major-split-access-chain.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 21 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBORow "SSBORow" + OpMemberName %SSBORow 0 "v" + OpMemberName %SSBORow 1 "row_major0" + OpName %_ "" + OpMemberDecorate %SSBORow 0 Offset 0 + OpMemberDecorate %SSBORow 1 RowMajor + OpMemberDecorate %SSBORow 1 Offset 16 + OpMemberDecorate %SSBORow 1 MatrixStride 16 + OpDecorate %SSBORow BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %SSBORow = OpTypeStruct %float %mat4v4float +%_ptr_Uniform_SSBORow = OpTypePointer Uniform %SSBORow + %_ = OpVariable %_ptr_Uniform_SSBORow Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %row_ptr = OpAccessChain %_ptr_Uniform_v4float %_ %int_1 %int_1 + %float_ptr = OpAccessChain %_ptr_Uniform_float %row_ptr %uint_2 + + %19 = OpLoad %float %float_ptr + %20 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %20 %19 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-array-float2.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-array-float2.asm.frag new file mode 
100644 index 00000000000..85249d99810 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-array-float2.asm.frag @@ -0,0 +1,54 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 29 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpDecorate %_arr_v2float_uint_2 ArrayStride 16 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 1 Offset 24 + OpDecorate %type_Foo Block + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%_arr_v2float_uint_2 = OpTypeArray %v2float %uint_2 + %type_Foo = OpTypeStruct %_arr_v2float_uint_2 %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v2float = OpTypePointer Output %v2float + %void = OpTypeVoid + %16 = OpTypeFunction %void +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %16 + %19 = OpLabel + %20 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %int_0 + %21 = OpLoad %v2float %20 + %22 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %int_1 + %23 = OpLoad %v2float %22 + %24 = OpFAdd %v2float %21 %23 + %25 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %26 = OpLoad %float %25 + %27 = OpCompositeConstruct %v2float %26 %26 + %28 = OpFAdd %v2float %24 %27 + OpStore %out_var_SV_Target %28 + OpReturn + 
OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-array-float3-one-element.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-array-float3-one-element.asm.frag new file mode 100644 index 00000000000..7ed32bee417 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-array-float3-one-element.asm.frag @@ -0,0 +1,51 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpDecorate %_arr_v3float_uint_1 ArrayStride 16 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 1 Offset 12 + OpDecorate %type_Foo Block + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%_arr_v3float_uint_1 = OpTypeArray %v3float %uint_1 + %type_Foo = OpTypeStruct %_arr_v3float_uint_1 %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %16 = OpTypeFunction %void +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v3float Output + %main = OpFunction %void None %16 + %19 = OpLabel + %20 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %int_0 + %21 = OpLoad %v3float %20 + %22 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %23 = OpLoad %float %22 + %24 = OpCompositeConstruct %v3float %23 %23 %23 + 
%25 = OpFAdd %v3float %21 %24 + OpStore %out_var_SV_Target %25 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-array-float3.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-array-float3.asm.frag new file mode 100644 index 00000000000..406328b8d40 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-array-float3.asm.frag @@ -0,0 +1,54 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 29 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpDecorate %_arr_v3float_uint_2 ArrayStride 16 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 1 Offset 28 + OpDecorate %type_Foo Block + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2 + %type_Foo = OpTypeStruct %_arr_v3float_uint_2 %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %16 = OpTypeFunction %void +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v3float Output + %main = OpFunction %void None %16 + %19 = OpLabel + %20 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %int_0 + %21 = OpLoad %v3float %20 + %22 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %int_1 + %23 = OpLoad %v3float %22 + 
%24 = OpFAdd %v3float %21 %23 + %25 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %26 = OpLoad %float %25 + %27 = OpCompositeConstruct %v3float %26 %26 %26 + %28 = OpFAdd %v3float %24 %27 + OpStore %out_var_SV_Target %28 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float2x2-col-major.invalid.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float2x2-col-major.invalid.asm.frag new file mode 100644 index 00000000000..b9b4f5a0172 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float2x2-col-major.invalid.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 ColMajor + OpMemberDecorate %type_Foo 1 Offset 24 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%mat2v2float = OpTypeMatrix %v2float 2 + %type_Foo = OpTypeStruct %mat2v2float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v2float = OpTypePointer Output %v2float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable 
%_ptr_Output_v2float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_0 + %22 = OpLoad %v2float %21 + %23 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_1 + %24 = OpLoad %v2float %23 + %25 = OpFAdd %v2float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v2float %27 %27 + %29 = OpFAdd %v2float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float2x2-row-major.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float2x2-row-major.asm.frag new file mode 100644 index 00000000000..e1830e9cf91 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float2x2-row-major.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 RowMajor + OpMemberDecorate %type_Foo 1 Offset 24 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%mat2v2float = OpTypeMatrix %v2float 2 + %type_Foo = OpTypeStruct %mat2v2float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v2float = OpTypePointer Output %v2float + %void = 
OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_0 + %22 = OpLoad %v2float %21 + %23 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_1 + %24 = OpLoad %v2float %23 + %25 = OpFAdd %v2float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v2float %27 %27 + %29 = OpFAdd %v2float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float2x3-col-major.invalid.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float2x3-col-major.invalid.asm.frag new file mode 100644 index 00000000000..647939f2050 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float2x3-col-major.invalid.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 ColMajor + OpMemberDecorate %type_Foo 1 Offset 28 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = 
OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%mat2v3float = OpTypeMatrix %v3float 2 + %type_Foo = OpTypeStruct %mat2v3float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v3float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_0 + %22 = OpLoad %v3float %21 + %23 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_1 + %24 = OpLoad %v3float %23 + %25 = OpFAdd %v3float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v3float %27 %27 %27 + %29 = OpFAdd %v3float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float2x3-row-major.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float2x3-row-major.asm.frag new file mode 100644 index 00000000000..733465a0fb0 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float2x3-row-major.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 RowMajor + OpMemberDecorate 
%type_Foo 1 Offset 40 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%mat2v3float = OpTypeMatrix %v3float 2 + %type_Foo = OpTypeStruct %mat2v3float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v3float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_0 + %22 = OpLoad %v3float %21 + %23 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_1 + %24 = OpLoad %v3float %23 + %25 = OpFAdd %v3float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v3float %27 %27 %27 + %29 = OpFAdd %v3float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float3x2-col-major.invalid.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float3x2-col-major.invalid.asm.frag new file mode 100644 index 00000000000..c97fb81f6a3 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float3x2-col-major.invalid.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main 
"main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 ColMajor + OpMemberDecorate %type_Foo 1 Offset 40 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%mat3v2float = OpTypeMatrix %v2float 3 + %type_Foo = OpTypeStruct %mat3v2float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v2float = OpTypePointer Output %v2float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_0 + %22 = OpLoad %v2float %21 + %23 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_1 + %24 = OpLoad %v2float %23 + %25 = OpFAdd %v2float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v2float %27 %27 + %29 = OpFAdd %v2float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float3x2-row-major.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float3x2-row-major.asm.frag new file mode 100644 index 00000000000..b1cfa561e05 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float3x2-row-major.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + 
OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 RowMajor + OpMemberDecorate %type_Foo 1 Offset 28 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%mat3v2float = OpTypeMatrix %v2float 3 + %type_Foo = OpTypeStruct %mat3v2float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v2float = OpTypePointer Output %v2float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_0 + %22 = OpLoad %v2float %21 + %23 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_1 + %24 = OpLoad %v2float %23 + %25 = OpFAdd %v2float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v2float %27 %27 + %29 = OpFAdd %v2float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float3x3-col-major.invalid.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float3x3-col-major.invalid.asm.frag new file mode 100644 index 00000000000..cef8308b2fb --- /dev/null +++ 
b/shaders-msl-no-opt/asm/packing/scalar-float3x3-col-major.invalid.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 ColMajor + OpMemberDecorate %type_Foo 1 Offset 44 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%mat3v3float = OpTypeMatrix %v3float 3 + %type_Foo = OpTypeStruct %mat3v3float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v3float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_0 + %22 = OpLoad %v3float %21 + %23 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_1 + %24 = OpLoad %v3float %23 + %25 = OpFAdd %v3float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v3float %27 %27 %27 + %29 = OpFAdd %v3float %25 %28 + OpStore %out_var_SV_Target %29 + 
OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float3x3-row-major.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float3x3-row-major.asm.frag new file mode 100644 index 00000000000..35d7ebc3192 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float3x3-row-major.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 RowMajor + OpMemberDecorate %type_Foo 1 Offset 44 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%mat3v3float = OpTypeMatrix %v3float 3 + %type_Foo = OpTypeStruct %mat3v3float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v3float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_0 + %22 = OpLoad %v3float %21 + %23 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_1 + %24 = OpLoad %v3float %23 
+ %25 = OpFAdd %v3float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v3float %27 %27 %27 + %29 = OpFAdd %v3float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/temporary.zero-initialize.asm.frag b/shaders-msl-no-opt/asm/temporary.zero-initialize.asm.frag new file mode 100644 index 00000000000..eccff08b331 --- /dev/null +++ b/shaders-msl-no-opt/asm/temporary.zero-initialize.asm.frag @@ -0,0 +1,93 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 65 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vA %vB + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %vA "vA" + OpName %vB "vB" + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %vA RelaxedPrecision + OpDecorate %vA Flat + OpDecorate %vA Location 0 + OpDecorate %25 RelaxedPrecision + OpDecorate %30 RelaxedPrecision + OpDecorate %vB RelaxedPrecision + OpDecorate %vB Flat + OpDecorate %vB Location 1 + OpDecorate %38 RelaxedPrecision + OpDecorate %40 RelaxedPrecision + OpDecorate %49 RelaxedPrecision + OpDecorate %51 RelaxedPrecision + OpDecorate %53 RelaxedPrecision + OpDecorate %56 RelaxedPrecision + OpDecorate %64 RelaxedPrecision + OpDecorate %58 RelaxedPrecision + OpDecorate %57 RelaxedPrecision + OpDecorate %60 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_int = OpTypePointer 
Input %int + %vA = OpVariable %_ptr_Input_int Input + %bool = OpTypeBool + %int_20 = OpConstant %int 20 + %int_50 = OpConstant %int 50 + %vB = OpVariable %_ptr_Input_int Input + %int_40 = OpConstant %int 40 + %int_60 = OpConstant %int 60 + %int_10 = OpConstant %int 10 + %float_1 = OpConstant %float 1 + %63 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + OpStore %FragColor %11 + OpBranch %17 + %17 = OpLabel + %60 = OpPhi %int %int_0 %5 %58 %20 + %57 = OpPhi %int %int_0 %5 %56 %20 + %25 = OpLoad %int %vA + %27 = OpSLessThan %bool %57 %25 + OpLoopMerge %19 %20 None + OpBranchConditional %27 %18 %19 + %18 = OpLabel + %30 = OpIAdd %int %25 %57 + %32 = OpIEqual %bool %30 %int_20 + OpSelectionMerge %34 None + OpBranchConditional %32 %33 %36 + %33 = OpLabel + OpBranch %34 + %36 = OpLabel + %38 = OpLoad %int %vB + %40 = OpIAdd %int %38 %57 + %42 = OpIEqual %bool %40 %int_40 + %64 = OpSelect %int %42 %int_60 %60 + OpBranch %34 + %34 = OpLabel + %58 = OpPhi %int %int_50 %33 %64 %36 + %49 = OpIAdd %int %58 %int_10 + %51 = OpLoad %v4float %FragColor + %53 = OpFAdd %v4float %51 %63 + OpStore %FragColor %53 + OpBranch %20 + %20 = OpLabel + %56 = OpIAdd %int %57 %49 + OpBranch %17 + %19 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/array-control-point-initializer.asm.tesc b/shaders-msl-no-opt/asm/tesc/array-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..5bbe951df47 --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/array-control-point-initializer.asm.tesc @@ -0,0 +1,80 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 48 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %foo + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex 
"gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo "foo" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%_arr__arr_float_uint_3_uint_4 = OpTypeArray %_arr_float_uint_3 %uint_4 +%_ptr_Output__arr__arr_float_uint_3_uint_4 = OpTypePointer Output %_arr__arr_float_uint_3_uint_4 +%foo_zero = OpConstantNull %_arr__arr_float_uint_3_uint_4 + %foo = OpVariable %_ptr_Output__arr__arr_float_uint_3_uint_4 Output %foo_zero +%_ptr_Output_float = OpTypePointer Output %float + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %float_2 = 
OpConstant %float 2 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpLoad %int %gl_InvocationID + %31 = OpLoad %int %gl_InvocationID + %32 = OpConvertSToF %float %31 + %34 = OpAccessChain %_ptr_Output_float %foo %30 %int_0 + OpStore %34 %32 + %35 = OpLoad %int %gl_InvocationID + %37 = OpLoad %int %gl_InvocationID + %38 = OpConvertSToF %float %37 + %39 = OpFAdd %float %38 %float_1 + %40 = OpAccessChain %_ptr_Output_float %foo %35 %int_1 + OpStore %40 %39 + %41 = OpLoad %int %gl_InvocationID + %43 = OpLoad %int %gl_InvocationID + %44 = OpConvertSToF %float %43 + %46 = OpFAdd %float %44 %float_2 + %47 = OpAccessChain %_ptr_Output_float %foo %41 %int_2 + OpStore %47 %46 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/block-control-point-initializer.asm.tesc b/shaders-msl-no-opt/asm/tesc/block-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..d4e14be4abe --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/block-control-point-initializer.asm.tesc @@ -0,0 +1,70 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %verts + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %Verts "Verts" + OpMemberName %Verts 0 "a" + OpMemberName %Verts 1 "b" + OpName %verts "verts" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 
BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %Verts Block + OpDecorate %verts Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 +;%gl_out_zero = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output ;%gl_out_zero + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v2float = OpTypeVector %float 2 + %Verts = OpTypeStruct %float %v2float +%_arr_Verts_uint_4 = OpTypeArray %Verts %uint_4 +%_ptr_Output__arr_Verts_uint_4 = OpTypePointer Output %_arr_Verts_uint_4 + %verts_zero = OpConstantNull %_arr_Verts_uint_4 + %verts = OpVariable %_ptr_Output__arr_Verts_uint_4 Output %verts_zero +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpLoad %int %gl_InvocationID + %31 = OpLoad %int %gl_InvocationID + %32 = OpConvertSToF %float %31 + %34 = OpAccessChain %_ptr_Output_float %verts %30 %int_0 + OpStore %34 %32 + OpReturn + OpFunctionEnd diff --git 
a/shaders-msl-no-opt/asm/tesc/builtin-control-point-initializer.asm.tesc b/shaders-msl-no-opt/asm/tesc/builtin-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..1219183ca7e --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/builtin-control-point-initializer.asm.tesc @@ -0,0 +1,65 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %verts + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %Verts "Verts" + OpMemberName %Verts 0 "a" + OpMemberName %Verts 1 "b" + OpName %verts "verts" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %Verts Block + OpDecorate %verts Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%gl_PerVertex = OpTypeStruct %v4float %float + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 +%gl_out_zero = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %gl_out_zero + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 
%float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v2float = OpTypeVector %float 2 + %Verts = OpTypeStruct %float %v2float +%_arr_Verts_uint_4 = OpTypeArray %Verts %uint_4 +%_ptr_Output__arr_Verts_uint_4 = OpTypePointer Output %_arr_Verts_uint_4 + %verts_zero = OpConstantNull %_arr_Verts_uint_4 + %verts = OpVariable %_ptr_Output__arr_Verts_uint_4 Output %verts_zero +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpLoad %int %gl_InvocationID + %31 = OpLoad %int %gl_InvocationID + %32 = OpConvertSToF %float %31 + %34 = OpAccessChain %_ptr_Output_float %verts %30 %int_0 + OpStore %34 %32 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/composite-control-point-initializer.asm.tesc b/shaders-msl-no-opt/asm/tesc/composite-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..03ac99befb6 --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/composite-control-point-initializer.asm.tesc @@ -0,0 +1,69 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %foo + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpMemberName %Foo 1 "b" + OpMemberName %Foo 2 "c" + OpName %foo "foo" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate 
%gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v2float = OpTypeVector %float 2 + %Foo = OpTypeStruct %float %v2float %v4float +%_arr_Foo_uint_4 = OpTypeArray %Foo %uint_4 +%_ptr_Output__arr_Foo_uint_4 = OpTypePointer Output %_arr_Foo_uint_4 + %foo_zero = OpConstantNull %_arr_Foo_uint_4 + %foo = OpVariable %_ptr_Output__arr_Foo_uint_4 Output %foo_zero +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpLoad %int %gl_InvocationID + %31 = OpLoad %int %gl_InvocationID + %32 = OpConvertSToF %float %31 + %34 = OpAccessChain %_ptr_Output_float %foo %30 %int_0 + OpStore %34 %32 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/copy-memory-control-point.asm.tesc b/shaders-msl-no-opt/asm/tesc/copy-memory-control-point.asm.tesc new 
file mode 100644 index 00000000000..7c0a638f985 --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/copy-memory-control-point.asm.tesc @@ -0,0 +1,199 @@ +; SPIR-V +; Version: 1.0 +; Generator: Wine VKD3D Shader Compiler; 2 +; Bound: 126 +; Schema: 0 + OpCapability Tessellation + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %1 "main" %4 %30 %80 %101 %103 %108 %110 %115 %117 + OpExecutionMode %1 OutputVertices 3 + OpExecutionMode %1 Triangles + OpExecutionMode %1 SpacingEqual + OpExecutionMode %1 VertexOrderCw + OpName %1 "main" + OpName %11 "opc" + OpName %14 "cb1_struct" + OpName %16 "cb0_0" + OpName %22 "vicp" + OpName %23 "fork0" + OpName %26 "vForkInstanceId" + OpName %34 "r0" + OpName %32 "fork0_epilogue" + OpName %75 "fork1" + OpName %81 "fork1_epilogue" + OpName %101 "v0" + OpName %103 "v1" + OpName %108 "vicp0" + OpName %110 "vocp0" + OpName %115 "vicp1" + OpName %117 "vocp1" + OpDecorate %4 BuiltIn InvocationId + OpDecorate %13 ArrayStride 16 + OpDecorate %14 Block + OpMemberDecorate %14 0 Offset 0 + OpDecorate %16 DescriptorSet 0 + OpDecorate %16 Binding 0 + OpDecorate %30 BuiltIn TessLevelOuter + OpDecorate %30 Patch + OpDecorate %30 Patch + OpDecorate %30 Patch + OpDecorate %30 Patch + OpDecorate %80 BuiltIn TessLevelInner + OpDecorate %80 Patch + OpDecorate %80 Patch + OpDecorate %101 Location 0 + OpDecorate %103 Location 1 + OpDecorate %108 Location 2 + OpDecorate %110 Location 3 + OpDecorate %115 Location 4 + OpDecorate %117 Location 5 + %2 = OpTypeInt 32 1 + %3 = OpTypePointer Input %2 + %4 = OpVariable %3 Input + %5 = OpTypeFloat 32 + %6 = OpTypeVector %5 4 + %7 = OpTypeInt 32 0 + %8 = OpConstant %7 4 + %9 = OpTypeArray %6 %8 + %10 = OpTypePointer Private %9 + %11 = OpVariable %10 Private + %12 = OpConstant %7 1 + %13 = OpTypeArray %6 %12 + %14 = OpTypeStruct %13 + %15 = OpTypePointer Uniform %14 + %16 = OpVariable %15 Uniform + %17 = OpConstant %7 3 + %18 = OpTypeArray %6 %17 + %19 = OpConstant %7 2 + %20 = OpTypeArray %18 %19 + %21 
= OpTypePointer Private %20 + %22 = OpVariable %21 Private + %24 = OpTypeVoid + %25 = OpTypeFunction %24 %7 + %28 = OpTypeArray %5 %8 + %29 = OpTypePointer Output %28 + %30 = OpVariable %29 Output + %31 = OpConstant %7 0 + %33 = OpTypePointer Function %6 + %36 = OpTypePointer Function %5 + %38 = OpTypePointer Uniform %6 + %40 = OpTypePointer Uniform %5 + %46 = OpTypePointer Private %6 + %48 = OpTypePointer Private %5 + %52 = OpVariable %46 Private + %55 = OpVariable %46 Private + %58 = OpVariable %46 Private + %60 = OpTypeFunction %24 %46 %46 %46 + %69 = OpTypePointer Output %5 + %76 = OpTypeFunction %24 + %78 = OpTypeArray %5 %19 + %79 = OpTypePointer Output %78 + %80 = OpVariable %79 Output + %89 = OpVariable %46 Private + %91 = OpTypeFunction %24 %46 + %98 = OpTypePointer Private %18 + %100 = OpTypePointer Input %18 + %101 = OpVariable %100 Input + %103 = OpVariable %100 Input + %105 = OpTypeVector %5 3 + %106 = OpTypeArray %105 %17 + %107 = OpTypePointer Input %106 + %108 = OpVariable %107 Input + %109 = OpTypePointer Output %106 + %110 = OpVariable %109 Output + %111 = OpTypePointer Output %105 + %112 = OpTypePointer Input %105 + %115 = OpVariable %100 Input + %116 = OpTypePointer Output %18 + %117 = OpVariable %116 Output + %118 = OpTypePointer Output %6 + %119 = OpTypePointer Input %6 + %23 = OpFunction %24 None %25 + %26 = OpFunctionParameter %7 + %27 = OpLabel + %34 = OpVariable %33 Function + %35 = OpBitcast %5 %26 + %37 = OpInBoundsAccessChain %36 %34 %31 + OpStore %37 %35 + %39 = OpAccessChain %38 %16 %31 %31 + %41 = OpInBoundsAccessChain %40 %39 %31 + %42 = OpLoad %5 %41 + %43 = OpInBoundsAccessChain %36 %34 %31 + %44 = OpLoad %5 %43 + %45 = OpBitcast %2 %44 + %47 = OpAccessChain %46 %11 %45 + %49 = OpInBoundsAccessChain %48 %47 %31 + OpStore %49 %42 + %50 = OpAccessChain %46 %11 %31 + %51 = OpLoad %6 %50 + OpStore %52 %51 + %53 = OpAccessChain %46 %11 %12 + %54 = OpLoad %6 %53 + OpStore %55 %54 + %56 = OpAccessChain %46 %11 %19 + %57 = OpLoad %6 %56 + 
OpStore %58 %57 + %59 = OpFunctionCall %24 %32 %52 %55 %58 + OpReturn + OpFunctionEnd + %32 = OpFunction %24 None %60 + %61 = OpFunctionParameter %46 + %62 = OpFunctionParameter %46 + %63 = OpFunctionParameter %46 + %64 = OpLabel + %65 = OpLoad %6 %61 + %66 = OpLoad %6 %62 + %67 = OpLoad %6 %63 + %68 = OpCompositeExtract %5 %65 0 + %70 = OpAccessChain %69 %30 %31 + OpStore %70 %68 + %71 = OpCompositeExtract %5 %66 0 + %72 = OpAccessChain %69 %30 %12 + OpStore %72 %71 + %73 = OpCompositeExtract %5 %67 0 + %74 = OpAccessChain %69 %30 %19 + OpStore %74 %73 + OpReturn + OpFunctionEnd + %75 = OpFunction %24 None %76 + %77 = OpLabel + %82 = OpAccessChain %38 %16 %31 %31 + %83 = OpInBoundsAccessChain %40 %82 %31 + %84 = OpLoad %5 %83 + %85 = OpAccessChain %46 %11 %17 + %86 = OpInBoundsAccessChain %48 %85 %31 + OpStore %86 %84 + %87 = OpAccessChain %46 %11 %17 + %88 = OpLoad %6 %87 + OpStore %89 %88 + %90 = OpFunctionCall %24 %81 %89 + OpReturn + OpFunctionEnd + %81 = OpFunction %24 None %91 + %92 = OpFunctionParameter %46 + %93 = OpLabel + %94 = OpLoad %6 %92 + %95 = OpCompositeExtract %5 %94 0 + %96 = OpAccessChain %69 %80 %31 + OpStore %96 %95 + OpReturn + OpFunctionEnd + %1 = OpFunction %24 None %76 + %97 = OpLabel + %99 = OpInBoundsAccessChain %98 %22 %31 + OpCopyMemory %99 %101 + %102 = OpInBoundsAccessChain %98 %22 %12 + OpCopyMemory %102 %103 + %104 = OpLoad %2 %4 + %113 = OpAccessChain %111 %110 %104 + %114 = OpAccessChain %112 %108 %104 + OpCopyMemory %113 %114 + %120 = OpAccessChain %118 %117 %104 + %121 = OpAccessChain %119 %115 %104 + OpCopyMemory %120 %121 + %122 = OpFunctionCall %24 %23 %31 + %123 = OpFunctionCall %24 %23 %12 + %124 = OpFunctionCall %24 %23 %19 + %125 = OpFunctionCall %24 %75 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/copy-tess-level-tri.asm.tesc b/shaders-msl-no-opt/asm/tesc/copy-tess-level-tri.asm.tesc new file mode 100644 index 00000000000..346466e61f5 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/tesc/copy-tess-level-tri.asm.tesc @@ -0,0 +1,82 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 43 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_TessLevelInner %gl_TessLevelOuter %gl_out %gl_InvocationID + OpExecutionMode %main OutputVertices 1 + OpExecutionMode %main Triangles + OpSource GLSL 450 + OpName %main "main" + OpName %gl_TessLevelInner "gl_TessLevelInner" + OpName %gl_TessLevelOuter "gl_TessLevelOuter" + OpName %inner "inner" + OpName %outer "outer" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %14 = OpConstantComposite %_arr_float_uint_2 %float_1 %float_2 + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 
+%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %21 = OpConstantComposite %_arr_float_uint_4 %float_1 %float_2 %float_3 %float_4 +%_ptr_Function__arr_float_uint_2 = OpTypePointer Function %_arr_float_uint_2 +%_ptr_Function__arr_float_uint_4 = OpTypePointer Function %_arr_float_uint_4 + %v4float = OpTypeVector %float 4 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_1 = OpTypeArray %gl_PerVertex %uint_1 +%_ptr_Output__arr_gl_PerVertex_uint_1 = OpTypePointer Output %_arr_gl_PerVertex_uint_1 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_1 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %40 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %inner = OpVariable %_ptr_Function__arr_float_uint_2 Function + %outer = OpVariable %_ptr_Function__arr_float_uint_4 Function + OpStore %gl_TessLevelInner %14 + OpStore %gl_TessLevelOuter %21 + %24 = OpLoad %_arr_float_uint_2 %gl_TessLevelInner + OpStore %inner %24 + %27 = OpLoad %_arr_float_uint_4 %gl_TessLevelOuter + OpStore %outer %27 + %38 = OpLoad %int %gl_InvocationID + %42 = OpAccessChain %_ptr_Output_v4float %gl_out %38 %int_0 + OpStore %42 %40 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/plain-control-point-initializer.asm.tesc b/shaders-msl-no-opt/asm/tesc/plain-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..2a95da04b7b --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/plain-control-point-initializer.asm.tesc @@ -0,0 +1,63 @@ +; 
SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 33 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %v + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %v "v" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %v Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 + %v_zero = 
OpConstantNull %_arr_float_uint_4 + %v = OpVariable %_ptr_Output__arr_float_uint_4 Output %v_zero +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %28 = OpLoad %int %gl_InvocationID + %29 = OpLoad %int %gl_InvocationID + %30 = OpConvertSToF %float %29 + %32 = OpAccessChain %_ptr_Output_float %v %28 + OpStore %32 %30 + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc similarity index 100% rename from shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc rename to shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc diff --git a/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc similarity index 100% rename from shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc rename to shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc diff --git a/shaders-msl-no-opt/asm/tesc/tess-level-initializer-quad.asm.tesc b/shaders-msl-no-opt/asm/tesc/tess-level-initializer-quad.asm.tesc new file mode 100644 index 00000000000..53248f17e6a --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/tess-level-initializer-quad.asm.tesc @@ -0,0 +1,88 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 47 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %gl_TessLevelInner %gl_TessLevelOuter + OpExecutionMode %main OutputVertices 4 + OpExecutionMode %main Quads + OpSource GLSL 450 + OpName %main 
"main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %gl_TessLevelInner "gl_TessLevelInner" + OpName %gl_TessLevelOuter "gl_TessLevelOuter" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 + %inner_zero = OpConstantNull %_arr_float_uint_2 +%gl_TessLevelInner = OpVariable 
%_ptr_Output__arr_float_uint_2 Output %inner_zero +%_ptr_Output_float = OpTypePointer Output %float + %int_1 = OpConstant %int 1 + %float_2 = OpConstant %float 2 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 + %outer_zero = OpConstantNull %_arr_float_uint_4 +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output %outer_zero + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %int_2 = OpConstant %int 2 + %float_5 = OpConstant %float 5 + %int_3 = OpConstant %int 3 + %float_6 = OpConstant %float 6 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_0 + OpStore %30 %float_1 + %33 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_1 + OpStore %33 %float_2 + %38 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_0 + OpStore %38 %float_3 + %40 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 + OpStore %40 %float_4 + %43 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_2 + OpStore %43 %float_5 + %46 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_3 + OpStore %46 %float_6 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/tess-level-initializer-triangle.asm.tesc b/shaders-msl-no-opt/asm/tesc/tess-level-initializer-triangle.asm.tesc new file mode 100644 index 00000000000..6fbc33dc22f --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/tess-level-initializer-triangle.asm.tesc @@ -0,0 +1,88 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 47 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %gl_TessLevelInner %gl_TessLevelOuter + OpExecutionMode %main 
OutputVertices 4 + OpExecutionMode %main Triangles + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %gl_TessLevelInner "gl_TessLevelInner" + OpName %gl_TessLevelOuter "gl_TessLevelOuter" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 + %inner_zero 
= OpConstantNull %_arr_float_uint_2 +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output %inner_zero +%_ptr_Output_float = OpTypePointer Output %float + %int_1 = OpConstant %int 1 + %float_2 = OpConstant %float 2 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 + %outer_zero = OpConstantNull %_arr_float_uint_4 +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output %outer_zero + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %int_2 = OpConstant %int 2 + %float_5 = OpConstant %float 5 + %int_3 = OpConstant %int 3 + %float_6 = OpConstant %float 6 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_0 + OpStore %30 %float_1 + %33 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_1 + OpStore %33 %float_2 + %38 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_0 + OpStore %38 %float_3 + %40 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 + OpStore %40 %float_4 + %43 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_2 + OpStore %43 %float_5 + %46 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_3 + OpStore %46 %float_6 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/tess-level-read-write-in-function-tri.asm.tesc b/shaders-msl-no-opt/asm/tesc/tess-level-read-write-in-function-tri.asm.tesc new file mode 100644 index 00000000000..33b8883cc82 --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/tess-level-read-write-in-function-tri.asm.tesc @@ -0,0 +1,109 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 64 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" 
%gl_TessLevelInner %gl_TessLevelOuter %gl_out %gl_InvocationID + OpExecutionMode %main OutputVertices 1 + OpExecutionMode %main Triangles + OpSource GLSL 450 + OpName %main "main" + OpName %load_tess_level_in_func_ "load_tess_level_in_func(" + OpName %store_tess_level_in_func_ "store_tess_level_in_func(" + OpName %gl_TessLevelInner "gl_TessLevelInner" + OpName %gl_TessLevelOuter "gl_TessLevelOuter" + OpName %v "v" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %7 = OpTypeFunction %float + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Output_float = OpTypePointer Output %float + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output + %int_1 = OpConstant %int 1 + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = 
OpConstant %float 3 + %float_4 = OpConstant %float 4 + %int_2 = OpConstant %int 2 + %float_5 = OpConstant %float 5 + %int_3 = OpConstant %int 3 + %float_6 = OpConstant %float 6 +%_ptr_Function_float = OpTypePointer Function %float + %v4float = OpTypeVector %float 4 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_1 = OpTypeArray %gl_PerVertex %uint_1 +%_ptr_Output__arr_gl_PerVertex_uint_1 = OpTypePointer Output %_arr_gl_PerVertex_uint_1 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_1 Output +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable %_ptr_Function_float Function + %46 = OpFunctionCall %void %store_tess_level_in_func_ + %49 = OpFunctionCall %float %load_tess_level_in_func_ + OpStore %v %49 + %59 = OpLoad %int %gl_InvocationID + %60 = OpLoad %float %v + %61 = OpCompositeConstruct %v4float %60 %60 %60 %60 + %63 = OpAccessChain %_ptr_Output_v4float %gl_out %59 %int_0 + OpStore %63 %61 + OpReturn + OpFunctionEnd +%load_tess_level_in_func_ = OpFunction %float None %7 + %9 = OpLabel + %20 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_0 + %21 = OpLoad %float %20 + %27 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 + %28 = OpLoad %float %27 + %29 = OpFAdd %float %21 %28 + OpReturnValue %29 + OpFunctionEnd +%store_tess_level_in_func_ = OpFunction %void None %3 + %11 = OpLabel + %33 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_0 + OpStore %33 %float_1 + %35 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_1 + OpStore %35 %float_2 + %37 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_0 + OpStore %37 %float_3 + %39 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 + OpStore %39 
%float_4 + %42 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_2 + OpStore %42 %float_5 + %45 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_3 + OpStore %45 %float_6 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tese/copy-tess-level.asm.msl2.tese b/shaders-msl-no-opt/asm/tese/copy-tess-level.asm.msl2.tese new file mode 100644 index 00000000000..5a7e730634c --- /dev/null +++ b/shaders-msl-no-opt/asm/tese/copy-tess-level.asm.msl2.tese @@ -0,0 +1,58 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 35 +; Schema: 0 + OpCapability Tessellation + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %Domain "main" %gl_TessLevelOuter %gl_TessLevelInner %in_var_CUSTOM_VALUE %gl_TessCoord %out_var_CUSTOM_VALUE + OpExecutionMode %Domain Quads + OpSource HLSL 600 + OpName %in_var_CUSTOM_VALUE "in.var.CUSTOM_VALUE" + OpName %out_var_CUSTOM_VALUE "out.var.CUSTOM_VALUE" + OpName %Domain "Domain" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessCoord BuiltIn TessCoord + OpDecorate %gl_TessCoord Patch + OpDecorate %in_var_CUSTOM_VALUE Location 0 + OpDecorate %out_var_CUSTOM_VALUE Location 0 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Input__arr_float_uint_2 = OpTypePointer Input %_arr_float_uint_2 + %v4float = OpTypeVector %float 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 +%_ptr_Input__arr_v4float_uint_4 = OpTypePointer Input %_arr_v4float_uint_4 + %v3float = OpTypeVector %float 3 +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = 
OpTypeVoid + %22 = OpTypeFunction %void +%gl_TessLevelOuter = OpVariable %_ptr_Input__arr_float_uint_4 Input +%gl_TessLevelInner = OpVariable %_ptr_Input__arr_float_uint_2 Input +%in_var_CUSTOM_VALUE = OpVariable %_ptr_Input__arr_v4float_uint_4 Input +%gl_TessCoord = OpVariable %_ptr_Input_v3float Input +%out_var_CUSTOM_VALUE = OpVariable %_ptr_Output_v4float Output + %Domain = OpFunction %void None %22 + %23 = OpLabel + %24 = OpLoad %_arr_float_uint_4 %gl_TessLevelOuter + %25 = OpLoad %_arr_float_uint_2 %gl_TessLevelInner + %26 = OpCompositeExtract %float %24 0 + %27 = OpCompositeExtract %float %24 1 + %28 = OpCompositeExtract %float %24 2 + %29 = OpCompositeExtract %float %24 3 + %30 = OpCompositeExtract %float %25 0 + %31 = OpCompositeExtract %float %25 1 + %32 = OpFAdd %float %26 %30 + %33 = OpFAdd %float %27 %31 + %34 = OpCompositeConstruct %v4float %32 %33 %28 %29 + OpStore %out_var_CUSTOM_VALUE %34 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tese/split-access-chain.asm.tese b/shaders-msl-no-opt/asm/tese/split-access-chain.asm.tese new file mode 100644 index 00000000000..e13064f94f4 --- /dev/null +++ b/shaders-msl-no-opt/asm/tese/split-access-chain.asm.tese @@ -0,0 +1,35 @@ + OpCapability Tessellation + %94 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %main "main" %in0 %o0 + OpExecutionMode %main Quads + OpName %main "main" + OpName %in0 "in0" + OpName %o0 "o0" + OpDecorate %in0 Location 0 + OpDecorate %o0 Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Input__arr_v4float_uint_1 = OpTypePointer Input %_arr_v4float_uint_1 + %in0 = OpVariable %_ptr_Input__arr_v4float_uint_1 Input 
+%_ptr_Output_v4float = OpTypePointer Output %v4float + %o0 = OpVariable %_ptr_Output_float Output +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_float = OpTypePointer Input %float + %main = OpFunction %void None %3 + %4 = OpLabel + %ac = OpAccessChain %_ptr_Input_v4float %in0 %uint_0 + %bac = OpInBoundsAccessChain %_ptr_Input_float %ac %uint_2 + %loaded = OpLoad %float %bac + OpStore %o0 %loaded + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/vert/block-struct-initializer.asm.vert b/shaders-msl-no-opt/asm/vert/block-struct-initializer.asm.vert new file mode 100644 index 00000000000..6ae3b67e59d --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/block-struct-initializer.asm.vert @@ -0,0 +1,43 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 13 +; Schema: 0 +OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Vertex %main "main" %_ %foo %gl_Position +OpSource GLSL 450 +OpName %main "main" +OpName %Vert "Vert" +OpMemberName %Vert 0 "a" +OpMemberName %Vert 1 "b" +OpName %_ "" +OpName %Foo "Foo" +OpMemberName %Foo 0 "c" +OpMemberName %Foo 1 "d" +OpName %foo "foo" +OpDecorate %Vert Block +OpDecorate %_ Location 0 +OpDecorate %foo Location 2 +OpDecorate %gl_Position BuiltIn Position +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%Vert = OpTypeStruct %float %float +%vec4 = OpTypeVector %float 4 +%ptr_Output_vec4 = OpTypePointer Output %vec4 +%_ptr_Output_Vert = OpTypePointer Output %Vert +%zero_vert = OpConstantNull %Vert +%_ = OpVariable %_ptr_Output_Vert Output %zero_vert +%gl_Position = OpVariable %ptr_Output_vec4 Output +%Foo = OpTypeStruct %float %float +%_ptr_Output_Foo = OpTypePointer Output %Foo +%zero_foo = OpConstantNull %Foo +%blank = OpConstantNull %vec4 +%foo = OpVariable %_ptr_Output_Foo Output %zero_foo +%main = OpFunction %void None %3 +%5 = OpLabel 
+OpStore %gl_Position %blank +OpReturn +OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/vert/builtin-output-initializer.asm.vert b/shaders-msl-no-opt/asm/vert/builtin-output-initializer.asm.vert new file mode 100644 index 00000000000..a00d4b71bba --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/builtin-output-initializer.asm.vert @@ -0,0 +1,39 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 20 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpName %_ "" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%gl_PerVertex = OpTypeStruct %v4float %float +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %zero = OpConstantNull %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output %zero + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %17 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %19 %17 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/vert/composite-extract-physical-type-id.asm.vert b/shaders-msl-no-opt/asm/vert/composite-extract-physical-type-id.asm.vert new file mode 100644 index 00000000000..d44e325b1cf --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/composite-extract-physical-type-id.asm.vert @@ -0,0 +1,63 @@ + OpCapability Shader + 
OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %VSMain "main" %gl_VertexIndex %gl_Position + OpSource HLSL 600 + OpName %type_Float2Array "type.Float2Array" + OpMemberName %type_Float2Array 0 "arr" + OpName %Float2Array "Float2Array" + OpName %VSMain "VSMain" + OpName %param_var_i "param.var.i" + OpName %src_VSMain "src.VSMain" + OpName %i "i" + OpName %bb_entry "bb.entry" + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorate %gl_Position BuiltIn Position + OpDecorate %Float2Array DescriptorSet 0 + OpDecorate %Float2Array Binding 0 + OpDecorate %_arr_v2float_uint_3 ArrayStride 16 + OpMemberDecorate %type_Float2Array 0 Offset 0 + OpDecorate %type_Float2Array Block + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %uint = OpTypeInt 32 0 + %uint_3 = OpConstant %uint 3 + %v2float = OpTypeVector %float 2 +%_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3 +%type_Float2Array = OpTypeStruct %_arr_v2float_uint_3 +%_ptr_Uniform_type_Float2Array = OpTypePointer Uniform %type_Float2Array +%_ptr_Input_uint = OpTypePointer Input %uint + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %20 = OpTypeFunction %void +%_ptr_Function_uint = OpTypePointer Function %uint + %27 = OpTypeFunction %v4float %_ptr_Function_uint +%_ptr_Uniform__arr_v2float_uint_3 = OpTypePointer Uniform %_arr_v2float_uint_3 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%Float2Array = OpVariable %_ptr_Uniform_type_Float2Array Uniform +%gl_VertexIndex = OpVariable %_ptr_Input_uint Input +%gl_Position = OpVariable %_ptr_Output_v4float Output + %VSMain = OpFunction %void None %20 + %21 = OpLabel +%param_var_i = OpVariable %_ptr_Function_uint Function + %24 = OpLoad %uint %gl_VertexIndex + OpStore %param_var_i %24 + %25 = OpFunctionCall %v4float %src_VSMain %param_var_i + OpStore %gl_Position %25 + OpReturn + OpFunctionEnd + 
%src_VSMain = OpFunction %v4float None %27 + %i = OpFunctionParameter %_ptr_Function_uint + %bb_entry = OpLabel + %30 = OpLoad %uint %i + %32 = OpAccessChain %_ptr_Uniform__arr_v2float_uint_3 %Float2Array %int_0 + %34 = OpAccessChain %_ptr_Uniform_v2float %32 %30 + %35 = OpLoad %v2float %34 + %36 = OpCompositeExtract %float %35 0 + %37 = OpCompositeExtract %float %35 1 + %38 = OpCompositeConstruct %v4float %36 %37 %float_0 %float_1 + OpReturnValue %38 + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/vert/constant-composite-block-no-array-stride.asm.vert b/shaders-msl-no-opt/asm/vert/constant-composite-block-no-array-stride.asm.vert new file mode 100644 index 00000000000..992b1fc21be --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/constant-composite-block-no-array-stride.asm.vert @@ -0,0 +1,157 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 121 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %1 "main" %2 %3 %4 %5 %gl_VertexIndex %gl_InstanceIndex + OpMemberDecorate %_struct_8 0 BuiltIn Position + OpMemberDecorate %_struct_8 1 BuiltIn PointSize + OpMemberDecorate %_struct_8 2 BuiltIn ClipDistance + OpMemberDecorate %_struct_8 3 BuiltIn CullDistance + OpDecorate %_struct_8 Block + OpDecorate %3 Location 0 + OpDecorate %4 Location 1 + OpDecorate %5 Location 1 + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorate %gl_InstanceIndex BuiltIn InstanceIndex + OpDecorate %9 ArrayStride 4 + OpDecorate %10 Offset 0 + %9 = OpDecorationGroup + %10 = OpDecorationGroup + OpDecorate %11 RelaxedPrecision + OpDecorate %12 RelaxedPrecision + OpDecorate %12 Flat + OpDecorate %12 Restrict + %13 = OpDecorationGroup + %11 = OpDecorationGroup + %12 = OpDecorationGroup + OpGroupMemberDecorate %10 %_struct_14 0 %_struct_15 0 + %void = OpTypeVoid + %bool = OpTypeBool + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %float = OpTypeFloat 32 + %v2int = OpTypeVector %int 2 + %v2uint = OpTypeVector %uint 2 
+ %v2float = OpTypeVector %float 2 + %v3int = OpTypeVector %int 3 + %v3uint = OpTypeVector %uint 3 + %v3float = OpTypeVector %float 3 + %v4int = OpTypeVector %int 4 + %v4uint = OpTypeVector %uint 4 + %v4float = OpTypeVector %float 4 + %v4bool = OpTypeVector %bool 4 + %31 = OpTypeFunction %v4float %v4float + %32 = OpTypeFunction %bool + %33 = OpTypeFunction %void +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Input_int = OpTypePointer Input %int +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Input_v2int = OpTypePointer Input %v2int +%_ptr_Input_v2uint = OpTypePointer Input %v2uint +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_v4int = OpTypePointer Input %v4int +%_ptr_Input_v4uint = OpTypePointer Input %v4uint +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_int = OpTypePointer Output %int +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_ptr_Output_v2int = OpTypePointer Output %v2int +%_ptr_Output_v2uint = OpTypePointer Output %v2uint +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output_v4int = OpTypePointer Output %v4int +%_ptr_Output_v4uint = OpTypePointer Output %v4uint +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Function_v4float = OpTypePointer Function %v4float + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %float_0_5 = OpConstant %float 0.5 + %float_n1 = OpConstant %float -1 + %float_7 = OpConstant %float 7 + %float_8 = OpConstant %float 8 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %int_4 = OpConstant %int 4 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_32 = OpConstant %uint 32 + %uint_4 = 
OpConstant %uint 4 +%uint_2147483647 = OpConstant %uint 2147483647 + %74 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %75 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %76 = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_arr_v4float_uint_32 = OpTypeArray %v4float %uint_32 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_ptr_Input__arr_v4float_uint_32 = OpTypePointer Input %_arr_v4float_uint_32 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 + %_struct_8 = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output__struct_8 = OpTypePointer Output %_struct_8 + %2 = OpVariable %_ptr_Output__struct_8 Output + %3 = OpVariable %_ptr_Input_v4float Input + %4 = OpVariable %_ptr_Output_v4float Output + %5 = OpVariable %_ptr_Input_v4float Input +%gl_VertexIndex = OpVariable %_ptr_Input_int Input +%gl_InstanceIndex = OpVariable %_ptr_Input_int Input +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %_struct_14 = OpTypeStruct %_arr_float_uint_3 + %_struct_15 = OpTypeStruct %_arr_float_uint_3 +%_ptr_Function__struct_14 = OpTypePointer Function %_struct_14 +%_ptr_Function__struct_15 = OpTypePointer Function %_struct_15 + %float_2 = OpConstant %float 2 + %float_n2 = OpConstant %float -2 + %93 = OpConstantComposite %_arr_float_uint_3 %float_1 %float_2 %float_1 + %94 = OpConstantComposite %_arr_float_uint_3 %float_n1 %float_n2 %float_n1 + %95 = OpConstantComposite %_struct_14 %93 + %96 = OpConstantComposite %_struct_15 %94 + %1 = OpFunction %void None %33 + %97 = OpLabel + %98 = 
OpLoad %v4float %3 + %99 = OpAccessChain %_ptr_Output_v4float %2 %int_0 + OpStore %99 %98 + %100 = OpLoad %v4float %5 + %101 = OpFunctionCall %v4float %102 %100 + OpStore %4 %101 + OpReturn + OpFunctionEnd + %103 = OpFunction %bool None %32 + %104 = OpLabel + %105 = OpLoad %int %gl_VertexIndex + %106 = OpIEqual %bool %105 %int_0 + OpReturnValue %106 + OpFunctionEnd + %102 = OpFunction %v4float None %31 + %107 = OpFunctionParameter %v4float + %108 = OpLabel + %109 = OpVariable %_ptr_Function_v4float Function + %110 = OpVariable %_ptr_Function__struct_14 Function + %111 = OpVariable %_ptr_Function__struct_15 Function + OpStore %109 %107 + OpStore %110 %95 + OpStore %111 %96 + %112 = OpAccessChain %_ptr_Function_float %110 %int_0 %int_2 + %113 = OpLoad %float %112 + %114 = OpAccessChain %_ptr_Function_float %111 %int_0 %int_2 + %115 = OpLoad %float %114 + %116 = OpFAdd %float %113 %115 + %117 = OpAccessChain %_ptr_Function_float %109 %int_1 + %118 = OpLoad %float %117 + %119 = OpFAdd %float %116 %118 + OpStore %117 %119 + %120 = OpLoad %v4float %109 + OpReturnValue %120 + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/vert/duplicate-view-index.asm.vert b/shaders-msl-no-opt/asm/vert/duplicate-view-index.asm.vert new file mode 100644 index 00000000000..00ad1ee9cbc --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/duplicate-view-index.asm.vert @@ -0,0 +1,66 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 23 +; Schema: 0 + OpCapability Shader + OpCapability MultiView + OpExtension "SPV_KHR_multiview" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %gl_ViewIndex + OpEntryPoint Vertex %main2 "main2" %_ %gl_ViewIndex2 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_multiview" + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 
"gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpName %gl_ViewIndex "gl_ViewIndex" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_ViewIndex BuiltIn ViewIndex + OpDecorate %gl_ViewIndex2 BuiltIn ViewIndex + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_int = OpTypePointer Input %int +%gl_ViewIndex = OpVariable %_ptr_Input_int Input +%gl_ViewIndex2 = OpVariable %_ptr_Input_int Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpLoad %int %gl_ViewIndex + %19 = OpConvertSToF %float %18 + %20 = OpCompositeConstruct %v4float %19 %19 %19 %19 + %22 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %22 %20 + OpReturn + OpFunctionEnd + + %main2 = OpFunction %void None %3 + %100 = OpLabel + %101 = OpLoad %int %gl_ViewIndex2 + %102 = OpConvertSToF %float %101 + %103 = OpCompositeConstruct %v4float %102 %102 %102 %102 + %104 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %104 %103 + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/vert/pointer-to-pointer.asm.vert b/shaders-msl-no-opt/asm/vert/pointer-to-pointer.asm.vert new file mode 100644 index 00000000000..22058d2c8ce --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/pointer-to-pointer.asm.vert @@ -0,0 +1,34 @@ +OpCapability Shader +OpCapability 
VariablePointers +OpCapability VariablePointersStorageBuffer +OpMemoryModel Logical GLSL450 + +OpEntryPoint Vertex %fn_vert "main" + +%F = OpTypeFloat 32 +%PF = OpTypePointer StorageBuffer %F +%PPF = OpTypePointer Private %PF +%PPPF = OpTypePointer Function %PPF + +%V = OpTypeVoid +%Fn0V = OpTypeFunction %V + +%FnArg = OpTypeFunction %V %PPPF + +%uPPF = OpUndef %PPF + +%fn_ptr = OpFunction %V None %FnArg + %arg = OpFunctionParameter %PPPF + %fn_ptr_bb0 = OpLabel + OpReturn +OpFunctionEnd + +%fn_vert = OpFunction %V None %Fn0V + %fn_vert_bb0 = OpLabel + %VPPPF = OpVariable %PPPF Function + OpStore %VPPPF %uPPF + %VV = OpFunctionCall %V %fn_ptr %VPPPF + OpReturn +OpFunctionEnd + + diff --git a/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp b/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp new file mode 100644 index 00000000000..081c39626d1 --- /dev/null +++ b/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 8) in; + +shared float shared_group[8][8]; +shared float shared_group_alt[8][8]; + +void main() +{ + float blob[8]; + for (int i = 0; i < 8; i++) + blob[i] = float(i); + shared_group[gl_LocalInvocationIndex] = blob; + + barrier(); + + float copied_blob[8] = shared_group[gl_LocalInvocationIndex ^ 1u]; + shared_group_alt[gl_LocalInvocationIndex] = shared_group[gl_LocalInvocationIndex]; +} diff --git a/shaders-msl-no-opt/comp/basic.dynamic-buffer.msl2.invalid.comp b/shaders-msl-no-opt/comp/basic.dynamic-buffer.msl2.invalid.comp new file mode 100644 index 00000000000..c2965731e9a --- /dev/null +++ b/shaders-msl-no-opt/comp/basic.dynamic-buffer.msl2.invalid.comp @@ -0,0 +1,27 @@ +#version 450 +layout(local_size_x = 3, local_size_y = 3, local_size_z = 2) in; + +layout(set = 0, binding = 0) uniform Foo +{ + int a; + int b; +}; + +layout(set = 0, binding = 1) uniform Bar +{ + int c; + int d; +}; + +layout(set = 1, binding = 2) buffer Baz +{ + int e; + int f; +} baz[3][3][2]; + +void 
main() +{ + uvec3 coords = gl_GlobalInvocationID; + baz[coords.x][coords.y][coords.z].e = a + c; + baz[coords.x][coords.y][coords.z].f = b * d; +} diff --git a/shaders-msl-no-opt/comp/bda-restrict-pointer-variable.msl2.comp b/shaders-msl-no-opt/comp/bda-restrict-pointer-variable.msl2.comp new file mode 100644 index 00000000000..8f1d97861c3 --- /dev/null +++ b/shaders-msl-no-opt/comp/bda-restrict-pointer-variable.msl2.comp @@ -0,0 +1,18 @@ +#version 450 +#extension GL_EXT_buffer_reference : require + +layout(buffer_reference) buffer Ref +{ + vec4 v; +}; + +layout(push_constant) uniform Registers +{ + Ref foo; +}; + +void main() +{ + restrict Ref ref = foo; + ref.v = vec4(1.0); +} diff --git a/shaders-msl/comp/bitcast-16bit-1.invalid.comp b/shaders-msl-no-opt/comp/bitcast-16bit-1.invalid.comp similarity index 100% rename from shaders-msl/comp/bitcast-16bit-1.invalid.comp rename to shaders-msl-no-opt/comp/bitcast-16bit-1.invalid.comp diff --git a/shaders-msl/comp/bitcast-16bit-2.invalid.comp b/shaders-msl-no-opt/comp/bitcast-16bit-2.invalid.comp similarity index 100% rename from shaders-msl/comp/bitcast-16bit-2.invalid.comp rename to shaders-msl-no-opt/comp/bitcast-16bit-2.invalid.comp diff --git a/shaders-msl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.msl23.comp b/shaders-msl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.msl23.comp new file mode 100644 index 00000000000..56c11bbb75d --- /dev/null +++ b/shaders-msl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.msl23.comp @@ -0,0 +1,21 @@ +#version 460 + +#extension GL_EXT_buffer_reference: enable +#extension GL_EXT_buffer_reference_uvec2: enable + +struct S { + vec3 v; +}; + +layout(buffer_reference) buffer SSBO{ + S s[]; +}; + +layout(push_constant) uniform PC { + uvec2 ptr; +} pc; + +void main(){ + SSBO ssbo = SSBO(pc.ptr); + ssbo.s[0].v = vec3(1.0); +} diff --git a/shaders-msl-no-opt/comp/glsl.std450.comp b/shaders-msl-no-opt/comp/glsl.std450.comp new file mode 100644 
index 00000000000..a17a82b82af --- /dev/null +++ b/shaders-msl-no-opt/comp/glsl.std450.comp @@ -0,0 +1,129 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float res; + int ires; + uint ures; + + vec4 f32; + ivec4 s32; + uvec4 u32; + + mat2 m2; + mat3 m3; + mat4 m4; +}; + +void main() +{ + float tmp; + vec2 v2; + vec3 v3; + vec4 v4; + int itmp; + + res = round(f32.x); + res = roundEven(f32.x); + res = trunc(f32.x); + res = abs(f32.x); + ires = abs(s32.x); + res = sign(f32.x); + ires = sign(s32.x); + res = floor(f32.x); + res = ceil(f32.x); + res = fract(f32.x); + res = radians(f32.x); + res = degrees(f32.x); + res = sin(f32.x); + res = cos(f32.x); + res = tan(f32.x); + res = asin(f32.x); + res = acos(f32.x); + res = atan(f32.x); + res = sinh(f32.x); + res = cosh(f32.x); + res = tanh(f32.x); + res = asinh(f32.x); + res = acosh(f32.x); + res = atanh(f32.x); + res = atan(f32.x, f32.y); + res = pow(f32.x, f32.y); + res = exp(f32.x); + res = log(f32.x); + res = exp2(f32.x); + res = log2(f32.x); + res = sqrt(f32.x); + res = inversesqrt(f32.x); + + res = length(f32.x); + res = distance(f32.x, f32.y); + res = normalize(f32.x); + res = faceforward(f32.x, f32.y, f32.z); + res = reflect(f32.x, f32.y); + res = refract(f32.x, f32.y, f32.z); + + res = length(f32.xy); + res = distance(f32.xy, f32.zw); + v2 = normalize(f32.xy); + v2 = faceforward(f32.xy, f32.yz, f32.zw); + v2 = reflect(f32.xy, f32.zw); + v2 = refract(f32.xy, f32.yz, f32.w); + + v3 = cross(f32.xyz, f32.yzw); + + res = determinant(m2); + res = determinant(m3); + res = determinant(m4); + m2 = inverse(m2); + m3 = inverse(m3); + m4 = inverse(m4); + + res = modf(f32.x, tmp); + // ModfStruct + + res = min(f32.x, f32.y); + ures = min(u32.x, u32.y); + ires = min(s32.x, s32.y); + res = max(f32.x, f32.y); + ures = max(u32.x, u32.y); + ires = max(s32.x, s32.y); + + res = clamp(f32.x, f32.y, f32.z); + ures = clamp(u32.x, u32.y, u32.z); + ires = clamp(s32.x, s32.y, s32.z); + + res = 
mix(f32.x, f32.y, f32.z); + res = step(f32.x, f32.y); + res = smoothstep(f32.x, f32.y, f32.z); + res = fma(f32.x, f32.y, f32.z); + + res = frexp(f32.x, itmp); + // FrexpStruct + res = ldexp(f32.x, itmp); + + ures = packSnorm4x8(f32); + ures = packUnorm4x8(f32); + ures = packSnorm2x16(f32.xy); + ures = packUnorm2x16(f32.xy); + ures = packHalf2x16(f32.xy); + // packDouble2x32 + + v2 = unpackSnorm2x16(u32.x); + v2 = unpackUnorm2x16(u32.x); + v2 = unpackHalf2x16(u32.x); + v4 = unpackSnorm4x8(u32.x); + v4 = unpackUnorm4x8(u32.x); + // unpackDouble2x32 + + s32 = findLSB(s32); + s32 = findLSB(u32); + s32 = findMSB(s32); + s32 = findMSB(u32); + + // interpolateAtSample + // interpolateAtOffset + + // NMin, NMax, NClamp +} diff --git a/shaders-msl-no-opt/comp/illegal-struct-name.asm.comp b/shaders-msl-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 00000000000..f7a8787d3d8 --- /dev/null +++ b/shaders-msl-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,62 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 31 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Foo" + OpMemberName %Foo 0 "abs" + OpName %f "f" + OpName %Foo_0 "Foo" + OpMemberName %Foo_0 0 "abs" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "foo" + OpMemberName %SSBO 1 "foo2" + OpName %_ "" + OpName %linear "abs" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %Foo = OpTypeStruct %float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %float + %SSBO = OpTypeStruct %Foo_0 %Foo_0 +%_ptr_Uniform_SSBO = OpTypePointer 
Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %main = OpFunction %void None %3 + %5 = OpLabel + %f = OpVariable %_ptr_Function_Foo Function + %linear = OpVariable %_ptr_Function_int Function + %17 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0 + %18 = OpLoad %Foo_0 %17 + %19 = OpCompositeExtract %float %18 0 + %21 = OpAccessChain %_ptr_Function_float %f %int_0 + OpStore %21 %19 + OpStore %linear %int_10 + %26 = OpLoad %Foo %f + %27 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_1 + %28 = OpCompositeExtract %float %26 0 + %30 = OpAccessChain %_ptr_Uniform_float %27 %int_0 + OpStore %30 %28 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/comp/implicit-integer-promotion.comp b/shaders-msl-no-opt/comp/implicit-integer-promotion.comp new file mode 100644 index 00000000000..a0ee95b3a1a --- /dev/null +++ b/shaders-msl-no-opt/comp/implicit-integer-promotion.comp @@ -0,0 +1,85 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(set = 0, binding = 0) buffer BUF0 +{ + f16vec2 f16s; + u16vec2 u16; + i16vec2 i16; + u16vec4 u16s; + i16vec4 i16s; + float16_t f16; +}; + +void test_i16() +{ + f16 += int16BitsToFloat16(i16.x + i16.y); + f16 += int16BitsToFloat16(i16.x - i16.y); + f16 += int16BitsToFloat16(i16.x * i16.y); + f16 += int16BitsToFloat16(i16.x / i16.y); + f16 += int16BitsToFloat16(i16.x % i16.y); + f16 += int16BitsToFloat16(i16.x << i16.y); + f16 += int16BitsToFloat16(i16.x >> i16.y); + f16 += int16BitsToFloat16(~i16.x); + f16 += int16BitsToFloat16(-i16.x); + f16 += int16BitsToFloat16(i16.x ^ i16.y); + f16 += 
int16BitsToFloat16(i16.x & i16.y); + f16 += int16BitsToFloat16(i16.x | i16.y); +} + +void test_u16() +{ + f16 += uint16BitsToFloat16(u16.x + u16.y); + f16 += uint16BitsToFloat16(u16.x - u16.y); + f16 += uint16BitsToFloat16(u16.x * u16.y); + f16 += uint16BitsToFloat16(u16.x / u16.y); + f16 += uint16BitsToFloat16(u16.x % u16.y); + f16 += uint16BitsToFloat16(u16.x << u16.y); + f16 += uint16BitsToFloat16(u16.x >> u16.y); + f16 += uint16BitsToFloat16(~u16.x); + f16 += uint16BitsToFloat16(-u16.x); + f16 += uint16BitsToFloat16(u16.x ^ u16.y); + f16 += uint16BitsToFloat16(u16.x & u16.y); + f16 += uint16BitsToFloat16(u16.x | u16.y); +} + +void test_u16s() +{ + f16s += uint16BitsToFloat16(u16s.xy + u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy - u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy * u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy / u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy % u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy << u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy >> u16s.zw); + f16s += uint16BitsToFloat16(~u16s.xy); + f16s += uint16BitsToFloat16(-u16s.xy); + f16s += uint16BitsToFloat16(u16s.xy ^ u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy & u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy | u16s.zw); +} + +void test_i16s() +{ + f16s += int16BitsToFloat16(i16s.xy + i16s.zw); + f16s += int16BitsToFloat16(i16s.xy - i16s.zw); + f16s += int16BitsToFloat16(i16s.xy * i16s.zw); + f16s += int16BitsToFloat16(i16s.xy / i16s.zw); + f16s += int16BitsToFloat16(i16s.xy % i16s.zw); + f16s += int16BitsToFloat16(i16s.xy << i16s.zw); + f16s += int16BitsToFloat16(i16s.xy >> i16s.zw); + f16s += int16BitsToFloat16(~i16s.xy); + f16s += int16BitsToFloat16(-i16s.xy); + f16s += int16BitsToFloat16(i16s.xy ^ i16s.zw); + f16s += int16BitsToFloat16(i16s.xy & i16s.zw); + f16s += int16BitsToFloat16(i16s.xy | i16s.zw); +} + +void main() +{ + test_u16(); + test_i16(); + test_u16s(); + test_i16s(); +} diff --git a/shaders-msl-no-opt/comp/int16min-literal.comp 
b/shaders-msl-no-opt/comp/int16min-literal.comp new file mode 100644 index 00000000000..c1b345266d8 --- /dev/null +++ b/shaders-msl-no-opt/comp/int16min-literal.comp @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float16_t a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float16_t b; +}; + +void main() +{ + int16_t v = float16BitsToInt16(b); + v ^= 0x8000s; + a = int16BitsToFloat16(v); +} diff --git a/shaders-msl/comp/int64.invalid.msl22.comp b/shaders-msl-no-opt/comp/int64.invalid.msl22.comp similarity index 100% rename from shaders-msl/comp/int64.invalid.msl22.comp rename to shaders-msl-no-opt/comp/int64.invalid.msl22.comp diff --git a/shaders-msl-no-opt/comp/int64min-literal.msl22.comp b/shaders-msl-no-opt/comp/int64min-literal.msl22.comp new file mode 100644 index 00000000000..79296054462 --- /dev/null +++ b/shaders-msl-no-opt/comp/int64min-literal.msl22.comp @@ -0,0 +1,21 @@ +#version 450 +#extension GL_ARB_gpu_shader_int64 : require + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float b; +}; + +void main() +{ + int64_t v = int64_t(floatBitsToInt(b)); + v ^= 0x8000000000000000L; + a = intBitsToFloat(int(v)); +} diff --git a/shaders-msl-no-opt/comp/intmin-literal.comp b/shaders-msl-no-opt/comp/intmin-literal.comp new file mode 100644 index 00000000000..ee35cedabb9 --- /dev/null +++ b/shaders-msl-no-opt/comp/intmin-literal.comp @@ -0,0 +1,18 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float b; +}; + +void main() +{ + a = intBitsToFloat(floatBitsToInt(b) ^ 0x80000000); +} diff --git 
a/shaders-msl-no-opt/comp/std140-array-load-composite-construct.comp b/shaders-msl-no-opt/comp/std140-array-load-composite-construct.comp new file mode 100644 index 00000000000..af1c47b32ce --- /dev/null +++ b/shaders-msl-no-opt/comp/std140-array-load-composite-construct.comp @@ -0,0 +1,13 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer SSBO +{ + float a[16]; + vec4 b[16]; +}; + +void main() +{ + b[gl_GlobalInvocationID.x] = vec4(a[gl_GlobalInvocationID.x]); +} diff --git a/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/shaders-msl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp similarity index 94% rename from shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp rename to shaders-msl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp index a0898cfc549..47d88912f75 100644 --- a/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp +++ b/shaders-msl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp @@ -43,7 +43,8 @@ struct Content S3 m3; float m4; - S4 m3s[8]; + // glslang seems to miscompile this atm into ArrayStride of 16 even in scalar layout. + //S4 m3s[8]; }; layout(binding = 2, scalar) restrict buffer SSBO2 diff --git a/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl12.emulate-subgroup.comp b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl12.emulate-subgroup.comp new file mode 100644 index 00000000000..8a0be2269e5 --- /dev/null +++ b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl12.emulate-subgroup.comp @@ -0,0 +1,25 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +// Reduced test for emulated functionality. 
+ +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); +} diff --git a/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.comp b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.comp new file mode 100644 index 00000000000..f840d2aee89 --- /dev/null +++ b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.comp @@ -0,0 +1,145 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_clustered : require +#extension GL_KHR_shader_subgroup_quad : require +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + + // ballot + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + FragColor = float(gl_SubgroupLeMask); + FragColor = float(gl_SubgroupLtMask); + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = 
subgroupBroadcastFirst(bvec4(false)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + // shuffle + uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); + + // shuffle relative + uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); + + // vote + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal = subgroupAllEqual(0); + has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = subgroupAllEqual(bvec4(true, true, false, true)); + + // arithmetic + vec4 added = subgroupAdd(vec4(20.0)); + ivec4 iadded = subgroupAdd(ivec4(20)); + vec4 multiplied = subgroupMul(vec4(20.0)); + ivec4 imultiplied = subgroupMul(ivec4(20)); + vec4 lo = subgroupMin(vec4(20.0)); + vec4 hi = subgroupMax(vec4(20.0)); + ivec4 slo = subgroupMin(ivec4(20)); + ivec4 shi = subgroupMax(ivec4(20)); + uvec4 ulo = subgroupMin(uvec4(20)); + uvec4 uhi = subgroupMax(uvec4(20)); + uvec4 anded = subgroupAnd(ballot_value); + uvec4 ored = subgroupOr(ballot_value); + uvec4 xored = subgroupXor(ballot_value); + bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42))); + bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42))); + bvec4 xored_b = subgroupXor(equal(ballot_value, uvec4(42))); + + 
added = subgroupInclusiveAdd(added); + iadded = subgroupInclusiveAdd(iadded); + multiplied = subgroupInclusiveMul(multiplied); + imultiplied = subgroupInclusiveMul(imultiplied); + //lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupInclusiveMax(hi); + //slo = subgroupInclusiveMin(slo); + //shi = subgroupInclusiveMax(shi); + //ulo = subgroupInclusiveMin(ulo); + //uhi = subgroupInclusiveMax(uhi); + //anded = subgroupInclusiveAnd(anded); + //ored = subgroupInclusiveOr(ored); + //xored = subgroupInclusiveXor(ored); + //added = subgroupExclusiveAdd(lo); + + added = subgroupExclusiveAdd(multiplied); + multiplied = subgroupExclusiveMul(multiplied); + iadded = subgroupExclusiveAdd(imultiplied); + imultiplied = subgroupExclusiveMul(imultiplied); + //lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupExclusiveMax(hi); + //ulo = subgroupExclusiveMin(ulo); + //uhi = subgroupExclusiveMax(uhi); + //slo = subgroupExclusiveMin(slo); + //shi = subgroupExclusiveMax(shi); + //anded = subgroupExclusiveAnd(anded); + //ored = subgroupExclusiveOr(ored); + //xored = subgroupExclusiveXor(ored); + + // clustered + added = subgroupClusteredAdd(added, 4u); + multiplied = subgroupClusteredMul(multiplied, 4u); + iadded = subgroupClusteredAdd(iadded, 4u); + imultiplied = subgroupClusteredMul(imultiplied, 4u); + lo = subgroupClusteredMin(lo, 4u); + hi = subgroupClusteredMax(hi, 4u); + ulo = subgroupClusteredMin(ulo, 4u); + uhi = subgroupClusteredMax(uhi, 4u); + slo = subgroupClusteredMin(slo, 4u); + shi = subgroupClusteredMax(shi, 4u); + anded = subgroupClusteredAnd(anded, 4u); + ored = subgroupClusteredOr(ored, 4u); + xored = subgroupClusteredXor(xored, 4u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); + + // quad + vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = 
subgroupQuadSwapHorizontal(bvec4(true)); + vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); + vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); + vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); +} diff --git a/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.fixed-subgroup.comp similarity index 86% rename from shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp rename to shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.fixed-subgroup.comp index f8f5133f8de..28c5d6b34d3 100644 --- a/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp +++ b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.fixed-subgroup.comp @@ -35,7 +35,9 @@ void main() FragColor = float(gl_SubgroupLeMask); FragColor = float(gl_SubgroupLtMask); vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); uvec4 ballot_value = subgroupBallot(true); bool inverse_ballot_value = subgroupInverseBallot(ballot_value); bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); @@ -47,17 +49,23 @@ void main() // shuffle uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); // shuffle relative uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); // vote bool has_all = subgroupAll(true); 
bool has_any = subgroupAny(true); bool has_equal = subgroupAllEqual(0); has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = subgroupAllEqual(bvec4(true, true, false, true)); // arithmetic vec4 added = subgroupAdd(vec4(20.0)); @@ -120,7 +128,11 @@ void main() // quad vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); } diff --git a/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp similarity index 70% rename from shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp rename to shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp index 66eb4a20758..a78527f5428 100644 --- a/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp +++ b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp @@ -27,15 +27,23 @@ void main() // shuffle uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); // shuffle relative uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); // quad vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); vec4 swap_vertical = 
subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); } diff --git a/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl22.ios.comp b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl22.ios.comp new file mode 100644 index 00000000000..bc904a4f750 --- /dev/null +++ b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl22.ios.comp @@ -0,0 +1,79 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_quad : require +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +// Reduced test for functionality exposed on iOS. 
+ +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + + // ballot + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + FragColor = float(gl_SubgroupLeMask); + FragColor = float(gl_SubgroupLtMask); + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + // shuffle + uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); + + // shuffle relative + uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); + + // vote + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal = subgroupAllEqual(0); + has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = 
subgroupAllEqual(bvec4(true, true, false, true)); + + // quad + vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); + vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); + vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); + vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); +} diff --git a/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl23.ios.simd.comp b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl23.ios.simd.comp new file mode 100644 index 00000000000..28c5d6b34d3 --- /dev/null +++ b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl23.ios.simd.comp @@ -0,0 +1,138 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_clustered : require +#extension GL_KHR_shader_subgroup_quad : require +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + + // ballot + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + FragColor = float(gl_SubgroupLeMask); + 
FragColor = float(gl_SubgroupLtMask); + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + // shuffle + uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); + + // shuffle relative + uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); + + // vote + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal = subgroupAllEqual(0); + has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = subgroupAllEqual(bvec4(true, true, false, true)); + + // arithmetic + vec4 added = subgroupAdd(vec4(20.0)); + ivec4 iadded = subgroupAdd(ivec4(20)); + vec4 multiplied = subgroupMul(vec4(20.0)); + ivec4 imultiplied = subgroupMul(ivec4(20)); + vec4 lo = subgroupMin(vec4(20.0)); + vec4 hi = subgroupMax(vec4(20.0)); + ivec4 slo = subgroupMin(ivec4(20)); + ivec4 shi = subgroupMax(ivec4(20)); + uvec4 ulo = subgroupMin(uvec4(20)); + uvec4 uhi = subgroupMax(uvec4(20)); + uvec4 anded = subgroupAnd(ballot_value); + uvec4 ored = subgroupOr(ballot_value); + uvec4 
xored = subgroupXor(ballot_value); + + added = subgroupInclusiveAdd(added); + iadded = subgroupInclusiveAdd(iadded); + multiplied = subgroupInclusiveMul(multiplied); + imultiplied = subgroupInclusiveMul(imultiplied); + //lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupInclusiveMax(hi); + //slo = subgroupInclusiveMin(slo); + //shi = subgroupInclusiveMax(shi); + //ulo = subgroupInclusiveMin(ulo); + //uhi = subgroupInclusiveMax(uhi); + //anded = subgroupInclusiveAnd(anded); + //ored = subgroupInclusiveOr(ored); + //xored = subgroupInclusiveXor(ored); + //added = subgroupExclusiveAdd(lo); + + added = subgroupExclusiveAdd(multiplied); + multiplied = subgroupExclusiveMul(multiplied); + iadded = subgroupExclusiveAdd(imultiplied); + imultiplied = subgroupExclusiveMul(imultiplied); + //lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupExclusiveMax(hi); + //ulo = subgroupExclusiveMin(ulo); + //uhi = subgroupExclusiveMax(uhi); + //slo = subgroupExclusiveMin(slo); + //shi = subgroupExclusiveMax(shi); + //anded = subgroupExclusiveAnd(anded); + //ored = subgroupExclusiveOr(ored); + //xored = subgroupExclusiveXor(ored); + + // clustered + added = subgroupClusteredAdd(added, 4u); + multiplied = subgroupClusteredMul(multiplied, 4u); + iadded = subgroupClusteredAdd(iadded, 4u); + imultiplied = subgroupClusteredMul(imultiplied, 4u); + lo = subgroupClusteredMin(lo, 4u); + hi = subgroupClusteredMax(hi, 4u); + ulo = subgroupClusteredMin(ulo, 4u); + uhi = subgroupClusteredMax(uhi, 4u); + slo = subgroupClusteredMin(slo, 4u); + shi = subgroupClusteredMax(shi, 4u); + anded = subgroupClusteredAnd(anded, 4u); + ored = subgroupClusteredOr(ored, 4u); + xored = subgroupClusteredXor(xored, 4u); + + // quad + vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); + vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = 
subgroupQuadSwapVertical(bvec4(true)); + vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); + vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); +} diff --git a/shaders-msl-no-opt/comp/trivial-select-cast-vector.comp b/shaders-msl-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 00000000000..c3e0922a166 --- /dev/null +++ b/shaders-msl-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,14 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + vec3 a; + vec3 b; +}; + +void main() +{ + bvec3 c = lessThan(b, vec3(1.0)); + a = mix(vec3(1, 0, 0), vec3(0, 0, 1), c); +} diff --git a/shaders-msl-no-opt/comp/trivial-select-matrix.spv14.comp b/shaders-msl-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 00000000000..5ffcc3f3a49 --- /dev/null +++ b/shaders-msl-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + mat3 a; + float b; +}; + +void main() +{ + // Scalar to Matrix + bool c = b < 1.0; + a = c ? mat3(vec3(1), vec3(1), vec3(1)) : mat3(vec3(0), vec3(0), vec3(0)); + a = c ? 
mat3(1) : mat3(0); +} diff --git a/shaders-msl-no-opt/components/fragment-input-component.frag b/shaders-msl-no-opt/components/fragment-input-component.frag new file mode 100644 index 00000000000..60d48bef7e6 --- /dev/null +++ b/shaders-msl-no-opt/components/fragment-input-component.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0, component = 3) in float Foo1; +layout(location = 0, component = 0) in vec3 Foo3; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(Foo3, Foo1); +} diff --git a/shaders-msl-no-opt/components/fragment-output-component.frag b/shaders-msl-no-opt/components/fragment-output-component.frag new file mode 100644 index 00000000000..29a57dfa1f3 --- /dev/null +++ b/shaders-msl-no-opt/components/fragment-output-component.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0, component = 0) out float FragColor0; +layout(location = 0, component = 1) out vec2 FragColor1; +layout(location = 0, component = 3) out float FragColor3; + +void main() +{ + FragColor0 = 1.0; + FragColor1 = vec2(2.0, 3.0); + FragColor3 = 4.0; +} diff --git a/shaders-msl-no-opt/components/fragment-output-component.pad-fragment.frag b/shaders-msl-no-opt/components/fragment-output-component.pad-fragment.frag new file mode 100644 index 00000000000..ae9b7f75e76 --- /dev/null +++ b/shaders-msl-no-opt/components/fragment-output-component.pad-fragment.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0, component = 0) out float FragColor0; +layout(location = 0, component = 1) out vec2 FragColor1; + +void main() +{ + FragColor0 = 1.0; + FragColor1 = vec2(2.0, 3.0); +} diff --git a/shaders-msl-no-opt/components/vertex-input-component.vert b/shaders-msl-no-opt/components/vertex-input-component.vert new file mode 100644 index 00000000000..7ba31bf6552 --- /dev/null +++ b/shaders-msl-no-opt/components/vertex-input-component.vert @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0, component = 0) in vec3 Foo3; +layout(location = 0, component 
= 3) in float Foo1; +layout(location = 0) out vec3 Foo; + +void main() +{ + gl_Position = vec4(Foo3, Foo1); + Foo = Foo3 + Foo1; +} diff --git a/shaders-msl-no-opt/components/vertex-output-component.vert b/shaders-msl-no-opt/components/vertex-output-component.vert new file mode 100644 index 00000000000..5abd8dc6ff4 --- /dev/null +++ b/shaders-msl-no-opt/components/vertex-output-component.vert @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) in vec4 vFoo; +layout(location = 0) out vec3 Foo3; +layout(location = 0, component = 3) out float Foo1; + +void main() +{ + gl_Position = vFoo; + Foo3 = vFoo.xyz; + Foo1 = vFoo.w; +} diff --git a/shaders-msl/frag/16bit-constants.frag b/shaders-msl-no-opt/frag/16bit-constants.invalid.frag similarity index 100% rename from shaders-msl/frag/16bit-constants.frag rename to shaders-msl-no-opt/frag/16bit-constants.invalid.frag diff --git a/shaders-msl-no-opt/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag b/shaders-msl-no-opt/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag new file mode 100644 index 00000000000..8cce059bab2 --- /dev/null +++ b/shaders-msl-no-opt/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag @@ -0,0 +1,8 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void main() +{ + //demote; // FIXME: Not implemented for MSL + bool helper = helperInvocationEXT(); +} diff --git a/shaders-msl-no-opt/frag/depth-image-gather.asm.frag b/shaders-msl-no-opt/frag/depth-image-gather.asm.frag new file mode 100644 index 00000000000..430899c6f05 --- /dev/null +++ b/shaders-msl-no-opt/frag/depth-image-gather.asm.frag @@ -0,0 +1,72 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google spiregg; 0 +; Bound: 36 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + OpExtension "SPV_GOOGLE_user_type" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %psMain "main" %gl_FragCoord %in_var_TEXCOORD0 %out_var_SV_Target0 + OpExecutionMode %psMain OriginUpperLeft + OpSource 
HLSL 500 + OpName %type_2d_image "type.2d.image" + OpName %g_depthTexture "g_depthTexture" + OpName %type_sampler "type.sampler" + OpName %g_sampler "g_sampler" + OpName %g_comp "g_comp" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %psMain "psMain" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_Position" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %g_depthTexture DescriptorSet 0 + OpDecorate %g_depthTexture Binding 0 + OpDecorate %g_sampler DescriptorSet 0 + OpDecorate %g_sampler Binding 0 + OpDecorate %g_comp DescriptorSet 0 + OpDecorate %g_comp Binding 1 + OpDecorateString %g_depthTexture UserTypeGOOGLE "texture2d" + %float = OpTypeFloat 32 + %float_0_5 = OpConstant %float 0.5 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v2int = OpTypeVector %int 2 + %16 = OpConstantComposite %v2int %int_0 %int_0 +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %25 = OpTypeFunction %void +%type_sampled_image = OpTypeSampledImage %type_2d_image +%g_depthTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant + %g_sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %g_comp = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%gl_FragCoord = 
OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_v2float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %psMain = OpFunction %void None %25 + %26 = OpLabel + %27 = OpLoad %v2float %in_var_TEXCOORD0 + %28 = OpLoad %type_2d_image %g_depthTexture + %29 = OpLoad %type_sampler %g_comp + %30 = OpSampledImage %type_sampled_image %28 %29 + %31 = OpImageDrefGather %v4float %30 %27 %float_0_5 None + %32 = OpLoad %type_sampler %g_sampler + %33 = OpSampledImage %type_sampled_image %28 %32 + %34 = OpImageGather %v4float %33 %27 %int_0 ConstOffset %16 + %35 = OpFMul %v4float %31 %34 + OpStore %out_var_SV_Target0 %35 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/frag/force-active-resources.msl2.argument..force-active.discrete.frag b/shaders-msl-no-opt/frag/force-active-resources.msl2.argument..force-active.discrete.frag new file mode 100644 index 00000000000..b1b058d0146 --- /dev/null +++ b/shaders-msl-no-opt/frag/force-active-resources.msl2.argument..force-active.discrete.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec2 vUV; +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 0) uniform sampler2D uTexture1; +layout(set = 0, binding = 1) uniform sampler2D uTexture2; +layout(set = 2, binding = 0) uniform sampler2D uTextureDiscrete1; +layout(set = 2, binding = 1) uniform sampler2D uTextureDiscrete2; + +void main() +{ + FragColor = texture(uTexture2, vUV); + FragColor += texture(uTextureDiscrete2, vUV); +} diff --git a/shaders-msl/frag/fp16.desktop.invalid.frag b/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag similarity index 100% rename from shaders-msl/frag/fp16.desktop.invalid.frag rename to shaders-msl-no-opt/frag/fp16.desktop.invalid.frag diff --git a/shaders-msl-no-opt/frag/image-gather.frag b/shaders-msl-no-opt/frag/image-gather.frag new file mode 100644 index 00000000000..b492cfbe903 --- /dev/null +++ b/shaders-msl-no-opt/frag/image-gather.frag @@ -0,0 +1,14 @@ 
+#version 450 + +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 0) uniform sampler2D uSamp; +layout(set = 0, binding = 1) uniform sampler2DShadow uSampShadow; +layout(location = 0) in vec3 vUV; + +void main() +{ + FragColor = textureGather(uSamp, vUV.xy, 0); + FragColor += textureGather(uSamp, vUV.xy, 1); + FragColor += textureGather(uSampShadow, vUV.xy, vUV.z); +} diff --git a/shaders-msl/asm/frag/min-max-clamp.invalid.asm.frag b/shaders-msl-no-opt/frag/min-max-clamp.invalid.asm.frag similarity index 100% rename from shaders-msl/asm/frag/min-max-clamp.invalid.asm.frag rename to shaders-msl-no-opt/frag/min-max-clamp.invalid.asm.frag diff --git a/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag b/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag new file mode 100644 index 00000000000..ad566615f56 --- /dev/null +++ b/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag @@ -0,0 +1,293 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 205 +; Schema: 0 + OpCapability Shader + OpCapability Float16 + OpExtension "SPV_AMD_gpu_shader_half_float" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %v1 %v2 %v3 %v4 %h1 %h2 %h3 %h4 + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_AMD_gpu_shader_half_float" + OpName %main "main" + OpName %res "res" + OpName %res2 "res2" + OpName %res3 "res3" + OpName %res4 "res4" + OpName %hres "hres" + OpName %hres2 "hres2" + OpName %hres3 "hres3" + OpName %hres4 "hres4" + OpName %v1 "v1" + OpName %v2 "v2" + OpName %v3 "v3" + OpName %v4 "v4" + OpName %h1 "h1" + OpName %h2 "h2" + OpName %h3 "h3" + OpName %h4 "h4" + OpDecorate %v1 Location 0 + OpDecorate %v2 Location 1 + OpDecorate %v3 Location 2 + OpDecorate %v4 Location 3 + OpDecorate %h1 Location 4 + OpDecorate %h2 Location 5 + OpDecorate %h3 Location 6 + OpDecorate %h4 Location 7 + %void = 
OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %v3half = OpTypeVector %half 3 + %v4half = OpTypeVector %half 4 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Function_v3float = OpTypePointer Function %v3float +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Function_half = OpTypePointer Function %half +%_ptr_Input_half = OpTypePointer Input %half +%_ptr_Function_v2half = OpTypePointer Function %v2half +%_ptr_Input_v2half = OpTypePointer Input %v2half +%_ptr_Function_v3half = OpTypePointer Function %v3half +%_ptr_Input_v3half = OpTypePointer Input %v3half +%_ptr_Function_v4half = OpTypePointer Function %v4half +%_ptr_Input_v4half = OpTypePointer Input %v4half + %v1 = OpVariable %_ptr_Input_float Input + %v2 = OpVariable %_ptr_Input_v2float Input + %v3 = OpVariable %_ptr_Input_v3float Input + %v4 = OpVariable %_ptr_Input_v4float Input + %h1 = OpVariable %_ptr_Input_half Input + %h2 = OpVariable %_ptr_Input_v2half Input + %h3 = OpVariable %_ptr_Input_v3half Input + %h4 = OpVariable %_ptr_Input_v4half Input + %main = OpFunction %void None %3 + %5 = OpLabel + %res = OpVariable %_ptr_Function_float Function + %46 = OpLoad %float %v1 + %47 = OpLoad %float %v1 + %48 = OpExtInst %float %1 FMin %46 %47 + OpStore %res %48 + %49 = OpLoad %float %v1 + %50 = OpLoad %float %v1 + %51 = OpExtInst %float %1 FMax %49 %50 + OpStore %res %51 + %52 = OpLoad %float %v1 + %53 = OpLoad %float %v1 + %54 = OpLoad %float %v1 + %55 = OpExtInst %float %1 FClamp %52 %53 %54 + OpStore %res %55 + %56 = OpLoad %float %v1 + %57 = 
OpLoad %float %v1 + %58 = OpExtInst %float %1 NMin %56 %57 + OpStore %res %58 + %59 = OpLoad %float %v1 + %60 = OpLoad %float %v1 + %61 = OpExtInst %float %1 NMax %59 %60 + OpStore %res %61 + %62 = OpLoad %float %v1 + %63 = OpLoad %float %v1 + %64 = OpLoad %float %v1 + %65 = OpExtInst %float %1 NClamp %62 %63 %64 + OpStore %res %65 + %res2 = OpVariable %_ptr_Function_v2float Function + %66 = OpLoad %v2float %v2 + %67 = OpLoad %v2float %v2 + %68 = OpExtInst %v2float %1 FMin %66 %67 + OpStore %res2 %68 + %69 = OpLoad %v2float %v2 + %70 = OpLoad %v2float %v2 + %71 = OpExtInst %v2float %1 FMax %69 %70 + OpStore %res2 %71 + %72 = OpLoad %v2float %v2 + %73 = OpLoad %v2float %v2 + %74 = OpLoad %v2float %v2 + %75 = OpExtInst %v2float %1 FClamp %72 %73 %74 + OpStore %res2 %75 + %76 = OpLoad %v2float %v2 + %77 = OpLoad %v2float %v2 + %78 = OpExtInst %v2float %1 NMin %76 %77 + OpStore %res2 %78 + %79 = OpLoad %v2float %v2 + %80 = OpLoad %v2float %v2 + %81 = OpExtInst %v2float %1 NMax %79 %80 + OpStore %res2 %81 + %82 = OpLoad %v2float %v2 + %83 = OpLoad %v2float %v2 + %84 = OpLoad %v2float %v2 + %85 = OpExtInst %v2float %1 NClamp %82 %83 %84 + OpStore %res2 %85 + %res3 = OpVariable %_ptr_Function_v3float Function + %86 = OpLoad %v3float %v3 + %87 = OpLoad %v3float %v3 + %88 = OpExtInst %v3float %1 FMin %86 %87 + OpStore %res3 %88 + %89 = OpLoad %v3float %v3 + %90 = OpLoad %v3float %v3 + %91 = OpExtInst %v3float %1 FMax %89 %90 + OpStore %res3 %91 + %92 = OpLoad %v3float %v3 + %93 = OpLoad %v3float %v3 + %94 = OpLoad %v3float %v3 + %95 = OpExtInst %v3float %1 FClamp %92 %93 %94 + OpStore %res3 %95 + %96 = OpLoad %v3float %v3 + %97 = OpLoad %v3float %v3 + %98 = OpExtInst %v3float %1 NMin %96 %97 + OpStore %res3 %98 + %99 = OpLoad %v3float %v3 + %100 = OpLoad %v3float %v3 + %101 = OpExtInst %v3float %1 NMax %99 %100 + OpStore %res3 %101 + %102 = OpLoad %v3float %v3 + %103 = OpLoad %v3float %v3 + %104 = OpLoad %v3float %v3 + %105 = OpExtInst %v3float %1 NClamp %102 %103 %104 + 
OpStore %res3 %105 + %res4 = OpVariable %_ptr_Function_v4float Function + %106 = OpLoad %v4float %v4 + %107 = OpLoad %v4float %v4 + %108 = OpExtInst %v4float %1 FMin %106 %107 + OpStore %res4 %108 + %109 = OpLoad %v4float %v4 + %110 = OpLoad %v4float %v4 + %111 = OpExtInst %v4float %1 FMax %109 %110 + OpStore %res4 %111 + %112 = OpLoad %v4float %v4 + %113 = OpLoad %v4float %v4 + %114 = OpLoad %v4float %v4 + %115 = OpExtInst %v4float %1 FClamp %112 %113 %114 + OpStore %res4 %115 + %116 = OpLoad %v4float %v4 + %117 = OpLoad %v4float %v4 + %118 = OpExtInst %v4float %1 NMin %116 %117 + OpStore %res4 %118 + %119 = OpLoad %v4float %v4 + %120 = OpLoad %v4float %v4 + %121 = OpExtInst %v4float %1 NMax %119 %120 + OpStore %res4 %121 + %122 = OpLoad %v4float %v4 + %123 = OpLoad %v4float %v4 + %124 = OpLoad %v4float %v4 + %125 = OpExtInst %v4float %1 NClamp %122 %123 %124 + OpStore %res4 %125 + %hres = OpVariable %_ptr_Function_half Function + %126 = OpLoad %half %h1 + %127 = OpLoad %half %h1 + %128 = OpExtInst %half %1 FMin %126 %127 + OpStore %hres %128 + %129 = OpLoad %half %h1 + %130 = OpLoad %half %h1 + %131 = OpExtInst %half %1 FMax %129 %130 + OpStore %hres %131 + %132 = OpLoad %half %h1 + %133 = OpLoad %half %h1 + %134 = OpLoad %half %h1 + %135 = OpExtInst %half %1 FClamp %132 %133 %134 + OpStore %hres %135 + %136 = OpLoad %half %h1 + %137 = OpLoad %half %h1 + %138 = OpExtInst %half %1 NMin %136 %137 + OpStore %hres %138 + %139 = OpLoad %half %h1 + %140 = OpLoad %half %h1 + %141 = OpExtInst %half %1 NMax %139 %140 + OpStore %hres %141 + %142 = OpLoad %half %h1 + %143 = OpLoad %half %h1 + %144 = OpLoad %half %h1 + %145 = OpExtInst %half %1 NClamp %142 %143 %144 + OpStore %hres %145 + %hres2 = OpVariable %_ptr_Function_v2half Function + %146 = OpLoad %v2half %h2 + %147 = OpLoad %v2half %h2 + %148 = OpExtInst %v2half %1 FMin %146 %147 + OpStore %hres2 %148 + %149 = OpLoad %v2half %h2 + %150 = OpLoad %v2half %h2 + %151 = OpExtInst %v2half %1 FMax %149 %150 + OpStore %hres2 
%151 + %152 = OpLoad %v2half %h2 + %153 = OpLoad %v2half %h2 + %154 = OpLoad %v2half %h2 + %155 = OpExtInst %v2half %1 FClamp %152 %153 %154 + OpStore %hres2 %155 + %156 = OpLoad %v2half %h2 + %157 = OpLoad %v2half %h2 + %158 = OpExtInst %v2half %1 NMin %156 %157 + OpStore %hres2 %158 + %159 = OpLoad %v2half %h2 + %160 = OpLoad %v2half %h2 + %161 = OpExtInst %v2half %1 NMax %159 %160 + OpStore %hres2 %161 + %162 = OpLoad %v2half %h2 + %163 = OpLoad %v2half %h2 + %164 = OpLoad %v2half %h2 + %165 = OpExtInst %v2half %1 NClamp %162 %163 %164 + OpStore %hres2 %165 + %hres3 = OpVariable %_ptr_Function_v3half Function + %166 = OpLoad %v3half %h3 + %167 = OpLoad %v3half %h3 + %168 = OpExtInst %v3half %1 FMin %166 %167 + OpStore %hres3 %168 + %169 = OpLoad %v3half %h3 + %170 = OpLoad %v3half %h3 + %171 = OpExtInst %v3half %1 FMax %169 %170 + OpStore %hres3 %171 + %172 = OpLoad %v3half %h3 + %173 = OpLoad %v3half %h3 + %174 = OpLoad %v3half %h3 + %175 = OpExtInst %v3half %1 FClamp %172 %173 %174 + OpStore %hres3 %175 + %176 = OpLoad %v3half %h3 + %177 = OpLoad %v3half %h3 + %178 = OpExtInst %v3half %1 NMin %176 %177 + OpStore %hres3 %178 + %179 = OpLoad %v3half %h3 + %180 = OpLoad %v3half %h3 + %181 = OpExtInst %v3half %1 NMax %179 %180 + OpStore %hres3 %181 + %182 = OpLoad %v3half %h3 + %183 = OpLoad %v3half %h3 + %184 = OpLoad %v3half %h3 + %185 = OpExtInst %v3half %1 NClamp %182 %183 %184 + OpStore %hres3 %185 + %hres4 = OpVariable %_ptr_Function_v4half Function + %186 = OpLoad %v4half %h4 + %187 = OpLoad %v4half %h4 + %188 = OpExtInst %v4half %1 FMin %186 %187 + OpStore %hres4 %188 + %189 = OpLoad %v4half %h4 + %190 = OpLoad %v4half %h4 + %191 = OpExtInst %v4half %1 FMax %189 %190 + OpStore %hres4 %191 + %192 = OpLoad %v4half %h4 + %193 = OpLoad %v4half %h4 + %194 = OpLoad %v4half %h4 + %195 = OpExtInst %v4half %1 FClamp %192 %193 %194 + OpStore %hres4 %195 + %196 = OpLoad %v4half %h4 + %197 = OpLoad %v4half %h4 + %198 = OpExtInst %v4half %1 NMin %196 %197 + OpStore 
%hres4 %198 + %199 = OpLoad %v4half %h4 + %200 = OpLoad %v4half %h4 + %201 = OpExtInst %v4half %1 NMax %199 %200 + OpStore %hres4 %201 + %202 = OpLoad %v4half %h4 + %203 = OpLoad %v4half %h4 + %204 = OpLoad %v4half %h4 + %205 = OpExtInst %v4half %1 NClamp %202 %203 %204 + OpStore %hres4 %205 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/frag/nonuniform-constructor.msl2.frag b/shaders-msl-no-opt/frag/nonuniform-constructor.msl2.frag new file mode 100644 index 00000000000..4e0460afbbb --- /dev/null +++ b/shaders-msl-no-opt/frag/nonuniform-constructor.msl2.frag @@ -0,0 +1,14 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; +layout(location = 1) flat in int vIndex; + +layout(set = 0, binding = 0) uniform texture2D uTex[10]; +layout(set = 1, binding = 0) uniform sampler Immut; + +void main() +{ + FragColor = texture(nonuniformEXT(sampler2D(uTex[vIndex], Immut)), vUV); +} diff --git a/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag b/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag new file mode 100644 index 00000000000..59079fe58b4 --- /dev/null +++ b/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(set = 0, binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +}; + +layout(set = 0, binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +}; + +void callee2() +{ + values1[int(gl_FragCoord.x)] += 1; +} + +void callee() +{ + values0[int(gl_FragCoord.x)] += 1; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} diff --git a/shaders-msl-no-opt/frag/pull-interpolant-access-chain.msl23.frag b/shaders-msl-no-opt/frag/pull-interpolant-access-chain.msl23.frag new file mode 100644 index 00000000000..a2be5ef62f3 --- /dev/null 
+++ b/shaders-msl-no-opt/frag/pull-interpolant-access-chain.msl23.frag @@ -0,0 +1,12 @@ +#version 450 +layout(location = 0) centroid in vec4 a[2]; +layout(location = 2) centroid in vec4 b[2]; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor.x = interpolateAtOffset(a[0].x, vec2(0.5)); + FragColor.y = interpolateAtOffset(a[1].y, vec2(0.5)); + FragColor.z = interpolateAtOffset(b[0].z, vec2(0.5)); + FragColor.w = interpolateAtOffset(b[1].w, vec2(0.5)); +} diff --git a/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag b/shaders-msl-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag similarity index 100% rename from shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag rename to shaders-msl-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag diff --git a/shaders-msl/frag/shadow-compare-global-alias.invalid.frag b/shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag similarity index 100% rename from shaders-msl/frag/shadow-compare-global-alias.invalid.frag rename to shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag diff --git a/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag b/shaders-msl-no-opt/frag/subgroups.nocompat.invalid.vk.msl22.frag similarity index 85% rename from shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag rename to shaders-msl-no-opt/frag/subgroups.nocompat.invalid.vk.msl22.frag index 3a2cf0234cb..05aa5212026 100644 --- a/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag +++ b/shaders-msl-no-opt/frag/subgroups.nocompat.invalid.vk.msl22.frag @@ -28,7 +28,9 @@ void main() FragColor = float(gl_SubgroupLeMask); FragColor = float(gl_SubgroupLtMask); vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); uvec4 
ballot_value = subgroupBallot(true); bool inverse_ballot_value = subgroupInverseBallot(ballot_value); bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); @@ -40,17 +42,23 @@ void main() // shuffle uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); // shuffle relative uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); // vote bool has_all = subgroupAll(true); bool has_any = subgroupAny(true); bool has_equal = subgroupAllEqual(0); has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = subgroupAllEqual(bvec4(true, true, false, true)); // arithmetic vec4 added = subgroupAdd(vec4(20.0)); @@ -113,7 +121,11 @@ void main() // quad vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); } diff --git a/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl20.ios.framebuffer-fetch.frag b/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl20.ios.framebuffer-fetch.frag new file mode 100644 index 00000000000..70822aee999 --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl20.ios.framebuffer-fetch.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(binding = 4, input_attachment_index = 1) uniform subpassInput uInput; 
+layout(location = 1) out vec4 FragColor; + +void main() +{ + FragColor = subpassLoad(uInput); +} diff --git a/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl23.framebuffer-fetch.frag b/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl23.framebuffer-fetch.frag new file mode 100644 index 00000000000..70822aee999 --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl23.framebuffer-fetch.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(binding = 4, input_attachment_index = 1) uniform subpassInput uInput; +layout(location = 1) out vec4 FragColor; + +void main() +{ + FragColor = subpassLoad(uInput); +} diff --git a/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.ios.frag b/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.ios.frag new file mode 100644 index 00000000000..ef9ef77d56f --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.ios.frag @@ -0,0 +1,24 @@ +#version 450 + +layout(set = 0, input_attachment_index = 0, binding = 0) uniform subpassInput uSub; +layout(location = 0) out vec4 FragColor; + +vec4 samp3(subpassInput uS) +{ + return subpassLoad(uS); +} + +vec4 samp2(subpassInput uS) +{ + return subpassLoad(uS) + samp3(uS); +} + +vec4 samp() +{ + return subpassLoad(uSub) + samp3(uSub); +} + +void main() +{ + FragColor = samp() + samp2(uSub); +} diff --git a/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.msl23.frag b/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.msl23.frag new file mode 100644 index 00000000000..ef9ef77d56f --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.msl23.frag @@ -0,0 +1,24 @@ +#version 450 + +layout(set = 0, input_attachment_index = 0, binding = 0) uniform subpassInput uSub; +layout(location = 0) out vec4 FragColor; + +vec4 samp3(subpassInput uS) +{ + return subpassLoad(uS); +} + +vec4 
samp2(subpassInput uS) +{ + return subpassLoad(uS) + samp3(uS); +} + +vec4 samp() +{ + return subpassLoad(uSub) + samp3(uSub); +} + +void main() +{ + FragColor = samp() + samp2(uSub); +} diff --git a/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.argument.frag b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.argument.frag new file mode 100644 index 00000000000..671a4d1b416 --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.argument.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 10, input_attachment_index = 1) uniform subpassInput uSub; +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 9) uniform texture2D uTex; +layout(set = 0, binding = 8) uniform sampler uSampler; + +void main() +{ + FragColor = subpassLoad(uSub) + texture(sampler2D(uTex, uSampler), vec2(0.5)); +} diff --git a/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.frag b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.frag new file mode 100644 index 00000000000..671a4d1b416 --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 10, input_attachment_index = 1) uniform subpassInput uSub; +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 9) uniform texture2D uTex; +layout(set = 0, binding = 8) uniform sampler uSampler; + +void main() +{ + FragColor = subpassLoad(uSub) + texture(sampler2D(uTex, uSampler), vec2(0.5)); +} diff --git a/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.argument.frag b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.argument.frag new file mode 100644 index 00000000000..671a4d1b416 --- /dev/null +++ 
b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.argument.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 10, input_attachment_index = 1) uniform subpassInput uSub; +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 9) uniform texture2D uTex; +layout(set = 0, binding = 8) uniform sampler uSampler; + +void main() +{ + FragColor = subpassLoad(uSub) + texture(sampler2D(uTex, uSampler), vec2(0.5)); +} diff --git a/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.frag b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.frag new file mode 100644 index 00000000000..671a4d1b416 --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 10, input_attachment_index = 1) uniform subpassInput uSub; +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 9) uniform texture2D uTex; +layout(set = 0, binding = 8) uniform sampler uSampler; + +void main() +{ + FragColor = subpassLoad(uSub) + texture(sampler2D(uTex, uSampler), vec2(0.5)); +} diff --git a/shaders-msl-no-opt/frag/texture-gather-uint-component.asm.frag b/shaders-msl-no-opt/frag/texture-gather-uint-component.asm.frag new file mode 100644 index 00000000000..b4d9509ab49 --- /dev/null +++ b/shaders-msl-no-opt/frag/texture-gather-uint-component.asm.frag @@ -0,0 +1,42 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 22 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vUV + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uSamp "uSamp" + OpName %vUV "vUV" + OpDecorate %FragColor Location 0 + OpDecorate %uSamp DescriptorSet 0 + OpDecorate %uSamp 
Binding 0 + OpDecorate %vUV Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %10 = OpTypeImage %float 2D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %uSamp = OpVariable %_ptr_UniformConstant_11 UniformConstant + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input + %int = OpTypeInt 32 0 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %11 %uSamp + %18 = OpLoad %v2float %vUV + %21 = OpImageGather %v4float %14 %18 %int_1 + OpStore %FragColor %21 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.argument.msl2.frag b/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.argument.msl2.frag new file mode 100644 index 00000000000..f3cf0e190d9 --- /dev/null +++ b/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.argument.msl2.frag @@ -0,0 +1,18 @@ +#version 450 + +struct Foo +{ + vec4 v; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + Foo foo; +} ubos[2]; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = ubos[1].foo.v; +} diff --git a/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.frag b/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.frag new file mode 100644 index 00000000000..f3cf0e190d9 --- /dev/null +++ b/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.frag @@ -0,0 +1,18 @@ +#version 450 + +struct Foo +{ + vec4 v; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + Foo foo; +} ubos[2]; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = ubos[1].foo.v; +} diff --git a/shaders-msl-no-opt/frag/ubo-offset-out-of-order.frag 
b/shaders-msl-no-opt/frag/ubo-offset-out-of-order.frag new file mode 100644 index 00000000000..77760522f94 --- /dev/null +++ b/shaders-msl-no-opt/frag/ubo-offset-out-of-order.frag @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 0) uniform UBO +{ + layout(offset = 16) mat4 m; + layout(offset = 0) vec4 v; +}; + +layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = m * vColor + v; +} diff --git a/shaders-msl-no-opt/frag/variables.zero-initialize.frag b/shaders-msl-no-opt/frag/variables.zero-initialize.frag new file mode 100644 index 00000000000..41da8001f47 --- /dev/null +++ b/shaders-msl-no-opt/frag/variables.zero-initialize.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; + +int uninit_int; +ivec4 uninit_vector; +mat4 uninit_matrix; + +struct Foo { int a; }; +Foo uninit_foo; + +void main() +{ + int uninit_function_int; + if (vColor.x > 10.0) + uninit_function_int = 10; + else + uninit_function_int = 20; + FragColor = vColor; +} diff --git a/shaders-msl-no-opt/frag/volatile-helper-invocation.msl23.spv16.frag b/shaders-msl-no-opt/frag/volatile-helper-invocation.msl23.spv16.frag new file mode 100644 index 00000000000..9a8d9d20b25 --- /dev/null +++ b/shaders-msl-no-opt/frag/volatile-helper-invocation.msl23.spv16.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = float(gl_HelperInvocation); + demote; + FragColor = float(gl_HelperInvocation); +} diff --git a/shaders-msl-no-opt/packing/array-of-vec3.comp b/shaders-msl-no-opt/packing/array-of-vec3.comp new file mode 100644 index 00000000000..61572122222 --- /dev/null +++ b/shaders-msl-no-opt/packing/array-of-vec3.comp @@ -0,0 +1,13 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, scalar) 
buffer SSBO +{ + vec3 v[16]; +}; + +void main() +{ + v[1] = v[0]; +} diff --git a/shaders-msl-no-opt/packing/array-of-vec4.comp b/shaders-msl-no-opt/packing/array-of-vec4.comp new file mode 100644 index 00000000000..c5bf5e8a3fa --- /dev/null +++ b/shaders-msl-no-opt/packing/array-of-vec4.comp @@ -0,0 +1,13 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, scalar) buffer SSBO +{ + vec4 v[16]; +}; + +void main() +{ + v[1] = v[0]; +} diff --git a/shaders-msl-no-opt/packing/isolated-scalar-access.comp b/shaders-msl-no-opt/packing/isolated-scalar-access.comp new file mode 100644 index 00000000000..32c92889934 --- /dev/null +++ b/shaders-msl-no-opt/packing/isolated-scalar-access.comp @@ -0,0 +1,25 @@ +#version 450 + +layout(set = 0, binding = 0) buffer SSBO +{ + vec4 v; + mat4 cm; + layout(row_major) mat4 rm; + + vec3 v3; + float f; +}; + +shared vec4 shared_vec4; +shared vec3 shared_vec3; + +void main() +{ + v.x = 10.0; + v3.y = 40.0; + cm[1][2] = 20.0; + rm[3][1] = 30.0; + + shared_vec4.z = 40.0; + shared_vec3.y = 1.0; +} diff --git a/shaders-msl-no-opt/packing/load-store-col-rows.comp b/shaders-msl-no-opt/packing/load-store-col-rows.comp new file mode 100644 index 00000000000..b3f2897034d --- /dev/null +++ b/shaders-msl-no-opt/packing/load-store-col-rows.comp @@ -0,0 +1,59 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +layout(binding = 0, std140) buffer SSBO1 +{ + mat2 a; + layout(row_major) mat2 a2; +}; + +layout(scalar, binding = 1) buffer SSBO2 +{ + mat2x3 b; + layout(row_major) mat3x2 b2; +}; + +void load_store_column() +{ + vec2 u = a[0]; + vec2 v = a[1]; + u += v; + a[0] = u; + a[1] = v; +} + +void load_store_row() +{ + vec2 u = a2[0]; + vec2 v = a2[1]; + u += v; + a2[0] = u; + a2[1] = v; +} + +void load_store_packed_column() +{ + vec3 u = b[0]; + vec3 v = b[1]; + u += v; + b[0] = u; + b[1] = v; +} + +void load_store_packed_row() +{ + 
vec2 u = b2[0]; + vec2 v = b2[1]; + u += v; + b2[0] = u; + b2[1] = v; +} + +void main() +{ + load_store_column(); + load_store_row(); + load_store_packed_column(); + load_store_packed_row(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp b/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp new file mode 100644 index 00000000000..6a94c86ac0d --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat2 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x2-std140.comp b/shaders-msl-no-opt/packing/matrix-2x2-std140.comp new file mode 100644 index 00000000000..3940e5c3b9d --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x2-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat2 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x2-std430.comp b/shaders-msl-no-opt/packing/matrix-2x2-std430.comp new file mode 100644 index 00000000000..342c3989ca5 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x2-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat2 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp b/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp new file mode 100644 index 00000000000..cf40f89a052 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat2x3 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x3-std140.comp b/shaders-msl-no-opt/packing/matrix-2x3-std140.comp new file mode 100644 index 00000000000..6fbe149d1fe --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x3-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat2x3 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x3-std430.comp b/shaders-msl-no-opt/packing/matrix-2x3-std430.comp new file mode 100644 index 00000000000..36a6bab1457 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x3-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat2x3 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp b/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp new file mode 100644 index 00000000000..70fa4748d8e --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat2x4 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x4-std140.comp b/shaders-msl-no-opt/packing/matrix-2x4-std140.comp new file mode 100644 index 00000000000..6c5d06fe514 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x4-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat2x4 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x4-std430.comp b/shaders-msl-no-opt/packing/matrix-2x4-std430.comp new file mode 100644 index 00000000000..177b9669402 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x4-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat2x4 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp b/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp new file mode 100644 index 00000000000..296efa673c4 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat3x2 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x2-std140.comp b/shaders-msl-no-opt/packing/matrix-3x2-std140.comp new file mode 100644 index 00000000000..1334c4eae70 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x2-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat3x2 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x2-std430.comp b/shaders-msl-no-opt/packing/matrix-3x2-std430.comp new file mode 100644 index 00000000000..fe82993ddba --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x2-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat3x2 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp b/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp new file mode 100644 index 00000000000..0741384ea23 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat3 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x3-std140.comp b/shaders-msl-no-opt/packing/matrix-3x3-std140.comp new file mode 100644 index 00000000000..0de5d599c12 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x3-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat3 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x3-std430.comp b/shaders-msl-no-opt/packing/matrix-3x3-std430.comp new file mode 100644 index 00000000000..8e48109e935 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x3-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat3 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp b/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp new file mode 100644 index 00000000000..23297d5c632 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat3x4 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x4-std140.comp b/shaders-msl-no-opt/packing/matrix-3x4-std140.comp new file mode 100644 index 00000000000..11135eeccfc --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x4-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat3x4 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x4-std430.comp b/shaders-msl-no-opt/packing/matrix-3x4-std430.comp new file mode 100644 index 00000000000..78c577f2ef2 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x4-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat3x4 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp b/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp new file mode 100644 index 00000000000..412c208148d --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat4x2 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x2-std140.comp b/shaders-msl-no-opt/packing/matrix-4x2-std140.comp new file mode 100644 index 00000000000..e130cb0a465 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x2-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat4x2 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x2-std430.comp b/shaders-msl-no-opt/packing/matrix-4x2-std430.comp new file mode 100644 index 00000000000..76aa9ae4a64 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x2-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat4x2 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp b/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp new file mode 100644 index 00000000000..8468b289cdb --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat4x3 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x3-std140.comp b/shaders-msl-no-opt/packing/matrix-4x3-std140.comp new file mode 100644 index 00000000000..8223eae49f2 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x3-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat4x3 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x3-std430.comp b/shaders-msl-no-opt/packing/matrix-4x3-std430.comp new file mode 100644 index 00000000000..aa4d685cf9b --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x3-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat4x3 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp b/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp new file mode 100644 index 00000000000..6f14c07311e --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat4 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x4-std140.comp b/shaders-msl-no-opt/packing/matrix-4x4-std140.comp new file mode 100644 index 00000000000..45193b3257f --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x4-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat4 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x4-std430.comp b/shaders-msl-no-opt/packing/matrix-4x4-std430.comp new file mode 100644 index 00000000000..3a1eb9f020f --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x4-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat4 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp b/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp new file mode 100644 index 00000000000..9b7b9fc37ef --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(row_major, set = 0, binding = 0) buffer SSBO +{ + mat3 m0; + mat3 m1; + vec3 v0; + vec3 v1; +}; + +void main() +{ + v0 = (m0 * m1) * v1; + v0 = m0 * (m1 * v1); +} diff --git a/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp new file mode 100644 index 00000000000..cd77d242a6d --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp @@ -0,0 +1,19 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +layout(scalar, set = 0, binding = 0) buffer SSBO +{ + mat3 m0; + mat3 m1; + vec3 v0; + vec3 v1; +}; + +void main() +{ + v0 = (m0 * m1) * v1; + v0 = m0 * (m1 * v1); + v0 = (v1 * m0) * m1; + v0 = v1 * (m0 * m1); +} diff --git a/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp new file mode 100644 index 00000000000..847d2e8f608 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, set = 0, binding = 0) buffer SSBO +{ + mat2 m0; + mat2 m1; + vec2 v0; + vec2 v1; +}; + +void main() +{ + v0 = (m0 * m1) * v1; + v0 = m0 * (m1 * v1); + v0 = (v1 * m0) * 
m1; + v0 = v1 * (m0 * m1); +} diff --git a/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp new file mode 100644 index 00000000000..60a3da0aa35 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp @@ -0,0 +1,19 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +layout(scalar, row_major, set = 0, binding = 0) buffer SSBO +{ + mat3 m0; + mat3 m1; + vec3 v0; + vec3 v1; +}; + +void main() +{ + v0 = (m0 * m1) * v1; + v0 = m0 * (m1 * v1); + v0 = (v1 * m0) * m1; + v0 = v1 * (m0 * m1); +} diff --git a/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp new file mode 100644 index 00000000000..5b71ae9bc7a --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, row_major, set = 0, binding = 0) buffer SSBO +{ + mat2 m0; + mat2 m1; + vec2 v0; + vec2 v1; +}; + +void main() +{ + v0 = (m0 * m1) * v1; + v0 = m0 * (m1 * v1); + v0 = (v1 * m0) * m1; + v0 = v1 * (m0 * m1); +} diff --git a/shaders-msl-no-opt/packing/member-padding.comp b/shaders-msl-no-opt/packing/member-padding.comp new file mode 100644 index 00000000000..a413662f386 --- /dev/null +++ b/shaders-msl-no-opt/packing/member-padding.comp @@ -0,0 +1,14 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, set = 0, binding = 0) buffer SSBO +{ + layout(offset = 16) float a; + layout(offset = 40) float b; +}; + +void main() +{ + a = 10.0; + b = 20.0; +} diff --git a/shaders-msl-no-opt/packing/std140-array-of-vectors.comp b/shaders-msl-no-opt/packing/std140-array-of-vectors.comp new file mode 100644 index 00000000000..260a49810ab --- /dev/null +++ b/shaders-msl-no-opt/packing/std140-array-of-vectors.comp @@ -0,0 +1,47 @@ +#version 450 +layout(local_size_x 
= 1) in; + +layout(std140, set = 0, binding = 0) buffer SSBO +{ + float v1[4]; + vec2 v2[4]; + vec3 v3[4]; + vec4 v4[4]; + + float v1_array_of_array[4][4]; + vec2 v2_array_of_array[4][4]; + vec3 v3_array_of_array[4][4]; + vec4 v4_array_of_array[4][4]; + + float v_unsized[]; +}; + +void main() +{ + float loaded1 = v1[1]; + v1[2] = loaded1; + + vec2 loaded2 = v2[1]; + v2[2] = loaded2; + + vec3 loaded3 = v3[1]; + v3[2] = loaded3; + + vec4 loaded4 = v4[1]; + v4[2] = loaded4; + + loaded1 = v1_array_of_array[1][2]; + v1_array_of_array[2][3] = loaded1; + + loaded2 = v2_array_of_array[1][2]; + v2_array_of_array[2][3] = loaded2; + + loaded3 = v3_array_of_array[1][2]; + v3_array_of_array[2][3] = loaded3; + + loaded4 = v4_array_of_array[1][2]; + v4_array_of_array[2][3] = loaded4; + + loaded1 = v_unsized[1]; + v_unsized[2] = loaded1; +} diff --git a/shaders-msl-no-opt/packing/struct-alignment.comp b/shaders-msl-no-opt/packing/struct-alignment.comp new file mode 100644 index 00000000000..f9f58b7327f --- /dev/null +++ b/shaders-msl-no-opt/packing/struct-alignment.comp @@ -0,0 +1,22 @@ +#version 450 +layout(local_size_x = 1) in; + +struct Foo +{ + vec3 a; // <- This one should become packed_float3, and the MSL alignment of the struct is now 4. + float b; +}; + +layout(std140, set = 0, binding = 0) buffer SSBO +{ + vec2 a; + float b; + // <- We expect 4 bytes of padding here since MSL alignment of Foo must be lowered to 4. 
+ Foo foo; +}; + +void main() +{ + a.x = 10.0; + b = 20.0; +} diff --git a/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp b/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp new file mode 100644 index 00000000000..08742d5bba5 --- /dev/null +++ b/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp @@ -0,0 +1,18 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +struct Foo +{ + vec3 a; +}; + +layout(scalar, set = 0, binding = 0) buffer SSBOScalar +{ + Foo v[]; +} buffer_scalar; + +void main() +{ + buffer_scalar.v[1].a.y = 1.0; +} diff --git a/shaders-msl-no-opt/packing/struct-packing-recursive.comp b/shaders-msl-no-opt/packing/struct-packing-recursive.comp new file mode 100644 index 00000000000..c3281b9ce01 --- /dev/null +++ b/shaders-msl-no-opt/packing/struct-packing-recursive.comp @@ -0,0 +1,29 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +struct Foo +{ + vec4 a; +}; + +struct Bar +{ + Foo a; +}; + +struct Baz +{ + Bar a; +}; + +layout(scalar, set = 0, binding = 0) buffer SSBOScalar +{ + float v; + Baz baz; +} buffer_scalar; + +void main() +{ + buffer_scalar.baz.a.a.a.a.x = 10.0; +} diff --git a/shaders-msl-no-opt/packing/struct-packing.comp b/shaders-msl-no-opt/packing/struct-packing.comp new file mode 100644 index 00000000000..69a80382e48 --- /dev/null +++ b/shaders-msl-no-opt/packing/struct-packing.comp @@ -0,0 +1,27 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +// Foo will be marked packed_float3 because offset of bar is just 12 bytes after foo. +struct Foo +{ + vec3 a; +}; + +// Bar will be marked as packed due to alignment of the struct itself cannot work without packed. 
+struct Bar +{ + vec3 a; +}; + +layout(scalar, set = 0, binding = 0) buffer SSBOScalar +{ + Foo foo; + Bar bar; +} buffer_scalar; + +void main() +{ + buffer_scalar.foo.a.x = 10.0; + buffer_scalar.bar.a.x = 20.0; +} diff --git a/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp b/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp new file mode 100644 index 00000000000..ef1ba65cf4f --- /dev/null +++ b/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp @@ -0,0 +1,45 @@ +#version 450 +layout(local_size_x = 1) in; + +struct A +{ + float v; +}; + +struct B +{ + vec2 v; +}; + +struct C +{ + vec3 v; +}; + +struct D +{ + vec4 v; +}; + +struct E +{ + vec4 a; + vec2 b; +}; + +layout(std140, set = 0, binding = 0) buffer SSBO +{ + A a[2][4]; + B b[2][4]; + C c[2][4]; + D d[2][4]; + mat2 e[2][4]; + E f[]; +}; + +void main() +{ + f[0].a = vec4(2.0); + mat2 tmp = e[0][1]; + e[1][2] = tmp; +} diff --git a/shaders-msl-no-opt/packing/struct-size-padding.comp b/shaders-msl-no-opt/packing/struct-size-padding.comp new file mode 100644 index 00000000000..ad65415bafe --- /dev/null +++ b/shaders-msl-no-opt/packing/struct-size-padding.comp @@ -0,0 +1,45 @@ +#version 450 +layout(local_size_x = 1) in; + +struct A +{ + float v; +}; + +struct B +{ + vec2 v; +}; + +struct C +{ + vec3 v; +}; + +struct D +{ + vec4 v; +}; + +struct E +{ + vec4 a; + vec2 b; +}; + +layout(std140, set = 0, binding = 0) buffer SSBO +{ + A a[4]; + B b[4]; + C c[4]; + D d[4]; + mat2 e[4]; + E f[]; +}; + +void main() +{ + f[0].a = vec4(2.0); + mat2 tmp = e[1]; + e[2] = tmp; +} diff --git a/shaders-msl-no-opt/tesc/copy-tess-level.tesc b/shaders-msl-no-opt/tesc/copy-tess-level.tesc new file mode 100644 index 00000000000..7510ff8f675 --- /dev/null +++ b/shaders-msl-no-opt/tesc/copy-tess-level.tesc @@ -0,0 +1,12 @@ +#version 450 +layout(vertices = 1) out; + +void main() +{ + gl_TessLevelInner = float[](1.0, 2.0); + gl_TessLevelOuter = float[](1.0, 2.0, 3.0, 4.0); + + float 
inner[2] = gl_TessLevelInner; + float outer[4] = gl_TessLevelOuter; + gl_out[gl_InvocationID].gl_Position = vec4(1.0); +} diff --git a/shaders-msl-no-opt/tesc/passthrough-clip-cull.multi-patch.tesc b/shaders-msl-no-opt/tesc/passthrough-clip-cull.multi-patch.tesc new file mode 100644 index 00000000000..16b60e495ad --- /dev/null +++ b/shaders-msl-no-opt/tesc/passthrough-clip-cull.multi-patch.tesc @@ -0,0 +1,22 @@ +#version 450 + +layout(vertices = 4) out; + +in gl_PerVertex +{ + float gl_ClipDistance[2]; + float gl_CullDistance[1]; +} gl_in[]; + +out gl_PerVertex +{ + float gl_ClipDistance[2]; + float gl_CullDistance[1]; +} gl_out[]; + +void main() +{ + gl_out[gl_InvocationID].gl_ClipDistance[0] = gl_in[gl_InvocationID].gl_ClipDistance[0]; + gl_out[gl_InvocationID].gl_ClipDistance[1] = gl_in[gl_InvocationID].gl_ClipDistance[1]; + gl_out[gl_InvocationID].gl_CullDistance[0] = gl_in[gl_InvocationID].gl_CullDistance[0]; +} diff --git a/shaders-msl-no-opt/tesc/tess-level-read-write-in-function-quad.tesc b/shaders-msl-no-opt/tesc/tess-level-read-write-in-function-quad.tesc new file mode 100644 index 00000000000..4ccfa28267a --- /dev/null +++ b/shaders-msl-no-opt/tesc/tess-level-read-write-in-function-quad.tesc @@ -0,0 +1,24 @@ +#version 450 +layout(vertices = 1) out; + +float load_tess_level_in_func() +{ + return gl_TessLevelInner[0] + gl_TessLevelOuter[1]; +} + +void store_tess_level_in_func() +{ + gl_TessLevelInner[0] = 1.0; + gl_TessLevelInner[1] = 2.0; + gl_TessLevelOuter[0] = 3.0; + gl_TessLevelOuter[1] = 4.0; + gl_TessLevelOuter[2] = 5.0; + gl_TessLevelOuter[3] = 6.0; +} + +void main() +{ + store_tess_level_in_func(); + float v = load_tess_level_in_func(); + gl_out[gl_InvocationID].gl_Position = vec4(v); +} diff --git a/shaders-msl-no-opt/tese/builtin-input-automatic-attribute-assignment.tese b/shaders-msl-no-opt/tese/builtin-input-automatic-attribute-assignment.tese new file mode 100644 index 00000000000..1d8a50062e9 --- /dev/null +++ 
b/shaders-msl-no-opt/tese/builtin-input-automatic-attribute-assignment.tese @@ -0,0 +1,10 @@ +#version 450 +layout(quads) in; + +layout(location = 0) patch in vec4 FragColor; +layout(location = 2) in vec4 FragColors[]; + +void main() +{ + gl_Position = vec4(1.0) + FragColor + FragColors[0] + FragColors[1] + gl_TessLevelInner[0] + gl_TessLevelOuter[gl_PrimitiveID & 1] + gl_in[0].gl_Position; +} diff --git a/shaders-msl-no-opt/tese/load-clip-cull.msl2.tese b/shaders-msl-no-opt/tese/load-clip-cull.msl2.tese new file mode 100644 index 00000000000..e86619e1a02 --- /dev/null +++ b/shaders-msl-no-opt/tese/load-clip-cull.msl2.tese @@ -0,0 +1,19 @@ +#version 450 +layout(quads) in; + +in gl_PerVertex +{ + float gl_ClipDistance[2]; + float gl_CullDistance[3]; + vec4 gl_Position; +} gl_in[]; + +void main() +{ + gl_Position.x = gl_in[0].gl_ClipDistance[0]; + gl_Position.y = gl_in[1].gl_CullDistance[0]; + gl_Position.z = gl_in[0].gl_ClipDistance[1]; + gl_Position.w = gl_in[1].gl_CullDistance[1]; + gl_Position += gl_in[0].gl_Position; + gl_Position += gl_in[1].gl_Position; +} diff --git a/shaders-msl-no-opt/vert/cull-distance.for-tess.vert b/shaders-msl-no-opt/vert/cull-distance.for-tess.vert new file mode 100644 index 00000000000..8df181cdb30 --- /dev/null +++ b/shaders-msl-no-opt/vert/cull-distance.for-tess.vert @@ -0,0 +1,10 @@ +#version 450 + +out float gl_CullDistance[2]; + +void main() +{ + gl_CullDistance[0] = 1.0; + gl_CullDistance[1] = 3.0; + gl_Position = vec4(1.0); +} diff --git a/shaders-msl/vert/layer.msl11.invalid.vert b/shaders-msl-no-opt/vert/layer.msl11.invalid.vert similarity index 100% rename from shaders-msl/vert/layer.msl11.invalid.vert rename to shaders-msl-no-opt/vert/layer.msl11.invalid.vert diff --git a/shaders-msl-no-opt/vert/modf-storage-class.capture.vert b/shaders-msl-no-opt/vert/modf-storage-class.capture.vert new file mode 100644 index 00000000000..447c4975d93 --- /dev/null +++ b/shaders-msl-no-opt/vert/modf-storage-class.capture.vert @@ -0,0 +1,9 
@@ +#version 450 + +layout(location = 0) out vec4 f; +layout(location = 0) in vec4 f2; + +void main() +{ + gl_Position = modf(f2, f); +} diff --git a/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert b/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert new file mode 100644 index 00000000000..2c142a78105 --- /dev/null +++ b/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert @@ -0,0 +1,26 @@ +#version 310 es + +layout(location = 0) in int Index1; +layout(location = 1) in int Index2; + +vec4 consume_constant_arrays2(const vec4 positions[4], const vec4 positions2[4]) +{ + return positions[Index1] + positions2[Index2]; +} + +vec4 consume_constant_arrays(const vec4 positions[4], const vec4 positions2[4]) +{ + return consume_constant_arrays2(positions, positions2); +} + +const vec4 LUT1[] = vec4[](vec4(0.0), vec4(1.0), vec4(2.0), vec4(3.0)); + +void main() +{ + vec4 LUT2[4]; + LUT2[0] = vec4(10.0); + LUT2[1] = vec4(11.0); + LUT2[2] = vec4(12.0); + LUT2[3] = vec4(13.0); + gl_Position = consume_constant_arrays(LUT1, LUT2); +} diff --git a/shaders-msl-no-opt/vert/uninitialized-vertex-output.vert b/shaders-msl-no-opt/vert/uninitialized-vertex-output.vert new file mode 100644 index 00000000000..54c7afd07aa --- /dev/null +++ b/shaders-msl-no-opt/vert/uninitialized-vertex-output.vert @@ -0,0 +1,8 @@ +#version 450 + +layout(location = 0) out vec4 Pos; + +void main() +{ + gl_Position = vec4(1.0); +} diff --git a/shaders-msl-no-opt/vert/unused-subgroup-builtin.msl22.vert b/shaders-msl-no-opt/vert/unused-subgroup-builtin.msl22.vert new file mode 100644 index 00000000000..4ec228df294 --- /dev/null +++ b/shaders-msl-no-opt/vert/unused-subgroup-builtin.msl22.vert @@ -0,0 +1,7 @@ +#version 450 +#extension GL_KHR_shader_subgroup_ballot : require + +void main() +{ + gl_SubgroupEqMask; +} diff --git a/shaders-msl/vert/viewport-index.msl2.invalid.vert b/shaders-msl-no-opt/vert/viewport-index.msl2.invalid.vert similarity index 100% rename 
from shaders-msl/vert/viewport-index.msl2.invalid.vert rename to shaders-msl-no-opt/vert/viewport-index.msl2.invalid.vert diff --git a/shaders-msl/amd/shader_trinary_minmax.msl21.comp b/shaders-msl/amd/shader_trinary_minmax.msl21.comp new file mode 100644 index 00000000000..f836146a172 --- /dev/null +++ b/shaders-msl/amd/shader_trinary_minmax.msl21.comp @@ -0,0 +1,11 @@ +#version 450 +#extension GL_AMD_shader_trinary_minmax : require + +layout (local_size_x = 64) in; + +void main () +{ + int t11 = min3(0, 3, 2); + int t12 = max3(0, 3, 2); + int t13 = mid3(0, 3, 2); +} diff --git a/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/shaders-msl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..b7b4e0b2e1e --- /dev/null +++ b/shaders-msl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,101 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %bool = OpTypeBool + %bvec4 = OpTypeVector %bool 4 + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero 
= OpConstant %int 0 + %one = OpConstant %int 1 + %uzero = OpConstant %uint 0 + %uone = OpConstant %uint 1 + %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone + %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + + %result_slt = OpSLessThan %bvec4 %input0 %input1 + %result_sle = OpSLessThanEqual %bvec4 %input0 %input1 + %result_ult = OpULessThan %bvec4 %input0 %input1 + %result_ule = OpULessThanEqual %bvec4 %input0 %input1 + %result_sgt = OpSGreaterThan %bvec4 %input0 %input1 + %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1 + %result_ugt = OpUGreaterThan %bvec4 %input0 %input1 + %result_uge = OpUGreaterThanEqual %bvec4 %input0 %input1 + + %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_slt + + %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sle + + %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ult + + %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ule + + %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sgt + + %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sge + + %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ugt + + %int_uge = OpSelect %uvec4 
%result_uge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_uge + + + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp b/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp new file mode 100644 index 00000000000..b01262f5bd4 --- /dev/null +++ b/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp @@ -0,0 +1,43 @@ +OpCapability Shader +OpExtension "SPV_KHR_storage_buffer_storage_class" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %main "main" %var_id +OpExecutionMode %main LocalSize 1 1 1 +OpDecorate %var_id BuiltIn GlobalInvocationId +OpDecorate %var_input Binding 0 +OpDecorate %var_input DescriptorSet 0 +OpDecorate %var_outdata Binding 1 +OpDecorate %var_outdata DescriptorSet 0 +OpMemberDecorate %type_container_struct 0 Offset 0 +OpMemberDecorate %type_container_struct 1 Offset 4 +OpMemberDecorate %type_container_struct 2 Offset 8 +OpMemberDecorate %type_container_struct 3 Offset 12 +OpDecorate %type_container_struct Block +%bool = OpTypeBool +%void = OpTypeVoid +%voidf = OpTypeFunction %void +%u32 = OpTypeInt 32 0 +%i32 = OpTypeInt 32 1 +%f32 = OpTypeFloat 32 +%uvec3 = OpTypeVector %u32 3 +%fvec3 = OpTypeVector %f32 3 +%uvec3ptr = OpTypePointer Input %uvec3 +%i32ptr = OpTypePointer Uniform %i32 +%f32ptr = OpTypePointer Uniform %f32 +%i32arr = OpTypeRuntimeArray %i32 +%f32arr = OpTypeRuntimeArray %f32 +%type_empty_struct = OpTypeStruct +%type_container_struct = OpTypeStruct %i32 %type_empty_struct %type_empty_struct %i32 +%type_container_struct_ubo_ptr = OpTypePointer Uniform %type_container_struct +%type_container_struct_ssbo_ptr = OpTypePointer StorageBuffer %type_container_struct +%var_id = OpVariable %uvec3ptr Input +%var_input = OpVariable %type_container_struct_ssbo_ptr StorageBuffer +%var_outdata = OpVariable %type_container_struct_ssbo_ptr StorageBuffer + +%main = OpFunction %void None %voidf +%label = OpLabel +%input_copy = OpCopyObject %type_container_struct_ssbo_ptr %var_input +%result = OpLoad 
%type_container_struct %input_copy +OpStore %var_outdata %result +OpReturn +OpFunctionEnd diff --git a/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp b/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp new file mode 100644 index 00000000000..63df59ac328 --- /dev/null +++ b/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp @@ -0,0 +1,43 @@ +OpCapability Shader +OpExtension "SPV_KHR_storage_buffer_storage_class" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %main "main" %var_id +OpExecutionMode %main LocalSize 1 1 1 +OpDecorate %var_id BuiltIn GlobalInvocationId +OpDecorate %var_input Binding 0 +OpDecorate %var_input DescriptorSet 0 +OpDecorate %var_outdata Binding 1 +OpDecorate %var_outdata DescriptorSet 0 +OpMemberDecorate %type_container_struct 0 Offset 0 +OpMemberDecorate %type_container_struct 1 Offset 16 +OpMemberDecorate %type_container_struct 2 Offset 32 +OpMemberDecorate %type_container_struct 3 Offset 48 +OpDecorate %type_container_struct Block +%bool = OpTypeBool +%void = OpTypeVoid +%voidf = OpTypeFunction %void +%u32 = OpTypeInt 32 0 +%i32 = OpTypeInt 32 1 +%f32 = OpTypeFloat 32 +%uvec3 = OpTypeVector %u32 3 +%fvec3 = OpTypeVector %f32 3 +%uvec3ptr = OpTypePointer Input %uvec3 +%i32ptr = OpTypePointer Uniform %i32 +%f32ptr = OpTypePointer Uniform %f32 +%i32arr = OpTypeRuntimeArray %i32 +%f32arr = OpTypeRuntimeArray %f32 +%type_empty_struct = OpTypeStruct +%type_container_struct = OpTypeStruct %i32 %type_empty_struct %type_empty_struct %i32 +%type_container_struct_ubo_ptr = OpTypePointer Uniform %type_container_struct +%type_container_struct_ssbo_ptr = OpTypePointer StorageBuffer %type_container_struct +%var_id = OpVariable %uvec3ptr Input +%var_input = OpVariable %type_container_struct_ubo_ptr Uniform +%var_outdata = OpVariable %type_container_struct_ssbo_ptr StorageBuffer + +%main = OpFunction %void None %voidf +%label = OpLabel +%input_copy = OpCopyObject %type_container_struct_ubo_ptr %var_input +%result = OpLoad 
%type_container_struct %input_copy +OpStore %var_outdata %result +OpReturn +OpFunctionEnd diff --git a/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp b/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp similarity index 100% rename from shaders-msl/asm/comp/image-load-store-short-vector.asm.comp rename to shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp diff --git a/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp b/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp new file mode 100644 index 00000000000..65a7eedd90b --- /dev/null +++ b/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp @@ -0,0 +1,107 @@ +OpCapability Shader +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %main "main" %id +OpExecutionMode %main LocalSize 1 1 1 +OpName %main "main" +OpName %id "gl_GlobalInvocationID" +OpDecorate %id BuiltIn GlobalInvocationId +OpDecorate %sc_0 SpecId 0 +OpDecorate %sc_1 SpecId 1 +OpDecorate %sc_2 SpecId 2 +OpDecorate %i32arr ArrayStride 4 +OpDecorate %buf BufferBlock +OpDecorate %indata DescriptorSet 0 +OpDecorate %indata Binding 0 +OpDecorate %outdata DescriptorSet 0 +OpDecorate %outdata Binding 1 +OpDecorate %f32arr ArrayStride 4 +OpMemberDecorate %buf 0 Offset 0 +%bool = OpTypeBool +%void = OpTypeVoid +%voidf = OpTypeFunction %void +%u32 = OpTypeInt 32 0 +%i32 = OpTypeInt 32 1 +%f32 = OpTypeFloat 32 +%uvec3 = OpTypeVector %u32 3 +%fvec3 = OpTypeVector %f32 3 +%uvec3ptr = OpTypePointer Input %uvec3 +%i32ptr = OpTypePointer Uniform %i32 +%f32ptr = OpTypePointer Uniform %f32 +%i32arr = OpTypeRuntimeArray %i32 +%f32arr = OpTypeRuntimeArray %f32 +%ivec3 = OpTypeVector %i32 3 +%zero = OpConstant %i32 0 +%one = OpConstant %i32 1 +%two = OpConstant %i32 2 +%three = OpConstant %i32 3 +%iarr3 = OpTypeArray %i32 %three +%imat3 = OpTypeArray %iarr3 %three +%struct = OpTypeStruct %imat3 +%buf = OpTypeStruct %i32arr +%bufptr = OpTypePointer Uniform %buf +%indata = OpVariable 
%bufptr Uniform +%outdata = OpVariable %bufptr Uniform +%id = OpVariable %uvec3ptr Input +%ivec3_0 = OpConstantComposite %ivec3 %zero %zero %zero +%vec3_undef = OpUndef %ivec3 +%iarr3_0 = OpConstantComposite %iarr3 %zero %zero %zero +%imat3_0 = OpConstantComposite %imat3 %iarr3_0 %iarr3_0 %iarr3_0 +%struct_0 = OpConstantComposite %struct %imat3_0 +%sc_0 = OpSpecConstant %i32 0 +%sc_1 = OpSpecConstant %i32 0 +%sc_2 = OpSpecConstant %i32 0 +%iarr3_a = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_0 0 +%iarr3_b = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_a 1 +%iarr3_c = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_b 2 +%iarr3_d = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_0 0 +%iarr3_e = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_d 1 +%iarr3_f = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_e 2 +%iarr3_g = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_0 0 +%iarr3_h = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_g 1 +%iarr3_i = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_h 2 +%imat3_a = OpSpecConstantOp %imat3 CompositeInsert %iarr3_c %imat3_0 0 +%imat3_b = OpSpecConstantOp %imat3 CompositeInsert %iarr3_f %imat3_a 1 +%imat3_c = OpSpecConstantOp %imat3 CompositeInsert %iarr3_i %imat3_b 2 +%struct_a = OpSpecConstantOp %struct CompositeInsert %imat3_c %struct_0 0 +%struct_b = OpSpecConstantOp %struct CompositeInsert %sc_2 %struct_a 0 1 2 +%comp_0_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 0 +%comp_1_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 0 +%comp_0_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 1 +%comp_2_2 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 2 +%comp_2_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 0 +%comp_1_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 1 +%cmpres_0 = OpSpecConstantOp %bool IEqual %comp_0_0 %comp_1_0 +%cmpres_1 = OpSpecConstantOp %bool IEqual %comp_0_1 %comp_2_2 +%cmpres_2 = OpSpecConstantOp %bool 
IEqual %comp_2_0 %comp_1_1 +%mustbe_0 = OpSpecConstantOp %i32 Select %cmpres_0 %one %zero +%mustbe_1 = OpSpecConstantOp %i32 Select %cmpres_1 %one %zero +%mustbe_2 = OpSpecConstantOp %i32 Select %cmpres_2 %two %one +%sc_vec3_0 = OpSpecConstantOp %ivec3 CompositeInsert %sc_0 %ivec3_0 0 +%sc_vec3_1 = OpSpecConstantOp %ivec3 CompositeInsert %sc_1 %ivec3_0 1 +%sc_vec3_2 = OpSpecConstantOp %ivec3 CompositeInsert %sc_2 %ivec3_0 2 +%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2 +%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0 +%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5 +%sc_vec3_01 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4 +%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2 +%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0 +%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1 +%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2 +%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1 +%sc_factor = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2 +%main = OpFunction %void None %voidf +%label = OpLabel +%subf_a = OpISub %i32 %one %mustbe_0 +%subf_b = OpIMul %i32 %subf_a %mustbe_1 +%subf_c = OpISub %i32 %mustbe_2 %one +%factor = OpIMul %i32 %subf_b %subf_c +%sc_final = OpIMul %i32 %factor %sc_factor +%idval = OpLoad %uvec3 %id +%x = OpCompositeExtract %u32 %idval 0 +%inloc = OpAccessChain %i32ptr %indata %zero %x +%inval = OpLoad %i32 %inloc +%final = OpIAdd %i32 %inval %sc_final +%outloc = OpAccessChain %i32ptr %outdata %zero %x + OpStore %outloc %final + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/comp/uint_smulextended.asm.comp b/shaders-msl/asm/comp/uint_smulextended.asm.comp new file mode 100644 index 00000000000..32d483636a2 --- /dev/null +++ b/shaders-msl/asm/comp/uint_smulextended.asm.comp @@ -0,0 +1,61 @@ + OpCapability 
Shader + + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationId + OpExecutionMode %main LocalSize 1 1 1 + + OpDecorate %gl_GlobalInvocationId BuiltIn GlobalInvocationId + OpDecorate %ra_uint ArrayStride 4 + OpDecorate %struct_uint4 BufferBlock + OpMemberDecorate %struct_uint4 0 Offset 0 + OpDecorate %input0 DescriptorSet 0 + OpDecorate %input0 Binding 0 + OpDecorate %input1 DescriptorSet 0 + OpDecorate %input1 Binding 1 + OpDecorate %output0 DescriptorSet 0 + OpDecorate %output0 Binding 2 + OpDecorate %output1 DescriptorSet 0 + OpDecorate %output1 Binding 3 + + %uint = OpTypeInt 32 0 + %ptr_uint = OpTypePointer Uniform %uint + %ptr_input_uint = OpTypePointer Input %uint + %uint3 = OpTypeVector %uint 3 + %ptr_input_uint3 = OpTypePointer Input %uint3 + %void = OpTypeVoid + %voidFn = OpTypeFunction %void + + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %ra_uint = OpTypeRuntimeArray %uint + %uint4 = OpTypeVector %uint 4 + %struct_uint4 = OpTypeStruct %ra_uint + %ptr_struct_uint4 = OpTypePointer Uniform %struct_uint4 + %resulttype = OpTypeStruct %uint %uint +%gl_GlobalInvocationId = OpVariable %ptr_input_uint3 Input + %input0 = OpVariable %ptr_struct_uint4 Uniform + %input1 = OpVariable %ptr_struct_uint4 Uniform + + %output0 = OpVariable %ptr_struct_uint4 Uniform + %output1 = OpVariable %ptr_struct_uint4 Uniform + + %main = OpFunction %void None %voidFn + %mainStart = OpLabel + %index_ptr = OpAccessChain %ptr_input_uint %gl_GlobalInvocationId %uint_0 + %index = OpLoad %uint %index_ptr + %in_ptr0 = OpAccessChain %ptr_uint %input0 %uint_0 %index + %invalue0 = OpLoad %uint %in_ptr0 + %in_ptr1 = OpAccessChain %ptr_uint %input1 %uint_0 %index + %invalue1 = OpLoad %uint %in_ptr1 + + %outvalue = OpSMulExtended %resulttype %invalue0 %invalue1 + %outvalue0 = OpCompositeExtract %uint %outvalue 0 + %out_ptr0 = OpAccessChain %ptr_uint %output0 %uint_0 %index + OpStore %out_ptr0 %outvalue0 + %outvalue1 = OpCompositeExtract %uint 
%outvalue 1 + %out_ptr1 = OpAccessChain %ptr_uint %output1 %uint_0 %index + OpStore %out_ptr1 %outvalue1 + + + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/comp/undefined-constant-composite.asm.comp b/shaders-msl/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 00000000000..9de0501fe21 --- /dev/null +++ b/shaders-msl/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,102 @@ +; +; The shader below is based on the following GLSL shader: +; +; #version 450 +; +; struct Pair { +; int first; +; int second; +; }; +; +; const Pair constant_pair = { 100, 200 }; +; +; layout(set=0, binding=0, std430) buffer InputBlock { +; int array[10]; +; } inputValues; +; +; layout(set=0, binding=1, std430) buffer OutputBlock { +; int array[10]; +; } outputValues; +; +; int add_second (int value, Pair pair) { +; return value + pair.second; +; } +; +; void main() { +; uint idx = gl_GlobalInvocationID.x; +; outputValues.array[idx] = add_second(inputValues.array[idx], constant_pair); +; } +; +; However, the first element of constant_pair has been modified to be undefined. 
+; + OpCapability Shader + %std450 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_arr_int_uint_10 ArrayStride 4 + OpMemberDecorate %OutputBlock 0 Offset 0 + OpDecorate %OutputBlock BufferBlock + OpDecorate %outputValues DescriptorSet 0 + OpDecorate %outputValues Binding 1 + OpMemberDecorate %InputBlock 0 Offset 0 + OpDecorate %InputBlock BufferBlock + OpDecorate %inputValues DescriptorSet 0 + OpDecorate %inputValues Binding 0 + %void = OpTypeVoid + %void_func = OpTypeFunction %void + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_200 = OpConstant %int 200 + %uint_0 = OpConstant %uint 0 + %uint_10 = OpConstant %uint 10 + %_ptr_Function_int = OpTypePointer Function %int + %Pair = OpTypeStruct %int %int + %_ptr_Function_Pair = OpTypePointer Function %Pair + %add_second_func_type = OpTypeFunction %int %_ptr_Function_int %_ptr_Function_Pair + %_ptr_Function_uint = OpTypePointer Function %uint + %_ptr_Input_v3uint = OpTypePointer Input %v3uint + %_ptr_Input_uint = OpTypePointer Input %uint + %_arr_int_uint_10 = OpTypeArray %int %uint_10 + %OutputBlock = OpTypeStruct %_arr_int_uint_10 +%_ptr_Uniform_OutputBlock = OpTypePointer Uniform %OutputBlock + %outputValues = OpVariable %_ptr_Uniform_OutputBlock Uniform + %InputBlock = OpTypeStruct %_arr_int_uint_10 + %_ptr_Uniform_InputBlock = OpTypePointer Uniform %InputBlock + %inputValues = OpVariable %_ptr_Uniform_InputBlock Uniform + ; Replaced %int_100 with an undefined int. + %undef_int = OpUndef %int + ; Composed a constant Pair with the undefined int in the first member. 
+ %const_Pair = OpConstantComposite %Pair %undef_int %int_200 + %_ptr_Uniform_int = OpTypePointer Uniform %int + %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %main = OpFunction %void None %void_func + %main_label = OpLabel + %param_1 = OpVariable %_ptr_Function_int Function + %param_2 = OpVariable %_ptr_Function_Pair Function + %gidx_ptr = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %gidx = OpLoad %uint %gidx_ptr + %input_value_ptr = OpAccessChain %_ptr_Uniform_int %inputValues %int_0 %gidx + %input_value = OpLoad %int %input_value_ptr + OpStore %param_1 %input_value + OpStore %param_2 %const_Pair + %retval = OpFunctionCall %int %add_second %param_1 %param_2 + %output_value_ptr = OpAccessChain %_ptr_Uniform_int %outputValues %int_0 %gidx + OpStore %output_value_ptr %retval + OpReturn + OpFunctionEnd + %add_second = OpFunction %int None %add_second_func_type + %value_ptr = OpFunctionParameter %_ptr_Function_int + %pair = OpFunctionParameter %_ptr_Function_Pair + %add_second_label = OpLabel + %value = OpLoad %int %value_ptr + ; Access the second struct member, which is defined. 
+ %pair_second_ptr = OpAccessChain %_ptr_Function_int %pair %int_1 + %pair_second = OpLoad %int %pair_second_ptr + %add_result = OpIAdd %int %value %pair_second + OpReturnValue %add_result + OpFunctionEnd diff --git a/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp b/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp new file mode 100644 index 00000000000..d89a402bf5f --- /dev/null +++ b/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp @@ -0,0 +1,122 @@ +; +; The shader below is based on the following GLSL shader: +; +; #version 450 +; +; struct Pair { +; int first; +; int second; +; }; +; +; const Pair constant_pair = { 100, 200 }; +; +; layout (constant_id=0) const int constantFirst = 0; +; +; Pair spec_constant_pair = { constantFirst, 200 }; +; +; layout(set=0, binding=0, std430) buffer InputBlock { +; int array[10]; +; } inputValues; +; +; layout(set=0, binding=1, std430) buffer OutputBlock { +; int array[10]; +; } outputValues; +; +; int add_first_and_second (int value, Pair p1, Pair p2) { +; return value + p1.first + p2.second; +; } +; +; void main() { +; uint idx = gl_GlobalInvocationID.x; +; outputValues.array[idx] = add_first_and_second(inputValues.array[idx], spec_constant_pair, constant_pair); +; } +; +; However, both the constant_pair and the spec_constant_pair have one of their members replaced by undefined values. 
+; + OpCapability Shader + %std450 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_arr_int_uint_10 ArrayStride 4 + OpMemberDecorate %OutputBlock 0 Offset 0 + OpDecorate %OutputBlock BufferBlock + OpDecorate %outputValues DescriptorSet 0 + OpDecorate %outputValues Binding 1 + OpMemberDecorate %InputBlock 0 Offset 0 + OpDecorate %InputBlock BufferBlock + OpDecorate %inputValues DescriptorSet 0 + OpDecorate %inputValues Binding 0 + OpDecorate %spec_constant SpecId 0 + %void = OpTypeVoid + %void_func = OpTypeFunction %void + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_200 = OpConstant %int 200 + %uint_0 = OpConstant %uint 0 + %uint_10 = OpConstant %uint 10 + %_ptr_Function_int = OpTypePointer Function %int + %Pair = OpTypeStruct %int %int + %_ptr_Function_Pair = OpTypePointer Function %Pair +%add_pair_members_func_type = OpTypeFunction %int %_ptr_Function_int %_ptr_Function_Pair %_ptr_Function_Pair + %_ptr_Function_uint = OpTypePointer Function %uint + %_ptr_Input_v3uint = OpTypePointer Input %v3uint + %_ptr_Input_uint = OpTypePointer Input %uint + %_arr_int_uint_10 = OpTypeArray %int %uint_10 + %OutputBlock = OpTypeStruct %_arr_int_uint_10 + %_ptr_Uniform_OutputBlock = OpTypePointer Uniform %OutputBlock + %outputValues = OpVariable %_ptr_Uniform_OutputBlock Uniform + %InputBlock = OpTypeStruct %_arr_int_uint_10 + %_ptr_Uniform_InputBlock = OpTypePointer Uniform %InputBlock + %inputValues = OpVariable %_ptr_Uniform_InputBlock Uniform + ; Replaced %int_100 with an undefined int. + %undef_int = OpUndef %int + ; Composed a spec constant Pair with an undefined int in the second member. 
+ %spec_constant = OpSpecConstant %int 0 + %spec_const_Pair = OpSpecConstantComposite %Pair %spec_constant %undef_int + ; Composed a constant Pair with the undefined int in the first member. + %const_Pair = OpConstantComposite %Pair %undef_int %int_200 + %_ptr_Uniform_int = OpTypePointer Uniform %int + %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %main = OpFunction %void None %void_func + %main_label = OpLabel + %param_1 = OpVariable %_ptr_Function_int Function + %param_2 = OpVariable %_ptr_Function_Pair Function + %param_3 = OpVariable %_ptr_Function_Pair Function + %gidx_ptr = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %gidx = OpLoad %uint %gidx_ptr + %input_value_ptr = OpAccessChain %_ptr_Uniform_int %inputValues %int_0 %gidx + %input_value = OpLoad %int %input_value_ptr + OpStore %param_1 %input_value + OpStore %param_2 %spec_const_Pair + OpStore %param_3 %const_Pair + ; Pass the input value as the first argument. + ; Pass the specialization constant Pair as the second argument. + ; Pass the constant Pair as the third argument. + %retval = OpFunctionCall %int %add_pair_members %param_1 %param_2 %param_3 + %output_value_ptr = OpAccessChain %_ptr_Uniform_int %outputValues %int_0 %gidx + OpStore %output_value_ptr %retval + OpReturn + OpFunctionEnd + %add_pair_members = OpFunction %int None %add_pair_members_func_type + %value_ptr = OpFunctionParameter %_ptr_Function_int + %pair_1 = OpFunctionParameter %_ptr_Function_Pair + %pair_2 = OpFunctionParameter %_ptr_Function_Pair + %add_pair_members_label = OpLabel + %value = OpLoad %int %value_ptr + ; Access the first struct member from the first pair. + ; Access the second struct member from the second pair. + ; Both should be defined according to the function call above. 
+ %pair_1_first_ptr = OpAccessChain %_ptr_Function_int %pair_1 %int_0 + %pair_2_second_ptr = OpAccessChain %_ptr_Function_int %pair_2 %int_1 + %pair_1_first = OpLoad %int %pair_1_first_ptr + %pair_2_second = OpLoad %int %pair_2_second_ptr + %partial_result = OpIAdd %int %value %pair_1_first + %final_result = OpIAdd %int %partial_result %pair_2_second + OpReturnValue %final_result + OpFunctionEnd + diff --git a/shaders-msl/asm/frag/depth-image-color-format-fetch.asm.frag b/shaders-msl/asm/frag/depth-image-color-format-fetch.asm.frag new file mode 100644 index 00000000000..0be26d1c055 --- /dev/null +++ b/shaders-msl/asm/frag/depth-image-color-format-fetch.asm.frag @@ -0,0 +1,170 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 132 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "main" %2 %3 %4 + OpExecutionMode %1 OriginUpperLeft + OpDecorate %3 Location 0 + OpDecorate %2 Location 1 + OpDecorate %4 BuiltIn FragCoord + OpDecorate %5 ArrayStride 4 + OpDecorate %6 ArrayStride 16 + OpMemberDecorate %7 0 Offset 0 + OpDecorate %7 BufferBlock + OpDecorate %8 DescriptorSet 0 + OpDecorate %8 Binding 0 + OpDecorate %9 DescriptorSet 0 + OpDecorate %9 Binding 1 + OpDecorate %10 DescriptorSet 0 + OpDecorate %10 Binding 2 + %11 = OpTypeVoid + %12 = OpTypeBool + %13 = OpTypeInt 32 1 + %14 = OpTypeInt 32 0 + %16 = OpTypeFloat 32 + %17 = OpTypeVector %13 2 + %18 = OpTypeVector %14 2 + %19 = OpTypeVector %16 2 + %20 = OpTypeVector %13 3 + %21 = OpTypeVector %14 3 + %22 = OpTypeVector %16 3 + %23 = OpTypeVector %13 4 + %24 = OpTypeVector %14 4 + %25 = OpTypeVector %16 4 + %26 = OpTypeVector %12 4 + %27 = OpTypeFunction %25 %25 + %28 = OpTypeFunction %12 + %29 = OpTypeFunction %11 + %30 = OpTypePointer Input %16 + %31 = OpTypePointer Input %13 + %32 = OpTypePointer Input %14 + %33 = OpTypePointer Input %19 + %34 = OpTypePointer Input %17 + %35 = OpTypePointer Input %18 + %38 = OpTypePointer Input %22 + 
%40 = OpTypePointer Input %25 + %41 = OpTypePointer Input %23 + %42 = OpTypePointer Input %24 + %43 = OpTypePointer Output %16 + %44 = OpTypePointer Output %13 + %45 = OpTypePointer Output %14 + %46 = OpTypePointer Output %19 + %47 = OpTypePointer Output %17 + %48 = OpTypePointer Output %18 + %49 = OpTypePointer Output %25 + %50 = OpTypePointer Output %23 + %51 = OpTypePointer Output %24 + %52 = OpTypePointer Function %16 + %53 = OpTypePointer Function %13 + %54 = OpTypePointer Function %25 + %55 = OpConstant %16 1 + %56 = OpConstant %16 0 + %57 = OpConstant %16 0.5 + %58 = OpConstant %16 -1 + %59 = OpConstant %16 7 + %60 = OpConstant %16 8 + %61 = OpConstant %13 0 + %62 = OpConstant %13 1 + %63 = OpConstant %13 2 + %64 = OpConstant %13 3 + %65 = OpConstant %13 4 + %66 = OpConstant %14 0 + %67 = OpConstant %14 1 + %68 = OpConstant %14 2 + %69 = OpConstant %14 3 + %70 = OpConstant %14 32 + %71 = OpConstant %14 4 + %72 = OpConstant %14 2147483647 + %73 = OpConstantComposite %25 %55 %55 %55 %55 + %74 = OpConstantComposite %25 %55 %56 %56 %55 + %75 = OpConstantComposite %25 %57 %57 %57 %57 + %76 = OpTypeArray %16 %67 + %77 = OpTypeArray %16 %68 + %78 = OpTypeArray %25 %69 + %79 = OpTypeArray %16 %71 + %80 = OpTypeArray %25 %70 + %81 = OpTypePointer Input %78 + %82 = OpTypePointer Input %80 + %83 = OpTypePointer Output %77 + %84 = OpTypePointer Output %78 + %85 = OpTypePointer Output %79 + %4 = OpVariable %40 Input + %3 = OpVariable %49 Output + %2 = OpVariable %40 Input + %86 = OpConstant %14 64 + %87 = OpConstant %13 64 + %88 = OpConstant %13 8 + %89 = OpConstantComposite %19 %60 %60 + %5 = OpTypeArray %16 %86 + %6 = OpTypeArray %25 %86 + %90 = OpTypePointer Uniform %16 + %91 = OpTypePointer Uniform %25 + %7 = OpTypeStruct %6 + %92 = OpTypePointer Uniform %7 + %10 = OpVariable %92 Uniform + %93 = OpTypeImage %16 2D 1 0 0 1 Rgba32f + %94 = OpTypePointer UniformConstant %93 + %8 = OpVariable %94 UniformConstant + %95 = OpTypeSampler + %96 = OpTypePointer UniformConstant 
%95 + %9 = OpVariable %96 UniformConstant + %97 = OpTypeSampledImage %93 + %98 = OpTypeFunction %11 %13 + %1 = OpFunction %11 None %29 + %99 = OpLabel + %100 = OpLoad %25 %2 + %101 = OpFunctionCall %25 %102 %100 + OpStore %3 %101 + OpReturn + OpFunctionEnd + %103 = OpFunction %12 None %28 + %104 = OpLabel + %105 = OpAccessChain %30 %4 %61 + %106 = OpAccessChain %30 %4 %62 + %107 = OpLoad %16 %105 + %108 = OpLoad %16 %106 + %109 = OpFOrdEqual %12 %107 %57 + %110 = OpFOrdEqual %12 %108 %57 + %111 = OpLogicalAnd %12 %109 %110 + OpReturnValue %111 + OpFunctionEnd + %112 = OpFunction %11 None %98 + %113 = OpFunctionParameter %13 + %114 = OpLabel + %115 = OpSRem %13 %113 %88 + %116 = OpSDiv %13 %113 %88 + %117 = OpCompositeConstruct %17 %115 %116 + %118 = OpConvertSToF %19 %117 + %119 = OpFDiv %19 %118 %89 + %120 = OpLoad %93 %8 + %121 = OpImageFetch %25 %120 %117 + %36 = OpAccessChain %91 %10 %61 %113 + OpStore %36 %121 + OpReturn + OpFunctionEnd + %102 = OpFunction %25 None %27 + %122 = OpFunctionParameter %25 + %123 = OpLabel + %124 = OpVariable %53 Function + OpStore %124 %61 + OpBranch %125 + %125 = OpLabel + %15 = OpLoad %13 %124 + %126 = OpSLessThan %12 %15 %87 + OpLoopMerge %127 %128 None + OpBranchConditional %126 %129 %127 + %129 = OpLabel + %130 = OpLoad %13 %124 + %131 = OpFunctionCall %11 %112 %130 + OpBranch %128 + %128 = OpLabel + %37 = OpLoad %13 %124 + %39 = OpIAdd %13 %37 %62 + OpStore %124 %39 + OpBranch %125 + %127 = OpLabel + OpReturnValue %122 + OpFunctionEnd diff --git a/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag b/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag new file mode 100644 index 00000000000..97e88b55a0a --- /dev/null +++ b/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag @@ -0,0 +1,173 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 134 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "main" %2 %3 %4 + 
OpExecutionMode %1 OriginUpperLeft + OpDecorate %3 Location 0 + OpDecorate %2 Location 1 + OpDecorate %4 BuiltIn FragCoord + OpDecorate %5 ArrayStride 4 + OpDecorate %6 ArrayStride 16 + OpMemberDecorate %7 0 Offset 0 + OpDecorate %7 BufferBlock + OpDecorate %8 DescriptorSet 0 + OpDecorate %8 Binding 0 + OpDecorate %9 DescriptorSet 0 + OpDecorate %9 Binding 1 + OpDecorate %10 DescriptorSet 0 + OpDecorate %10 Binding 2 + %11 = OpTypeVoid + %12 = OpTypeBool + %13 = OpTypeInt 32 1 + %14 = OpTypeInt 32 0 + %16 = OpTypeFloat 32 + %17 = OpTypeVector %13 2 + %18 = OpTypeVector %14 2 + %19 = OpTypeVector %16 2 + %20 = OpTypeVector %13 3 + %21 = OpTypeVector %14 3 + %22 = OpTypeVector %16 3 + %23 = OpTypeVector %13 4 + %24 = OpTypeVector %14 4 + %25 = OpTypeVector %16 4 + %26 = OpTypeVector %12 4 + %27 = OpTypeFunction %25 %25 + %28 = OpTypeFunction %12 + %29 = OpTypeFunction %11 + %30 = OpTypePointer Input %16 + %31 = OpTypePointer Input %13 + %32 = OpTypePointer Input %14 + %33 = OpTypePointer Input %19 + %34 = OpTypePointer Input %17 + %35 = OpTypePointer Input %18 + %38 = OpTypePointer Input %22 + %40 = OpTypePointer Input %25 + %41 = OpTypePointer Input %23 + %42 = OpTypePointer Input %24 + %43 = OpTypePointer Output %16 + %44 = OpTypePointer Output %13 + %45 = OpTypePointer Output %14 + %46 = OpTypePointer Output %19 + %47 = OpTypePointer Output %17 + %48 = OpTypePointer Output %18 + %49 = OpTypePointer Output %25 + %50 = OpTypePointer Output %23 + %51 = OpTypePointer Output %24 + %52 = OpTypePointer Function %16 + %53 = OpTypePointer Function %13 + %54 = OpTypePointer Function %25 + %55 = OpConstant %16 1 + %56 = OpConstant %16 0 + %57 = OpConstant %16 0.5 + %58 = OpConstant %16 -1 + %59 = OpConstant %16 7 + %60 = OpConstant %16 8 + %61 = OpConstant %13 0 + %62 = OpConstant %13 1 + %63 = OpConstant %13 2 + %64 = OpConstant %13 3 + %65 = OpConstant %13 4 + %66 = OpConstant %14 0 + %67 = OpConstant %14 1 + %68 = OpConstant %14 2 + %69 = OpConstant %14 3 + %70 = 
OpConstant %14 32 + %71 = OpConstant %14 4 + %72 = OpConstant %14 2147483647 + %73 = OpConstantComposite %25 %55 %55 %55 %55 + %74 = OpConstantComposite %25 %55 %56 %56 %55 + %75 = OpConstantComposite %25 %57 %57 %57 %57 + %76 = OpTypeArray %16 %67 + %77 = OpTypeArray %16 %68 + %78 = OpTypeArray %25 %69 + %79 = OpTypeArray %16 %71 + %80 = OpTypeArray %25 %70 + %81 = OpTypePointer Input %78 + %82 = OpTypePointer Input %80 + %83 = OpTypePointer Output %77 + %84 = OpTypePointer Output %78 + %85 = OpTypePointer Output %79 + %4 = OpVariable %40 Input + %3 = OpVariable %49 Output + %2 = OpVariable %40 Input + %86 = OpConstant %14 64 + %87 = OpConstant %13 64 + %88 = OpConstant %13 8 + %89 = OpConstantComposite %19 %60 %60 + %5 = OpTypeArray %16 %86 + %6 = OpTypeArray %25 %86 + %90 = OpTypePointer Uniform %16 + %91 = OpTypePointer Uniform %25 + %7 = OpTypeStruct %6 + %92 = OpTypePointer Uniform %7 + %10 = OpVariable %92 Uniform + %93 = OpTypeImage %16 2D 1 0 0 1 Rgba32f + %94 = OpTypePointer UniformConstant %93 + %8 = OpVariable %94 UniformConstant + %95 = OpTypeSampler + %96 = OpTypePointer UniformConstant %95 + %9 = OpVariable %96 UniformConstant + %97 = OpTypeSampledImage %93 + %98 = OpTypeFunction %11 %13 + %1 = OpFunction %11 None %29 + %99 = OpLabel + %100 = OpLoad %25 %2 + %101 = OpFunctionCall %25 %102 %100 + OpStore %3 %101 + OpReturn + OpFunctionEnd + %103 = OpFunction %12 None %28 + %104 = OpLabel + %105 = OpAccessChain %30 %4 %61 + %106 = OpAccessChain %30 %4 %62 + %107 = OpLoad %16 %105 + %108 = OpLoad %16 %106 + %109 = OpFOrdEqual %12 %107 %57 + %110 = OpFOrdEqual %12 %108 %57 + %111 = OpLogicalAnd %12 %109 %110 + OpReturnValue %111 + OpFunctionEnd + %112 = OpFunction %11 None %98 + %113 = OpFunctionParameter %13 + %114 = OpLabel + %115 = OpSRem %13 %113 %88 + %116 = OpSDiv %13 %113 %88 + %117 = OpCompositeConstruct %17 %115 %116 + %118 = OpConvertSToF %19 %117 + %119 = OpFDiv %19 %118 %89 + %120 = OpLoad %93 %8 + %121 = OpLoad %95 %9 + %122 = OpSampledImage 
%97 %120 %121 + %123 = OpImageSampleExplicitLod %25 %122 %119 Lod %56 + %36 = OpAccessChain %91 %10 %61 %113 + OpStore %36 %123 + OpReturn + OpFunctionEnd + %102 = OpFunction %25 None %27 + %124 = OpFunctionParameter %25 + %125 = OpLabel + %126 = OpVariable %53 Function + OpStore %126 %61 + OpBranch %127 + %127 = OpLabel + %15 = OpLoad %13 %126 + %128 = OpSLessThan %12 %15 %87 + OpLoopMerge %129 %130 None + OpBranchConditional %128 %131 %129 + %131 = OpLabel + %132 = OpLoad %13 %126 + %133 = OpFunctionCall %11 %112 %132 + OpBranch %130 + %130 = OpLabel + %37 = OpLoad %13 %126 + %39 = OpIAdd %13 %37 %62 + OpStore %126 %39 + OpBranch %127 + %129 = OpLabel + OpReturnValue %124 + OpFunctionEnd + diff --git a/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag b/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag new file mode 100644 index 00000000000..02d018267a3 --- /dev/null +++ b/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag @@ -0,0 +1,83 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 37 +; Schema: 0 + OpCapability Shader + OpCapability StencilExportEXT + OpExtension "SPV_EXT_shader_stencil_export" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %o0 %o1 %o2 %o3 %o4 %o5 %o6 %o7 %oDepth %oStencil + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main DepthReplacing + OpSource GLSL 450 + OpSourceExtension "GL_ARB_shader_stencil_export" + OpName %main "main" + OpName %o0 "o0" + OpName %o1 "o1" + OpName %o2 "o2" + OpName %o3 "o3" + OpName %o4 "o4" + OpName %o5 "o5" + OpName %o6 "o6" + OpName %o7 "o7" + OpName %oDepth "oDepth" + OpName %oStencil "oStencil" + OpDecorate %o0 Location 0 + OpDecorate %o1 Location 1 + OpDecorate %o2 Location 2 + OpDecorate %o3 Location 3 + OpDecorate %o4 Location 4 + OpDecorate %o5 Location 5 + OpDecorate %o6 Location 6 + OpDecorate %o7 Location 7 + OpDecorate %oDepth BuiltIn FragDepth + 
OpDecorate %oStencil BuiltIn FragStencilRefEXT + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %o0 = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %12 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %o1 = OpVariable %_ptr_Output_v4float Output + %14 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %o2 = OpVariable %_ptr_Output_v4float Output + %16 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1 + %o3 = OpVariable %_ptr_Output_v4float Output + %18 = OpConstantComposite %v4float %float_0 %float_0 %float_1 %float_1 + %o4 = OpVariable %_ptr_Output_v4float Output + %float_0_5 = OpConstant %float 0.5 + %21 = OpConstantComposite %v4float %float_1 %float_0 %float_1 %float_0_5 + %o5 = OpVariable %_ptr_Output_v4float Output + %float_0_25 = OpConstant %float 0.25 + %24 = OpConstantComposite %v4float %float_0_25 %float_0_25 %float_0_25 %float_0_25 + %o6 = OpVariable %_ptr_Output_v4float Output + %float_0_75 = OpConstant %float 0.75 + %27 = OpConstantComposite %v4float %float_0_75 %float_0_75 %float_0_75 %float_0_75 + %o7 = OpVariable %_ptr_Output_v4float Output + %29 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_float = OpTypePointer Output %float + %oDepth = OpVariable %_ptr_Output_float Output +%float_0_899999976 = OpConstant %float 0.899999976 + %int = OpTypeInt 32 1 +%_ptr_Output_int = OpTypePointer Output %int + %oStencil = OpVariable %_ptr_Output_int Output + %int_127 = OpConstant %int 127 + %main = OpFunction %void None %3 + %5 = OpLabel + OpStore %o0 %12 + OpStore %o1 %14 + OpStore %o2 %16 + OpStore %o3 %18 + OpStore %o4 %21 + OpStore %o5 %24 + OpStore %o6 %27 + OpStore %o7 %29 + OpStore %oDepth %float_0_899999976 + OpStore %oStencil %int_127 + OpReturn + OpFunctionEnd diff --git 
a/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag b/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag new file mode 100644 index 00000000000..9f03d77e3de --- /dev/null +++ b/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag @@ -0,0 +1,425 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 325 +; Schema: 0 + OpCapability Shader + OpCapability SampleRateShading + OpCapability InterpolationFunction + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %baz %a %s %foo %sid %bar %b %c + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %func_ "func(" + OpName %FragColor "FragColor" + OpName %baz "baz" + OpName %a "a" + OpName %_ "" + OpMemberName %_ 0 "x" + OpMemberName %_ 1 "y" + OpMemberName %_ 2 "z" + OpMemberName %_ 3 "u" + OpMemberName %_ 4 "v" + OpMemberName %_ 5 "w" + OpName %s "s" + OpName %foo "foo" + OpName %sid "sid" + OpName %bar "bar" + OpName %b "b" + OpName %c "c" + OpDecorate %FragColor Location 0 + OpDecorate %baz Sample + OpDecorate %baz Location 2 + OpDecorate %a Location 4 + OpDecorate %s Location 10 + OpDecorate %foo NoPerspective + OpDecorate %foo Location 0 + OpDecorate %sid Flat + OpDecorate %sid Location 3 + OpDecorate %bar Centroid + OpDecorate %bar Location 1 + OpDecorate %b Centroid + OpDecorate %b Location 6 + OpDecorate %c Sample + OpDecorate %c Location 8 + OpMemberDecorate %_ 1 Centroid + OpMemberDecorate %_ 1 NoPerspective + OpMemberDecorate %_ 2 Sample + OpMemberDecorate %_ 3 Centroid + OpMemberDecorate %_ 4 Sample + OpMemberDecorate %_ 4 NoPerspective + %void = OpTypeVoid + %15 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %baz = 
OpVariable %_ptr_Input_v2float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_float = OpTypePointer Output %float + %uint_1 = OpConstant %uint 1 + %int = OpTypeInt 32 1 + %int_3 = OpConstant %int 3 +%float_n0_100000001 = OpConstant %float -0.100000001 +%float_0_100000001 = OpConstant %float 0.100000001 + %30 = OpConstantComposite %v2float %float_n0_100000001 %float_0_100000001 + %uint_2 = OpConstant %uint 2 +%_arr_v2float_uint_2 = OpTypeArray %v2float %uint_2 +%_ptr_Input__arr_v2float_uint_2 = OpTypePointer Input %_arr_v2float_uint_2 + %a = OpVariable %_ptr_Input__arr_v2float_uint_2 Input + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_ptr_Input__arr_v4float_uint_2 = OpTypePointer Input %_arr_v4float_uint_2 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %_ = OpTypeStruct %v4float %v4float %v4float %_arr_v4float_uint_2 %_arr_v2float_uint_2 %_arr_float_uint_3 +%_ptr_Input__ = OpTypePointer Input %_ + %s = OpVariable %_ptr_Input__ Input +%_ptr_Input_v4float = OpTypePointer Input %v4float + %foo = OpVariable %_ptr_Input_v4float Input +%_ptr_Input_int = OpTypePointer Input %int + %sid = OpVariable %_ptr_Input_int Input + %44 = OpConstantComposite %v2float %float_0_100000001 %float_0_100000001 + %v3float = OpTypeVector %float 3 +%_ptr_Input_v3float = OpTypePointer Input %v3float + %bar = OpVariable %_ptr_Input_v3float Input + %47 = OpConstantComposite %v2float %float_n0_100000001 %float_n0_100000001 + %b = OpVariable %_ptr_Input__arr_v2float_uint_2 Input + %c = OpVariable %_ptr_Input__arr_v2float_uint_2 Input + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %main = OpFunction %void None %15 + %50 = OpLabel + %51 = OpLoad %v4float %foo + OpStore %FragColor %51 + %52 = OpExtInst %v4float %1 InterpolateAtCentroid %foo + %53 = OpLoad %v4float %FragColor + %54 = 
OpFAdd %v4float %53 %52 + OpStore %FragColor %54 + %55 = OpLoad %int %sid + %56 = OpExtInst %v4float %1 InterpolateAtSample %foo %55 + %57 = OpLoad %v4float %FragColor + %58 = OpFAdd %v4float %57 %56 + OpStore %FragColor %58 + %59 = OpExtInst %v4float %1 InterpolateAtOffset %foo %44 + %60 = OpLoad %v4float %FragColor + %61 = OpFAdd %v4float %60 %59 + OpStore %FragColor %61 + %62 = OpLoad %v3float %bar + %63 = OpLoad %v4float %FragColor + %64 = OpVectorShuffle %v3float %63 %63 0 1 2 + %65 = OpFAdd %v3float %64 %62 + %66 = OpLoad %v4float %FragColor + %67 = OpVectorShuffle %v4float %66 %65 4 5 6 3 + OpStore %FragColor %67 + %68 = OpExtInst %v3float %1 InterpolateAtCentroid %bar + %69 = OpLoad %v4float %FragColor + %70 = OpVectorShuffle %v3float %69 %69 0 1 2 + %71 = OpFAdd %v3float %70 %68 + %72 = OpLoad %v4float %FragColor + %73 = OpVectorShuffle %v4float %72 %71 4 5 6 3 + OpStore %FragColor %73 + %74 = OpLoad %int %sid + %75 = OpExtInst %v3float %1 InterpolateAtSample %bar %74 + %76 = OpLoad %v4float %FragColor + %77 = OpVectorShuffle %v3float %76 %76 0 1 2 + %78 = OpFAdd %v3float %77 %75 + %79 = OpLoad %v4float %FragColor + %80 = OpVectorShuffle %v4float %79 %78 4 5 6 3 + OpStore %FragColor %80 + %81 = OpExtInst %v3float %1 InterpolateAtOffset %bar %47 + %82 = OpLoad %v4float %FragColor + %83 = OpVectorShuffle %v3float %82 %82 0 1 2 + %84 = OpFAdd %v3float %83 %81 + %85 = OpLoad %v4float %FragColor + %86 = OpVectorShuffle %v4float %85 %84 4 5 6 3 + OpStore %FragColor %86 + %87 = OpAccessChain %_ptr_Input_v2float %b %int_0 + %88 = OpLoad %v2float %87 + %89 = OpLoad %v4float %FragColor + %90 = OpVectorShuffle %v2float %89 %89 0 1 + %91 = OpFAdd %v2float %90 %88 + %92 = OpLoad %v4float %FragColor + %93 = OpVectorShuffle %v4float %92 %91 4 5 2 3 + OpStore %FragColor %93 + %94 = OpAccessChain %_ptr_Input_v2float %b %int_1 + %95 = OpExtInst %v2float %1 InterpolateAtCentroid %94 + %96 = OpLoad %v4float %FragColor + %97 = OpVectorShuffle %v2float %96 %96 0 1 + %98 = 
OpFAdd %v2float %97 %95 + %99 = OpLoad %v4float %FragColor + %100 = OpVectorShuffle %v4float %99 %98 4 5 2 3 + OpStore %FragColor %100 + %101 = OpAccessChain %_ptr_Input_v2float %b %int_0 + %102 = OpExtInst %v2float %1 InterpolateAtSample %101 %int_2 + %103 = OpLoad %v4float %FragColor + %104 = OpVectorShuffle %v2float %103 %103 0 1 + %105 = OpFAdd %v2float %104 %102 + %106 = OpLoad %v4float %FragColor + %107 = OpVectorShuffle %v4float %106 %105 4 5 2 3 + OpStore %FragColor %107 + %108 = OpAccessChain %_ptr_Input_v2float %b %int_1 + %109 = OpExtInst %v2float %1 InterpolateAtOffset %108 %30 + %110 = OpLoad %v4float %FragColor + %111 = OpVectorShuffle %v2float %110 %110 0 1 + %112 = OpFAdd %v2float %111 %109 + %113 = OpLoad %v4float %FragColor + %114 = OpVectorShuffle %v4float %113 %112 4 5 2 3 + OpStore %FragColor %114 + %115 = OpAccessChain %_ptr_Input_v2float %c %int_0 + %116 = OpLoad %v2float %115 + %117 = OpLoad %v4float %FragColor + %118 = OpVectorShuffle %v2float %117 %117 0 1 + %119 = OpFAdd %v2float %118 %116 + %120 = OpLoad %v4float %FragColor + %121 = OpVectorShuffle %v4float %120 %119 4 5 2 3 + OpStore %FragColor %121 + %122 = OpAccessChain %_ptr_Input_v2float %c %int_1 + %123 = OpExtInst %v2float %1 InterpolateAtCentroid %122 + %124 = OpVectorShuffle %v2float %123 %123 0 1 + %125 = OpLoad %v4float %FragColor + %126 = OpVectorShuffle %v2float %125 %125 0 1 + %127 = OpFAdd %v2float %126 %124 + %128 = OpLoad %v4float %FragColor + %129 = OpVectorShuffle %v4float %128 %127 4 5 2 3 + OpStore %FragColor %129 + %130 = OpAccessChain %_ptr_Input_v2float %c %int_0 + %131 = OpExtInst %v2float %1 InterpolateAtSample %130 %int_2 + %132 = OpVectorShuffle %v2float %131 %131 1 0 + %133 = OpLoad %v4float %FragColor + %134 = OpVectorShuffle %v2float %133 %133 0 1 + %135 = OpFAdd %v2float %134 %132 + %136 = OpLoad %v4float %FragColor + %137 = OpVectorShuffle %v4float %136 %135 4 5 2 3 + OpStore %FragColor %137 + %138 = OpAccessChain %_ptr_Input_v2float %c %int_1 + %139 = 
OpExtInst %v2float %1 InterpolateAtOffset %138 %30 + %140 = OpVectorShuffle %v2float %139 %139 0 0 + %141 = OpLoad %v4float %FragColor + %142 = OpVectorShuffle %v2float %141 %141 0 1 + %143 = OpFAdd %v2float %142 %140 + %144 = OpLoad %v4float %FragColor + %145 = OpVectorShuffle %v4float %144 %143 4 5 2 3 + OpStore %FragColor %145 + %146 = OpAccessChain %_ptr_Input_v4float %s %int_0 + %147 = OpLoad %v4float %146 + %148 = OpLoad %v4float %FragColor + %149 = OpFAdd %v4float %148 %147 + OpStore %FragColor %149 + %150 = OpAccessChain %_ptr_Input_v4float %s %int_0 + %151 = OpExtInst %v4float %1 InterpolateAtCentroid %150 + %152 = OpLoad %v4float %FragColor + %153 = OpFAdd %v4float %152 %151 + OpStore %FragColor %153 + %154 = OpAccessChain %_ptr_Input_v4float %s %int_0 + %155 = OpLoad %int %sid + %156 = OpExtInst %v4float %1 InterpolateAtSample %154 %155 + %157 = OpLoad %v4float %FragColor + %158 = OpFAdd %v4float %157 %156 + OpStore %FragColor %158 + %159 = OpAccessChain %_ptr_Input_v4float %s %int_0 + %160 = OpExtInst %v4float %1 InterpolateAtOffset %159 %44 + %161 = OpLoad %v4float %FragColor + %162 = OpFAdd %v4float %161 %160 + OpStore %FragColor %162 + %163 = OpAccessChain %_ptr_Input_v4float %s %int_1 + %164 = OpLoad %v4float %163 + %165 = OpLoad %v4float %FragColor + %166 = OpFAdd %v4float %165 %164 + OpStore %FragColor %166 + %167 = OpAccessChain %_ptr_Input_v4float %s %int_1 + %168 = OpExtInst %v4float %1 InterpolateAtCentroid %167 + %169 = OpLoad %v4float %FragColor + %170 = OpFAdd %v4float %169 %168 + OpStore %FragColor %170 + %171 = OpAccessChain %_ptr_Input_v4float %s %int_1 + %172 = OpLoad %int %sid + %173 = OpExtInst %v4float %1 InterpolateAtSample %171 %172 + %174 = OpLoad %v4float %FragColor + %175 = OpFAdd %v4float %174 %173 + OpStore %FragColor %175 + %176 = OpAccessChain %_ptr_Input_v4float %s %int_1 + %177 = OpExtInst %v4float %1 InterpolateAtOffset %176 %47 + %178 = OpLoad %v4float %FragColor + %179 = OpFAdd %v4float %178 %177 + OpStore %FragColor 
%179 + %180 = OpAccessChain %_ptr_Input_v2float %s %int_4 %int_0 + %181 = OpLoad %v2float %180 + %182 = OpLoad %v4float %FragColor + %183 = OpVectorShuffle %v2float %182 %182 0 1 + %184 = OpFAdd %v2float %183 %181 + %185 = OpLoad %v4float %FragColor + %186 = OpVectorShuffle %v4float %185 %184 4 5 2 3 + OpStore %FragColor %186 + %187 = OpAccessChain %_ptr_Input_v2float %s %int_4 %int_1 + %188 = OpExtInst %v2float %1 InterpolateAtCentroid %187 + %189 = OpLoad %v4float %FragColor + %190 = OpVectorShuffle %v2float %189 %189 0 1 + %191 = OpFAdd %v2float %190 %188 + %192 = OpLoad %v4float %FragColor + %193 = OpVectorShuffle %v4float %192 %191 4 5 2 3 + OpStore %FragColor %193 + %194 = OpAccessChain %_ptr_Input_v2float %s %int_4 %int_0 + %195 = OpExtInst %v2float %1 InterpolateAtSample %194 %int_2 + %196 = OpLoad %v4float %FragColor + %197 = OpVectorShuffle %v2float %196 %196 0 1 + %198 = OpFAdd %v2float %197 %195 + %199 = OpLoad %v4float %FragColor + %200 = OpVectorShuffle %v4float %199 %198 4 5 2 3 + OpStore %FragColor %200 + %201 = OpAccessChain %_ptr_Input_v2float %s %int_4 %int_1 + %202 = OpExtInst %v2float %1 InterpolateAtOffset %201 %30 + %203 = OpLoad %v4float %FragColor + %204 = OpVectorShuffle %v2float %203 %203 0 1 + %205 = OpFAdd %v2float %204 %202 + %206 = OpLoad %v4float %FragColor + %207 = OpVectorShuffle %v4float %206 %205 4 5 2 3 + OpStore %FragColor %207 + %208 = OpAccessChain %_ptr_Input_float %s %int_5 %int_0 + %209 = OpLoad %float %208 + %210 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + %211 = OpLoad %float %210 + %212 = OpFAdd %float %211 %209 + %213 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %213 %212 + %214 = OpAccessChain %_ptr_Input_float %s %int_5 %int_1 + %215 = OpExtInst %float %1 InterpolateAtCentroid %214 + %216 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + %217 = OpLoad %float %216 + %218 = OpFAdd %float %217 %215 + %219 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %219 %218 + 
%220 = OpAccessChain %_ptr_Input_float %s %int_5 %int_0 + %221 = OpExtInst %float %1 InterpolateAtSample %220 %int_2 + %222 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + %223 = OpLoad %float %222 + %224 = OpFAdd %float %223 %221 + %225 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %225 %224 + %226 = OpAccessChain %_ptr_Input_float %s %int_5 %int_1 + %227 = OpExtInst %float %1 InterpolateAtOffset %226 %30 + %228 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + %229 = OpLoad %float %228 + %230 = OpFAdd %float %229 %227 + %231 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %231 %230 + %232 = OpFunctionCall %void %func_ + OpReturn + OpFunctionEnd + %func_ = OpFunction %void None %15 + %233 = OpLabel + %234 = OpLoad %v2float %baz + %235 = OpLoad %v4float %FragColor + %236 = OpVectorShuffle %v2float %235 %235 0 1 + %237 = OpFAdd %v2float %236 %234 + %238 = OpLoad %v4float %FragColor + %239 = OpVectorShuffle %v4float %238 %237 4 5 2 3 + OpStore %FragColor %239 + %240 = OpAccessChain %_ptr_Input_float %baz %uint_0 + %241 = OpExtInst %float %1 InterpolateAtCentroid %240 + %242 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + %243 = OpLoad %float %242 + %244 = OpFAdd %float %243 %241 + %245 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %245 %244 + %246 = OpAccessChain %_ptr_Input_float %baz %uint_1 + %247 = OpExtInst %float %1 InterpolateAtSample %246 %int_3 + %248 = OpAccessChain %_ptr_Output_float %FragColor %uint_1 + %249 = OpLoad %float %248 + %250 = OpFAdd %float %249 %247 + %251 = OpAccessChain %_ptr_Output_float %FragColor %uint_1 + OpStore %251 %250 + %252 = OpAccessChain %_ptr_Input_float %baz %uint_1 + %253 = OpExtInst %float %1 InterpolateAtOffset %252 %30 + %254 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + %255 = OpLoad %float %254 + %256 = OpFAdd %float %255 %253 + %257 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + OpStore %257 %256 + %258 = OpAccessChain 
%_ptr_Input_v2float %a %int_1 + %259 = OpExtInst %v2float %1 InterpolateAtCentroid %258 + %260 = OpLoad %v4float %FragColor + %261 = OpVectorShuffle %v2float %260 %260 0 1 + %262 = OpFAdd %v2float %261 %259 + %263 = OpLoad %v4float %FragColor + %264 = OpVectorShuffle %v4float %263 %262 4 5 2 3 + OpStore %FragColor %264 + %265 = OpAccessChain %_ptr_Input_v2float %a %int_0 + %266 = OpExtInst %v2float %1 InterpolateAtSample %265 %int_2 + %267 = OpLoad %v4float %FragColor + %268 = OpVectorShuffle %v2float %267 %267 0 1 + %269 = OpFAdd %v2float %268 %266 + %270 = OpLoad %v4float %FragColor + %271 = OpVectorShuffle %v4float %270 %269 4 5 2 3 + OpStore %FragColor %271 + %272 = OpAccessChain %_ptr_Input_v2float %a %int_1 + %273 = OpExtInst %v2float %1 InterpolateAtOffset %272 %30 + %274 = OpLoad %v4float %FragColor + %275 = OpVectorShuffle %v2float %274 %274 0 1 + %276 = OpFAdd %v2float %275 %273 + %277 = OpLoad %v4float %FragColor + %278 = OpVectorShuffle %v4float %277 %276 4 5 2 3 + OpStore %FragColor %278 + %279 = OpAccessChain %_ptr_Input_v4float %s %int_2 + %280 = OpLoad %v4float %279 + %281 = OpLoad %v4float %FragColor + %282 = OpFAdd %v4float %281 %280 + OpStore %FragColor %282 + %283 = OpAccessChain %_ptr_Input_v4float %s %int_2 + %284 = OpExtInst %v4float %1 InterpolateAtCentroid %283 + %285 = OpVectorShuffle %v2float %284 %284 1 1 + %286 = OpLoad %v4float %FragColor + %287 = OpVectorShuffle %v2float %286 %286 0 1 + %288 = OpFAdd %v2float %287 %285 + %289 = OpLoad %v4float %FragColor + %290 = OpVectorShuffle %v4float %289 %288 4 5 2 3 + OpStore %FragColor %290 + %291 = OpAccessChain %_ptr_Input_v4float %s %int_2 + %292 = OpExtInst %v4float %1 InterpolateAtSample %291 %int_3 + %293 = OpVectorShuffle %v2float %292 %292 0 1 + %294 = OpLoad %v4float %FragColor + %295 = OpVectorShuffle %v2float %294 %294 1 2 + %296 = OpFAdd %v2float %295 %293 + %297 = OpLoad %v4float %FragColor + %298 = OpVectorShuffle %v4float %297 %296 0 4 5 3 + OpStore %FragColor %298 + %299 = 
OpAccessChain %_ptr_Input_v4float %s %int_2 + %300 = OpExtInst %v4float %1 InterpolateAtOffset %299 %30 + %301 = OpVectorShuffle %v2float %300 %300 3 0 + %302 = OpLoad %v4float %FragColor + %303 = OpVectorShuffle %v2float %302 %302 2 3 + %304 = OpFAdd %v2float %303 %301 + %305 = OpLoad %v4float %FragColor + %306 = OpVectorShuffle %v4float %305 %304 0 1 4 5 + OpStore %FragColor %306 + %308 = OpAccessChain %_ptr_Input_v4float %s %int_3 %int_0 + %309 = OpLoad %v4float %308 + %310 = OpLoad %v4float %FragColor + %311 = OpFAdd %v4float %310 %309 + OpStore %FragColor %311 + %312 = OpAccessChain %_ptr_Input__arr_v4float_uint_2 %s %int_3 + %313 = OpAccessChain %_ptr_Input_v4float %312 %int_1 + %314 = OpExtInst %v4float %1 InterpolateAtCentroid %313 + %315 = OpLoad %v4float %FragColor + %316 = OpFAdd %v4float %315 %314 + OpStore %FragColor %316 + %317 = OpAccessChain %_ptr_Input_v4float %s %int_3 %int_0 + %318 = OpExtInst %v4float %1 InterpolateAtSample %317 %int_2 + %319 = OpLoad %v4float %FragColor + %320 = OpFAdd %v4float %319 %318 + OpStore %FragColor %320 + %321 = OpAccessChain %_ptr_Input_v4float %s %int_3 %int_1 + %322 = OpExtInst %v4float %1 InterpolateAtOffset %321 %30 + %323 = OpLoad %v4float %FragColor + %324 = OpFAdd %v4float %323 %322 + OpStore %FragColor %324 + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/frag/switch-different-sizes.asm.frag b/shaders-msl/asm/frag/switch-different-sizes.asm.frag new file mode 100644 index 00000000000..ee6daa3d2b0 --- /dev/null +++ b/shaders-msl/asm/frag/switch-different-sizes.asm.frag @@ -0,0 +1,106 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 10 +; Bound: 42 +; Schema: 0 + OpCapability Shader + OpCapability Int8 + OpCapability Int16 + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension 
"GL_GOOGLE_include_directive" + OpName %main "main" + OpName %sw0 "sw0" + OpName %result "result" + OpName %sw1 "sw1" + OpName %sw2 "sw2" + OpName %sw3 "sw3" + OpDecorate %sw1 RelaxedPrecision + OpDecorate %21 RelaxedPrecision + OpDecorate %sw2 RelaxedPrecision + OpDecorate %29 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %lowp_int = OpTypeInt 8 1 + %highp_int = OpTypeInt 16 1 + %_ptr_Function_int = OpTypePointer Function %int + %_ptr_Function_lowp_int = OpTypePointer Function %lowp_int + %_ptr_Function_highp_int = OpTypePointer Function %highp_int + %int_42 = OpConstant %int 42 + %int_0 = OpConstant %int 0 + %int_420 = OpConstant %int 420 + %int_10 = OpConstant %int 10 + %int_512 = OpConstant %int 512 + %lowp_int_10 = OpConstant %lowp_int 10 + %highp_int_10 = OpConstant %highp_int 10 + %main = OpFunction %void None %3 + %5 = OpLabel + %sw0 = OpVariable %_ptr_Function_int Function + %result = OpVariable %_ptr_Function_int Function + %sw1 = OpVariable %_ptr_Function_lowp_int Function + %sw2 = OpVariable %_ptr_Function_highp_int Function + %sw3 = OpVariable %_ptr_Function_highp_int Function + OpStore %sw0 %int_42 + OpStore %result %int_0 + %12 = OpLoad %int %sw0 + OpSelectionMerge %16 None + OpSwitch %12 %16 -42 %13 420 %14 -1234 %15 + %13 = OpLabel + OpStore %result %int_42 + OpBranch %14 + %14 = OpLabel + OpStore %result %int_420 + OpBranch %15 + %15 = OpLabel + OpStore %result %int_420 + OpBranch %16 + %16 = OpLabel + OpStore %sw1 %lowp_int_10 + %21 = OpLoad %lowp_int %sw1 + OpSelectionMerge %25 None + OpSwitch %21 %25 -42 %22 42 %23 -123 %24 + %22 = OpLabel + OpStore %result %int_42 + OpBranch %23 + %23 = OpLabel + OpStore %result %int_420 + OpBranch %24 + %24 = OpLabel + OpStore %result %int_512 + OpBranch %25 + %25 = OpLabel + OpStore %sw2 %highp_int_10 + %29 = OpLoad %highp_int %sw2 + OpSelectionMerge %33 None + OpSwitch %29 %33 -42 %30 42 %31 -1234 %32 + %30 = OpLabel + OpStore %result %int_42 + OpBranch %31 + 
%31 = OpLabel + OpStore %result %int_420 + OpBranch %32 + %32 = OpLabel + OpStore %result %int_512 + OpBranch %33 + %33 = OpLabel + OpStore %sw3 %highp_int_10 + %36 = OpLoad %highp_int %sw3 + OpSelectionMerge %40 None + OpSwitch %36 %40 -42 %37 42 %38 -1234 %39 + %37 = OpLabel + OpStore %result %int_42 + OpBranch %38 + %38 = OpLabel + OpStore %result %int_420 + OpBranch %39 + %39 = OpLabel + OpStore %result %int_512 + OpBranch %40 + %40 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag b/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag new file mode 100644 index 00000000000..62f2dc68073 --- /dev/null +++ b/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 21 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %sw "sw" + OpName %result "result" + %void = OpTypeVoid + %6 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %long = OpTypeInt 64 1 +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Function_long = OpTypePointer Function %long + %int_42 = OpConstant %int 42 + %int_0 = OpConstant %int 0 + %int_420 = OpConstant %int 420 + %long_42 = OpConstant %long 42 + %main = OpFunction %void None %6 + %15 = OpLabel + %sw = OpVariable %_ptr_Function_long Function + %result = OpVariable %_ptr_Function_int Function + OpStore %sw %long_42 + OpStore %result %int_0 + %16 = OpLoad %long %sw + OpSelectionMerge %17 None + OpSwitch %16 %17 -42 %18 420 %19 -34359738368 %20 + %18 = OpLabel + OpStore %result %int_42 + OpBranch %19 + %19 = OpLabel + OpStore %result %int_420 + OpBranch %20 + %20 = OpLabel + OpStore 
%result %int_420 + OpBranch %17 + %17 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag b/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag new file mode 100644 index 00000000000..cea32b420a6 --- /dev/null +++ b/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 21 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %sw "sw" + OpName %result "result" + %void = OpTypeVoid + %6 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %long = OpTypeInt 64 0 +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Function_long = OpTypePointer Function %long + %int_42 = OpConstant %int 42 + %int_0 = OpConstant %int 0 + %int_420 = OpConstant %int 420 + %long_42 = OpConstant %long 42 + %main = OpFunction %void None %6 + %15 = OpLabel + %sw = OpVariable %_ptr_Function_long Function + %result = OpVariable %_ptr_Function_int Function + OpStore %sw %long_42 + OpStore %result %int_0 + %16 = OpLoad %long %sw + OpSelectionMerge %17 None + OpSwitch %16 %17 42 %18 420 %19 343597383680 %20 + %18 = OpLabel + OpStore %result %int_42 + OpBranch %19 + %19 = OpLabel + OpStore %result %int_420 + OpBranch %20 + %20 = OpLabel + OpStore %result %int_420 + OpBranch %17 + %17 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag b/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag index ae7a972d7b2..e7e6f37ea27 100644 --- a/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag +++ b/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag @@ -5,6 +5,7 @@ ; Schema: 0 
OpCapability Shader OpCapability StorageInputOutput16 + OpCapability Float16 OpExtension "SPV_KHR_16bit_storage" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 diff --git a/shaders-msl/asm/frag/unord-relational-op.asm.frag b/shaders-msl/asm/frag/unord-relational-op.asm.frag index 3e4cd6c2c29..824c0512911 100644 --- a/shaders-msl/asm/frag/unord-relational-op.asm.frag +++ b/shaders-msl/asm/frag/unord-relational-op.asm.frag @@ -114,6 +114,8 @@ OpStore %t1 %b %15 = OpFUnordEqual %bool %a %b OpStore %c1 %15 + %ordered = OpFOrdNotEqual %bool %a %b + OpStore %c1 %ordered %17 = OpFUnordNotEqual %bool %a %b OpStore %c2 %17 %19 = OpFUnordLessThan %bool %a %b diff --git a/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag new file mode 100644 index 00000000000..824c0512911 --- /dev/null +++ b/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag @@ -0,0 +1,207 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 122 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %c %d %e %f %g %h %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 460 + OpName %main "main" + OpName %t0 "t0" + OpName %a "a" + OpName %t1 "t1" + OpName %b "b" + OpName %c1 "c1" + OpName %c2 "c2" + OpName %c3 "c3" + OpName %c4 "c4" + OpName %c5 "c5" + OpName %c6 "c6" + OpName %c7 "c7" + OpName %c "c" + OpName %d "d" + OpName %c8 "c8" + OpName %c9 "c9" + OpName %c10 "c10" + OpName %c11 "c11" + OpName %c12 "c12" + OpName %c13 "c13" + OpName %e "e" + OpName %f "f" + OpName %c14 "c14" + OpName %c15 "c15" + OpName %c16 "c16" + OpName %c17 "c17" + OpName %c18 "c18" + OpName %c19 "c19" + OpName %g "g" + OpName %h "h" + OpName %c20 "c20" + OpName %c21 "c21" + OpName %c22 "c22" + OpName %c23 "c23" + OpName %c24 "c24" + OpName %FragColor "FragColor" + OpDecorate %a SpecId 1 + 
OpDecorate %b SpecId 2 + OpDecorate %c Location 2 + OpDecorate %d Location 3 + OpDecorate %e Location 4 + OpDecorate %f Location 5 + OpDecorate %g Location 6 + OpDecorate %h Location 7 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %a = OpSpecConstant %float 1 + %b = OpSpecConstant %float 2 + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %v2bool = OpTypeVector %bool 2 +%_ptr_Function_v2bool = OpTypePointer Function %v2bool + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %c = OpVariable %_ptr_Input_v2float Input + %d = OpVariable %_ptr_Input_v2float Input + %v3bool = OpTypeVector %bool 3 +%_ptr_Function_v3bool = OpTypePointer Function %v3bool + %v3float = OpTypeVector %float 3 +%_ptr_Input_v3float = OpTypePointer Input %v3float + %e = OpVariable %_ptr_Input_v3float Input + %f = OpVariable %_ptr_Input_v3float Input + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %g = OpVariable %_ptr_Input_v4float Input + %h = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %t0 = OpVariable %_ptr_Function_float Function + %t1 = OpVariable %_ptr_Function_float Function + %c1 = OpVariable %_ptr_Function_bool Function + %c2 = OpVariable %_ptr_Function_bool Function + %c3 = OpVariable %_ptr_Function_bool Function + %c4 = OpVariable %_ptr_Function_bool Function + %c5 = OpVariable %_ptr_Function_bool Function + %c6 = OpVariable %_ptr_Function_bool Function + %c7 = OpVariable %_ptr_Function_v2bool Function + %c8 = OpVariable %_ptr_Function_v2bool Function + %c9 = OpVariable %_ptr_Function_v2bool Function + %c10 = OpVariable 
%_ptr_Function_v2bool Function + %c11 = OpVariable %_ptr_Function_v2bool Function + %c12 = OpVariable %_ptr_Function_v2bool Function + %c13 = OpVariable %_ptr_Function_v3bool Function + %c14 = OpVariable %_ptr_Function_v3bool Function + %c15 = OpVariable %_ptr_Function_v3bool Function + %c16 = OpVariable %_ptr_Function_v3bool Function + %c17 = OpVariable %_ptr_Function_v3bool Function + %c18 = OpVariable %_ptr_Function_v3bool Function + %c19 = OpVariable %_ptr_Function_v4bool Function + %c20 = OpVariable %_ptr_Function_v4bool Function + %c21 = OpVariable %_ptr_Function_v4bool Function + %c22 = OpVariable %_ptr_Function_v4bool Function + %c23 = OpVariable %_ptr_Function_v4bool Function + %c24 = OpVariable %_ptr_Function_v4bool Function + OpStore %t0 %a + OpStore %t1 %b + %15 = OpFUnordEqual %bool %a %b + OpStore %c1 %15 + %ordered = OpFOrdNotEqual %bool %a %b + OpStore %c1 %ordered + %17 = OpFUnordNotEqual %bool %a %b + OpStore %c2 %17 + %19 = OpFUnordLessThan %bool %a %b + OpStore %c3 %19 + %21 = OpFUnordGreaterThan %bool %a %b + OpStore %c4 %21 + %23 = OpFUnordLessThanEqual %bool %a %b + OpStore %c5 %23 + %25 = OpFUnordGreaterThanEqual %bool %a %b + OpStore %c6 %25 + %32 = OpLoad %v2float %c + %34 = OpLoad %v2float %d + %35 = OpFUnordEqual %v2bool %32 %34 + OpStore %c7 %35 + %37 = OpLoad %v2float %c + %38 = OpLoad %v2float %d + %39 = OpFUnordNotEqual %v2bool %37 %38 + OpStore %c8 %39 + %41 = OpLoad %v2float %c + %42 = OpLoad %v2float %d + %43 = OpFUnordLessThan %v2bool %41 %42 + OpStore %c9 %43 + %45 = OpLoad %v2float %c + %46 = OpLoad %v2float %d + %47 = OpFUnordGreaterThan %v2bool %45 %46 + OpStore %c10 %47 + %49 = OpLoad %v2float %c + %50 = OpLoad %v2float %d + %51 = OpFUnordLessThanEqual %v2bool %49 %50 + OpStore %c11 %51 + %53 = OpLoad %v2float %c + %54 = OpLoad %v2float %d + %55 = OpFUnordGreaterThanEqual %v2bool %53 %54 + OpStore %c12 %55 + %62 = OpLoad %v3float %e + %64 = OpLoad %v3float %f + %65 = OpFUnordEqual %v3bool %62 %64 + OpStore %c13 %65 + %67 = 
OpLoad %v3float %e + %68 = OpLoad %v3float %f + %69 = OpFUnordNotEqual %v3bool %67 %68 + OpStore %c14 %69 + %71 = OpLoad %v3float %e + %72 = OpLoad %v3float %f + %73 = OpFUnordLessThan %v3bool %71 %72 + OpStore %c15 %73 + %75 = OpLoad %v3float %e + %76 = OpLoad %v3float %f + %77 = OpFUnordGreaterThan %v3bool %75 %76 + OpStore %c16 %77 + %79 = OpLoad %v3float %e + %80 = OpLoad %v3float %f + %81 = OpFUnordLessThanEqual %v3bool %79 %80 + OpStore %c17 %81 + %83 = OpLoad %v3float %e + %84 = OpLoad %v3float %f + %85 = OpFUnordGreaterThanEqual %v3bool %83 %84 + OpStore %c18 %85 + %92 = OpLoad %v4float %g + %94 = OpLoad %v4float %h + %95 = OpFUnordEqual %v4bool %92 %94 + OpStore %c19 %95 + %97 = OpLoad %v4float %g + %98 = OpLoad %v4float %h + %99 = OpFUnordNotEqual %v4bool %97 %98 + OpStore %c20 %99 + %101 = OpLoad %v4float %g + %102 = OpLoad %v4float %h + %103 = OpFUnordLessThan %v4bool %101 %102 + OpStore %c21 %103 + %105 = OpLoad %v4float %g + %106 = OpLoad %v4float %h + %107 = OpFUnordGreaterThan %v4bool %105 %106 + OpStore %c22 %107 + %109 = OpLoad %v4float %g + %110 = OpLoad %v4float %h + %111 = OpFUnordLessThanEqual %v4bool %109 %110 + OpStore %c23 %111 + %113 = OpLoad %v4float %g + %114 = OpLoad %v4float %h + %115 = OpFUnordGreaterThanEqual %v4bool %113 %114 + OpStore %c24 %115 + %118 = OpLoad %float %t0 + %119 = OpLoad %float %t1 + %120 = OpFAdd %float %118 %119 + %121 = OpCompositeConstruct %v4float %120 %120 %120 %120 + OpStore %FragColor %121 + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc b/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc new file mode 100644 index 00000000000..b21a2d3dd56 --- /dev/null +++ b/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 46 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + 
OpEntryPoint TessellationControl %main "main" %gl_TessLevelInner %gl_TessLevelOuter + OpExecutionMode %main OutputVertices 1 + OpExecutionMode %main Triangles + OpSource ESSL 310 + OpSourceExtension "GL_EXT_shader_io_blocks" + OpSourceExtension "GL_EXT_tessellation_shader" + OpName %main "main" + OpName %gl_TessLevelInner "gl_TessLevelInner" + OpName %TessLevels "TessLevels" + OpMemberName %TessLevels 0 "inner0" + OpMemberName %TessLevels 1 "inner1" + OpMemberName %TessLevels 2 "outer0" + OpMemberName %TessLevels 3 "outer1" + OpMemberName %TessLevels 4 "outer2" + OpMemberName %TessLevels 5 "outer3" + OpName %sb_levels "sb_levels" + OpName %gl_TessLevelOuter "gl_TessLevelOuter" + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpMemberDecorate %TessLevels 0 Restrict + OpMemberDecorate %TessLevels 0 NonWritable + OpMemberDecorate %TessLevels 0 Offset 0 + OpMemberDecorate %TessLevels 1 Restrict + OpMemberDecorate %TessLevels 1 NonWritable + OpMemberDecorate %TessLevels 1 Offset 4 + OpMemberDecorate %TessLevels 2 Restrict + OpMemberDecorate %TessLevels 2 NonWritable + OpMemberDecorate %TessLevels 2 Offset 8 + OpMemberDecorate %TessLevels 3 Restrict + OpMemberDecorate %TessLevels 3 NonWritable + OpMemberDecorate %TessLevels 3 Offset 12 + OpMemberDecorate %TessLevels 4 Restrict + OpMemberDecorate %TessLevels 4 NonWritable + OpMemberDecorate %TessLevels 4 Offset 16 + OpMemberDecorate %TessLevels 5 Restrict + OpMemberDecorate %TessLevels 5 NonWritable + OpMemberDecorate %TessLevels 5 Offset 20 + OpDecorate %TessLevels Block + OpDecorate %sb_levels DescriptorSet 0 + OpDecorate %sb_levels Binding 0 + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output 
%_arr_float_uint_2 +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %TessLevels = OpTypeStruct %float %float %float %float %float %float +%_ptr_StorageBuffer_TessLevels = OpTypePointer StorageBuffer %TessLevels + %sb_levels = OpVariable %_ptr_StorageBuffer_TessLevels StorageBuffer +%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float +%_ptr_Output_float = OpTypePointer Output %float + %int_1 = OpConstant %int 1 + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_StorageBuffer_float %sb_levels %int_0 + %19 = OpLoad %float %18 + %21 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_0 + OpStore %21 %19 + %23 = OpAccessChain %_ptr_StorageBuffer_float %sb_levels %int_1 + %24 = OpLoad %float %23 + %25 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_1 + OpStore %25 %24 + %31 = OpAccessChain %_ptr_StorageBuffer_float %sb_levels %int_2 + %32 = OpLoad %float %31 + %33 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_0 + OpStore %33 %32 + %35 = OpAccessChain %_ptr_StorageBuffer_float %sb_levels %int_3 + %36 = OpLoad %float %35 + %37 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 + OpStore %37 %36 + %39 = OpAccessChain %_ptr_StorageBuffer_float %sb_levels %int_4 + %40 = OpLoad %float %39 + %41 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_2 + OpStore %41 %40 + %43 = OpAccessChain %_ptr_StorageBuffer_float %sb_levels %int_5 + %44 = OpLoad %float %43 + %45 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_3 + OpStore %45 %44 + OpReturn + OpFunctionEnd diff --git 
a/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert b/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert new file mode 100644 index 00000000000..59ec3f91984 --- /dev/null +++ b/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert @@ -0,0 +1,91 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 56 +; Schema: 0 + OpCapability Shader + OpCapability ClipDistance + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %pos_1 %_entryPointOutput_pos %_entryPointOutput_clip + OpSource HLSL 500 + OpName %main "main" + OpName %VSOut "VSOut" + OpMemberName %VSOut 0 "pos" + OpMemberName %VSOut 1 "clip" + OpName %_main_vf4_ "@main(vf4;" + OpName %pos "pos" + OpName %vout "vout" + OpName %pos_0 "pos" + OpName %pos_1 "pos" + OpName %flattenTemp "flattenTemp" + OpName %param "param" + OpName %_entryPointOutput_pos "@entryPointOutput.pos" + OpName %_entryPointOutput_clip "@entryPointOutput.clip" + OpDecorate %pos_1 Location 0 + OpDecorate %_entryPointOutput_pos BuiltIn Position + OpDecorate %_entryPointOutput_clip BuiltIn ClipDistance + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %v2float = OpTypeVector %float 2 + %VSOut = OpTypeStruct %v4float %v2float + %11 = OpTypeFunction %VSOut %_ptr_Function_v4float +%_ptr_Function_VSOut = OpTypePointer Function %VSOut + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %pos_1 = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_pos = OpVariable %_ptr_Output_v4float Output + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = 
OpTypePointer Output %_arr_float_uint_2 +%_entryPointOutput_clip = OpVariable %_ptr_Output__arr_float_uint_2 Output + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Output_float = OpTypePointer Output %float + %uint_1 = OpConstant %uint 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %pos_0 = OpVariable %_ptr_Function_v4float Function +%flattenTemp = OpVariable %_ptr_Function_VSOut Function + %param = OpVariable %_ptr_Function_v4float Function + %32 = OpLoad %v4float %pos_1 + OpStore %pos_0 %32 + %35 = OpLoad %v4float %pos_0 + OpStore %param %35 + %36 = OpFunctionCall %VSOut %_main_vf4_ %param + OpStore %flattenTemp %36 + %39 = OpAccessChain %_ptr_Function_v4float %flattenTemp %int_0 + %40 = OpLoad %v4float %39 + OpStore %_entryPointOutput_pos %40 + %48 = OpAccessChain %_ptr_Function_float %flattenTemp %int_1 %uint_0 + %49 = OpLoad %float %48 + %51 = OpAccessChain %_ptr_Output_float %_entryPointOutput_clip %int_0 + OpStore %51 %49 + %53 = OpAccessChain %_ptr_Function_float %flattenTemp %int_1 %uint_1 + %54 = OpLoad %float %53 + %55 = OpAccessChain %_ptr_Output_float %_entryPointOutput_clip %int_1 + OpStore %55 %54 + OpReturn + OpFunctionEnd + %_main_vf4_ = OpFunction %VSOut None %11 + %pos = OpFunctionParameter %_ptr_Function_v4float + %14 = OpLabel + %vout = OpVariable %_ptr_Function_VSOut Function + %19 = OpLoad %v4float %pos + %20 = OpAccessChain %_ptr_Function_v4float %vout %int_0 + OpStore %20 %19 + %22 = OpLoad %v4float %pos + %23 = OpVectorShuffle %v2float %22 %22 0 1 + %25 = OpAccessChain %_ptr_Function_v2float %vout %int_1 + OpStore %25 %23 + %26 = OpLoad %VSOut %vout + OpReturnValue %26 + OpFunctionEnd diff --git a/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert b/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert new file mode 100644 index 00000000000..59ec3f91984 --- /dev/null +++ 
b/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert @@ -0,0 +1,91 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 56 +; Schema: 0 + OpCapability Shader + OpCapability ClipDistance + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %pos_1 %_entryPointOutput_pos %_entryPointOutput_clip + OpSource HLSL 500 + OpName %main "main" + OpName %VSOut "VSOut" + OpMemberName %VSOut 0 "pos" + OpMemberName %VSOut 1 "clip" + OpName %_main_vf4_ "@main(vf4;" + OpName %pos "pos" + OpName %vout "vout" + OpName %pos_0 "pos" + OpName %pos_1 "pos" + OpName %flattenTemp "flattenTemp" + OpName %param "param" + OpName %_entryPointOutput_pos "@entryPointOutput.pos" + OpName %_entryPointOutput_clip "@entryPointOutput.clip" + OpDecorate %pos_1 Location 0 + OpDecorate %_entryPointOutput_pos BuiltIn Position + OpDecorate %_entryPointOutput_clip BuiltIn ClipDistance + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %v2float = OpTypeVector %float 2 + %VSOut = OpTypeStruct %v4float %v2float + %11 = OpTypeFunction %VSOut %_ptr_Function_v4float +%_ptr_Function_VSOut = OpTypePointer Function %VSOut + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %pos_1 = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_pos = OpVariable %_ptr_Output_v4float Output + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_entryPointOutput_clip = OpVariable %_ptr_Output__arr_float_uint_2 Output + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = 
OpTypePointer Function %float +%_ptr_Output_float = OpTypePointer Output %float + %uint_1 = OpConstant %uint 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %pos_0 = OpVariable %_ptr_Function_v4float Function +%flattenTemp = OpVariable %_ptr_Function_VSOut Function + %param = OpVariable %_ptr_Function_v4float Function + %32 = OpLoad %v4float %pos_1 + OpStore %pos_0 %32 + %35 = OpLoad %v4float %pos_0 + OpStore %param %35 + %36 = OpFunctionCall %VSOut %_main_vf4_ %param + OpStore %flattenTemp %36 + %39 = OpAccessChain %_ptr_Function_v4float %flattenTemp %int_0 + %40 = OpLoad %v4float %39 + OpStore %_entryPointOutput_pos %40 + %48 = OpAccessChain %_ptr_Function_float %flattenTemp %int_1 %uint_0 + %49 = OpLoad %float %48 + %51 = OpAccessChain %_ptr_Output_float %_entryPointOutput_clip %int_0 + OpStore %51 %49 + %53 = OpAccessChain %_ptr_Function_float %flattenTemp %int_1 %uint_1 + %54 = OpLoad %float %53 + %55 = OpAccessChain %_ptr_Output_float %_entryPointOutput_clip %int_1 + OpStore %55 %54 + OpReturn + OpFunctionEnd + %_main_vf4_ = OpFunction %VSOut None %11 + %pos = OpFunctionParameter %_ptr_Function_v4float + %14 = OpLabel + %vout = OpVariable %_ptr_Function_VSOut Function + %19 = OpLoad %v4float %pos + %20 = OpAccessChain %_ptr_Function_v4float %vout %int_0 + OpStore %20 %19 + %22 = OpLoad %v4float %pos + %23 = OpVectorShuffle %v2float %22 %22 0 1 + %25 = OpAccessChain %_ptr_Function_v2float %vout %int_1 + OpStore %25 %23 + %26 = OpLoad %VSOut %vout + OpReturnValue %26 + OpFunctionEnd diff --git a/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert b/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert new file mode 100644 index 00000000000..429d3e4127c --- /dev/null +++ b/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert @@ -0,0 +1,111 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 62 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main 
"main" %_ %gl_VertexIndex %a_position + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpName %Struct "Struct" + OpMemberName %Struct 0 "flags" + OpName %defaultUniformsVS "defaultUniformsVS" + OpMemberName %defaultUniformsVS 0 "flags" + OpMemberName %defaultUniformsVS 1 "uquad" + OpMemberName %defaultUniformsVS 2 "umatrix" + OpName %__0 "" + OpName %gl_VertexIndex "gl_VertexIndex" + OpName %a_position "a_position" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %_arr_uint_uint_1 ArrayStride 16 + OpMemberDecorate %Struct 0 Offset 0 + OpDecorate %_arr_v2float_uint_4 ArrayStride 16 + OpMemberDecorate %defaultUniformsVS 0 Offset 0 + OpMemberDecorate %defaultUniformsVS 1 Offset 16 + OpMemberDecorate %defaultUniformsVS 2 ColMajor + OpMemberDecorate %defaultUniformsVS 2 Offset 80 + OpMemberDecorate %defaultUniformsVS 2 MatrixStride 16 + OpDecorate %defaultUniformsVS Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorate %a_position Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_arr_uint_uint_1 = OpTypeArray %uint %uint_1 + 
%Struct = OpTypeStruct %_arr_uint_uint_1 + %v2float = OpTypeVector %float 2 + %uint_4 = OpConstant %uint 4 +%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 +%mat4v4float = OpTypeMatrix %v4float 4 +%defaultUniformsVS = OpTypeStruct %Struct %_arr_v2float_uint_4 %mat4v4float +%_ptr_Uniform_defaultUniformsVS = OpTypePointer Uniform %defaultUniformsVS + %__0 = OpVariable %_ptr_Uniform_defaultUniformsVS Uniform + %int_2 = OpConstant %int 2 +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float + %int_1 = OpConstant %int 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_VertexIndex = OpVariable %_ptr_Input_int Input +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %a_position = OpVariable %_ptr_Input_v4float Input + %uint_2 = OpConstant %uint 2 +%_ptr_Input_float = OpTypePointer Input %float + %uint_3 = OpConstant %uint 3 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool + %uint_0 = OpConstant %uint 0 + %float_0 = OpConstant %float 0 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %27 = OpAccessChain %_ptr_Uniform_mat4v4float %__0 %int_2 + %28 = OpLoad %mat4v4float %27 + %32 = OpLoad %int %gl_VertexIndex + %34 = OpAccessChain %_ptr_Uniform_v2float %__0 %int_1 %32 + %35 = OpLoad %v2float %34 + %40 = OpAccessChain %_ptr_Input_float %a_position %uint_2 + %41 = OpLoad %float %40 + %43 = OpAccessChain %_ptr_Input_float %a_position %uint_3 + %44 = OpLoad %float %43 + %45 = OpCompositeExtract %float %35 0 + %46 = OpCompositeExtract %float %35 1 + %47 = OpCompositeConstruct %v4float %45 %46 %41 %44 + %48 = OpMatrixTimesVector %v4float %28 %47 + %50 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %50 %48 + %52 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %int_0 %int_0 + %53 = OpLoad %uint %52 + %56 = OpINotEqual %bool %53 %uint_0 + OpSelectionMerge %58 None + 
OpBranchConditional %56 %57 %58 + %57 = OpLabel + %61 = OpAccessChain %_ptr_Output_float %_ %int_0 %uint_2 + OpStore %61 %float_0 + OpBranch %58 + %58 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert b/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert new file mode 100644 index 00000000000..8448265c1dd --- /dev/null +++ b/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert @@ -0,0 +1,113 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 64 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %gl_VertexIndex %a_position + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpName %Struct "Struct" + OpMemberName %Struct 0 "flags" + OpName %defaultUniformsVS "defaultUniformsVS" + OpMemberName %defaultUniformsVS 0 "flags" + OpMemberName %defaultUniformsVS 1 "uquad" + OpMemberName %defaultUniformsVS 2 "umatrix" + OpName %__0 "" + OpName %gl_VertexIndex "gl_VertexIndex" + OpName %a_position "a_position" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %_arr_v2uint_uint_1 ArrayStride 16 + OpMemberDecorate %Struct 0 Offset 0 + OpDecorate %_arr_v2float_uint_4 ArrayStride 16 + OpMemberDecorate %defaultUniformsVS 0 Offset 0 + OpMemberDecorate %defaultUniformsVS 1 Offset 16 + OpMemberDecorate %defaultUniformsVS 2 ColMajor + OpMemberDecorate %defaultUniformsVS 2 Offset 80 + OpMemberDecorate %defaultUniformsVS 2 MatrixStride 16 + 
OpDecorate %defaultUniformsVS Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorate %a_position Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v2uint = OpTypeVector %uint 2 +%_arr_v2uint_uint_1 = OpTypeArray %v2uint %uint_1 + %Struct = OpTypeStruct %_arr_v2uint_uint_1 + %v2float = OpTypeVector %float 2 + %uint_4 = OpConstant %uint 4 +%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 +%mat4v4float = OpTypeMatrix %v4float 4 +%defaultUniformsVS = OpTypeStruct %Struct %_arr_v2float_uint_4 %mat4v4float +%_ptr_Uniform_defaultUniformsVS = OpTypePointer Uniform %defaultUniformsVS + %__0 = OpVariable %_ptr_Uniform_defaultUniformsVS Uniform + %int_2 = OpConstant %int 2 +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float + %int_1 = OpConstant %int 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_VertexIndex = OpVariable %_ptr_Input_int Input +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %a_position = OpVariable %_ptr_Input_v4float Input + %uint_2 = OpConstant %uint 2 +%_ptr_Input_float = OpTypePointer Input %float + %uint_3 = OpConstant %uint 3 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %bool = OpTypeBool + %v2bool = OpTypeVector %bool 2 + %uint_0 = OpConstant %uint 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %float_0 = OpConstant %float 0 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %28 = OpAccessChain 
%_ptr_Uniform_mat4v4float %__0 %int_2 + %29 = OpLoad %mat4v4float %28 + %33 = OpLoad %int %gl_VertexIndex + %35 = OpAccessChain %_ptr_Uniform_v2float %__0 %int_1 %33 + %36 = OpLoad %v2float %35 + %41 = OpAccessChain %_ptr_Input_float %a_position %uint_2 + %42 = OpLoad %float %41 + %44 = OpAccessChain %_ptr_Input_float %a_position %uint_3 + %45 = OpLoad %float %44 + %46 = OpCompositeExtract %float %36 0 + %47 = OpCompositeExtract %float %36 1 + %48 = OpCompositeConstruct %v4float %46 %47 %42 %45 + %49 = OpMatrixTimesVector %v4float %29 %48 + %51 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %51 %49 + %56 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %int_0 %int_0 %uint_0 + %57 = OpLoad %uint %56 + %58 = OpINotEqual %bool %57 %uint_0 + OpSelectionMerge %60 None + OpBranchConditional %58 %59 %60 + %59 = OpLabel + %63 = OpAccessChain %_ptr_Output_float %_ %int_0 %uint_2 + OpStore %63 %float_0 + OpBranch %60 + %60 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert b/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert index b566a3d1a0f..64f6c92ce95 100644 --- a/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert +++ b/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert @@ -49,8 +49,10 @@ %28 = OpConstant %17 2 %33 = OpConstant %12 20 %34 = OpConstant %12 30 + %int_3 = OpConstant %12 -3 + %bar = OpSpecConstantOp %12 SRem %13 %int_3 %35 = OpTypeVector %12 4 - %36 = OpSpecConstantComposite %35 %33 %34 %15 %15 + %36 = OpSpecConstantComposite %35 %33 %34 %15 %bar %40 = OpTypeVector %12 2 %41 = OpSpecConstantOp %40 VectorShuffle %36 %36 1 0 %foo = OpSpecConstantOp %12 CompositeExtract %36 1 @@ -63,6 +65,7 @@ %53 = OpConstant %12 0 %55 = OpTypePointer Output %7 %57 = OpSpecConstant %6 3.14159 + %baz = OpSpecConstantOp %6 QuantizeToF16 %57 %4 = OpFunction %2 None %3 %5 = OpLabel %9 = OpVariable %8 Function diff --git a/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp 
b/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp new file mode 100644 index 00000000000..72ca8899ad1 --- /dev/null +++ b/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp @@ -0,0 +1,10 @@ +#version 450 + +layout(set = 0, binding = 1, r32f) writeonly uniform image2D uImage; +layout(set = 0, binding = 2, r32f) readonly uniform image2D uImageRead; + +void main() +{ + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); + imageStore(uImage, coord, imageLoad(uImageRead, coord)); +} diff --git a/shaders-msl/comp/basic.dispatchbase.comp b/shaders-msl/comp/basic.dispatchbase.comp new file mode 100644 index 00000000000..2c873468cc7 --- /dev/null +++ b/shaders-msl/comp/basic.dispatchbase.comp @@ -0,0 +1,29 @@ +#version 310 es +layout(local_size_x_id = 10) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +layout(std430, binding = 2) buffer SSBO3 +{ + uint counter; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + uint workgroup = gl_WorkGroupID.x; + vec4 idata = in_data[ident]; + if (dot(idata, vec4(1.0, 5.0, 6.0, 2.0)) > 8.2) + { + out_data[atomicAdd(counter, 1u)] = idata; + } +} + diff --git a/shaders-msl/comp/basic.dispatchbase.msl11.comp b/shaders-msl/comp/basic.dispatchbase.msl11.comp new file mode 100644 index 00000000000..91453332aa4 --- /dev/null +++ b/shaders-msl/comp/basic.dispatchbase.msl11.comp @@ -0,0 +1,29 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +layout(std430, binding = 2) buffer SSBO3 +{ + uint counter; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + uint workgroup = gl_WorkGroupID.x; + vec4 idata = in_data[ident]; + if (dot(idata, vec4(1.0, 5.0, 6.0, 2.0)) > 8.2) + { + out_data[atomicAdd(counter, 1u)] = 
idata; + } +} + diff --git a/shaders-msl/comp/basic.inline-block.msl2.comp b/shaders-msl/comp/basic.inline-block.msl2.comp new file mode 100644 index 00000000000..8e1144a98e8 --- /dev/null +++ b/shaders-msl/comp/basic.inline-block.msl2.comp @@ -0,0 +1,37 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 3, local_size_y = 3, local_size_z = 2) in; + +struct X +{ + int x; + int y; + float z; +}; + +layout(set = 0, binding = 0, scalar) uniform Foo +{ + int a; + int b; + mat4 c; + X x[2]; +}; + +layout(set = 0, binding = 1) uniform Bar +{ + int d; + int e; +}; + +layout(set = 1, binding = 2) buffer Baz +{ + int f; + int g; +} baz[3]; + +void main() +{ + uvec3 coords = gl_GlobalInvocationID; + baz[coords.x].f = a + d; + baz[coords.x].g = b * e; +} diff --git a/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp b/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp new file mode 100644 index 00000000000..9212e04c1ad --- /dev/null +++ b/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require + +layout(buffer_reference, buffer_reference_align = 4) buffer SSBO +{ + vec3 a1; // Will be 12-byte packed + float a2; +}; + +layout(push_constant) uniform UBO +{ + uvec2 b; +}; + +void main() +{ + SSBO(b).a1 = vec3(1.0, 2.0, 3.0); // uvec2 -> buff ref and assign to packed + uvec2 v2 = uvec2(SSBO(b + 32)); // uvec2 -> buff ref -> uvec2 + vec3 v3 = SSBO(v2).a1; // uvec2 -> buff ref and assign from packed + SSBO(v2).a1 = v3 + 1.0; // uvec2 -> buff ref and assign to packed +} diff --git a/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp b/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp new file mode 100644 index 00000000000..2bb19eedad2 --- /dev/null +++ 
b/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp @@ -0,0 +1,64 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer t21; +layout(buffer_reference, buffer_reference_align = 16, std140) buffer t21 +{ + int m0[2]; + int m1; + layout(row_major) t21 m2[2]; + layout(row_major) t21 m3; + layout(row_major) mat2 m4; +}; + +layout(set = 0, binding = 1, std140) uniform t24 +{ + int m0[2]; + int m1; + layout(row_major) t21 m2[2]; + layout(row_major) t21 m3; + layout(row_major) mat2 m4; +} u24; + +layout(push_constant, std430) uniform t35 +{ + int m0[32]; +} u35; + +layout(set = 0, binding = 0, r32ui) uniform writeonly uimage2D v295; + +void main() +{ + int v8 = 0; + v8 |= (u24.m0[0] - 0); + v8 |= (u24.m0[u35.m0[1]] - 1); + v8 |= (u24.m1 - 2); + v8 |= int(u24.m4[0].x - 3.0); + v8 |= int(u24.m4[0].y - 5.0); + v8 |= int(u24.m4[1].x - 4.0); + v8 |= int(u24.m4[1].y - 6.0); + v8 |= (u24.m2[0].m0[0] - 3); + v8 |= (u24.m2[0].m0[u35.m0[1]] - 4); + v8 |= (u24.m2[0].m1 - 5); + v8 |= int(u24.m2[0].m4[0].x - 6.0); + v8 |= int(u24.m2[0].m4[0].y - 8.0); + v8 |= int(u24.m2[0].m4[1].x - 7.0); + v8 |= int(u24.m2[0].m4[1].y - 9.0); + v8 |= (u24.m2[u35.m0[1]].m0[0] - 6); + v8 |= (u24.m2[u35.m0[1]].m0[u35.m0[1]] - 7); + v8 |= (u24.m2[u35.m0[1]].m1 - 8); + v8 |= int(u24.m2[u35.m0[1]].m4[0].x - 9.0); + v8 |= int(u24.m2[u35.m0[1]].m4[0].y - 11.0); + v8 |= int(u24.m2[u35.m0[1]].m4[1].x - 10.0); + v8 |= int(u24.m2[u35.m0[1]].m4[1].y - 12.0); + v8 |= (u24.m3.m0[0] - 9); + v8 |= (u24.m3.m0[u35.m0[1]] - 10); + v8 |= (u24.m3.m1 - 11); + v8 |= int(u24.m3.m4[0].x - 12.0); + v8 |= int(u24.m3.m4[0].y - 14.0); + v8 |= int(u24.m3.m4[1].x - 13.0); + v8 |= int(u24.m3.m4[1].y - 15.0); + uvec4 v284 = mix(uvec4(1u, 0u, 0u, 1u), uvec4(0u), bvec4(v8 != 0)); + imageStore(v295, ivec2(gl_GlobalInvocationID.xy), v284); +} diff --git a/shaders-msl/comp/buffer_device_address.msl2.comp 
b/shaders-msl/comp/buffer_device_address.msl2.comp new file mode 100644 index 00000000000..14ac1ef9dc0 --- /dev/null +++ b/shaders-msl/comp/buffer_device_address.msl2.comp @@ -0,0 +1,86 @@ +/* Copyright (c) 2021, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#version 450 + +// Allows buffer_reference. +#extension GL_EXT_buffer_reference : require + +layout(local_size_x = 8, local_size_y = 8) in; + +// If we mark a buffer as buffer_reference, this is treated as a pointer type. +// A variable with the type Position is a 64-bit pointer to the data within. +// We can freely cast between pointer types if we wish, but that is not necessary in this sample. +// buffer_reference_align is used to let the underlying implementation know which alignment to expect. +// The pointer can have scalar alignment, which is something the compiler cannot know unless you tell it. +// It is best to use vector alignment when you can for optimal performance, but scalar alignment is sometimes useful. +// With SSBOs, the API has a minimum offset alignment which guarantees a minimum level of alignment from API side. + +// It is possible to forward reference a pointer, so you can contain a pointer to yourself inside a struct. +// Useful if you need something like a linked list on the GPU. +// Here it's not particularly useful, but something to know about. 
+layout(buffer_reference) buffer Position; + +layout(std430, buffer_reference, buffer_reference_align = 8) writeonly buffer Position +{ + vec2 positions[]; +}; + +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer PositionReferences +{ + // This buffer contains an array of pointers to other buffers. + Position buffers[]; +}; + +// In push constant we place a pointer to VBO pointers, spicy! +// This way we don't need any descriptor sets, but there's nothing wrong with combining use of descriptor sets and buffer device addresses. +// It is mostly done for convenience here. +layout(push_constant) uniform Registers +{ + PositionReferences references; + // A buffer reference is 64-bit, so offset of fract_time is 8 bytes. + float fract_time; +} registers; + +void main() +{ + // Every slice is a 8x8 grid of vertices which we update here in compute. + uvec2 local_offset = gl_GlobalInvocationID.xy; + uint local_index = local_offset.y * gl_WorkGroupSize.x * gl_NumWorkGroups.x + local_offset.x; + uint slice = gl_WorkGroupID.z; + + restrict Position positions = registers.references.buffers[slice]; + + // This is a trivial wave-like function. Arbitrary for demonstration purposes. + const float TWO_PI = 3.1415628 * 2.0; + float offset = TWO_PI * fract(registers.fract_time + float(slice) * 0.1); + + // Simple grid. + vec2 pos = vec2(local_offset); + + // Wobble, wobble. + pos.x += 0.2 * sin(2.2 * pos.x + offset); + pos.y += 0.2 * sin(2.25 * pos.y + 2.0 * offset); + pos.x += 0.2 * cos(1.8 * pos.y + 3.0 * offset); + pos.y += 0.2 * cos(2.85 * pos.x + 4.0 * offset); + pos.x += 0.5 * sin(offset); + pos.y += 0.5 * sin(offset + 0.3); + + // Center the mesh in [-0.5, 0.5] range. + // Here we write to a raw pointer. + // Be aware, there is no robustness support for buffer_device_address since we don't have a complete descriptor! 
+ positions.positions[local_index] = pos / (vec2(gl_WorkGroupSize.xy) * vec2(gl_NumWorkGroups.xy) - 1.0) - 0.5; +} diff --git a/shaders-msl/comp/complex-composite-constant-array.comp b/shaders-msl/comp/complex-composite-constant-array.comp new file mode 100644 index 00000000000..96a3f8951d0 --- /dev/null +++ b/shaders-msl/comp/complex-composite-constant-array.comp @@ -0,0 +1,19 @@ +#version 450 + +layout(std430, set = 0, binding = 0) buffer SSBO +{ + mat4 a; + uint index; +}; + +const mat4 as[] = mat4[](mat4(1.0), mat4(2.0)); + +void write_global() +{ + a = as[index]; +} + +void main() +{ + write_global(); +} diff --git a/shaders-msl/comp/composite-array-initialization.force-native-array.comp b/shaders-msl/comp/composite-array-initialization.force-native-array.comp new file mode 100644 index 00000000000..1ecf4bcd406 --- /dev/null +++ b/shaders-msl/comp/composite-array-initialization.force-native-array.comp @@ -0,0 +1,28 @@ +#version 450 +layout(local_size_x = 2) in; + +struct Data +{ + float a; + float b; +}; + +layout(std430, binding = 0) buffer SSBO +{ + Data outdata[]; +}; + +layout(constant_id = 0) const float X = 4.0; + +Data data[2] = Data[](Data(1.0, 2.0), Data(3.0, 4.0)); +Data data2[2] = Data[](Data(X, 2.0), Data(3.0, 5.0)); + +Data combine(Data a, Data b) +{ + return Data(a.a + b.a, a.b + b.b); +} + +void main() +{ + outdata[gl_WorkGroupID.x] = combine(data[gl_LocalInvocationID.x], data2[gl_LocalInvocationID.x]); +} diff --git a/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp b/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp new file mode 100644 index 00000000000..edf87195b86 --- /dev/null +++ b/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp @@ -0,0 +1,21 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, std430) buffer BUF +{ + int a; + float b; + float c; +} o; + +void main() +{ + const float a[2][2][2] = float[][][](float[][](float[](1.0, 2.0), float[](3.0, 4.0)), 
float[][](float[](1.0, 2.0), float[](3.0, 4.0))); + float b[2][2][2] = a; + float c[2][2][2] = b; + o.a = int(c[1][1][1]); + + float d[2][2][2] = float[][][](float[][](float[](o.b, o.c), float[](o.b, o.b)), float[][](float[](o.c, o.c), float[](o.c, o.b))); + float e[2][2][2] = d; + o.b = e[1][0][1]; +} diff --git a/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp b/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp new file mode 100644 index 00000000000..862cd212978 --- /dev/null +++ b/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, r32ui) uniform uimage2D uImage; +layout(set = 0, binding = 1) uniform sampler2D uTexture; + +layout(set = 0, binding = 2) buffer SSBO +{ + vec4 outdata; +}; + +void main() +{ + uint ret = imageAtomicAdd(uImage, ivec2(gl_GlobalInvocationID.xy), 10u); + outdata = textureLod(uTexture, vec2(gl_GlobalInvocationID.xy), 0.0) + float(ret); +} diff --git a/shaders-msl/comp/image-atomic-automatic-bindings.comp b/shaders-msl/comp/image-atomic-automatic-bindings.comp new file mode 100644 index 00000000000..862cd212978 --- /dev/null +++ b/shaders-msl/comp/image-atomic-automatic-bindings.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, r32ui) uniform uimage2D uImage; +layout(set = 0, binding = 1) uniform sampler2D uTexture; + +layout(set = 0, binding = 2) buffer SSBO +{ + vec4 outdata; +}; + +void main() +{ + uint ret = imageAtomicAdd(uImage, ivec2(gl_GlobalInvocationID.xy), 10u); + outdata = textureLod(uTexture, vec2(gl_GlobalInvocationID.xy), 0.0) + float(ret); +} diff --git a/shaders-msl/comp/mat3-row-maj-read-write-const.comp b/shaders-msl/comp/mat3-row-maj-read-write-const.comp new file mode 100644 index 00000000000..068ad79721c --- /dev/null +++ b/shaders-msl/comp/mat3-row-maj-read-write-const.comp @@ -0,0 +1,17 @@ +#version 450 +layout(local_size_x 
= 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std430) buffer model_t +{ + layout(row_major) mediump mat3 mtx_rm; +} model; + +void main() +{ + mat3 mtx_cm = model.mtx_rm; + mat3 mtx1 = mtx_cm * mat3(vec3(4.0, -3.0, 1.0), vec3(-7.0, 7.0, -7.0), vec3(-5.0, 6.0, -8.0)); + if (mtx1[0][0] != 0.0) + { + model.mtx_rm = mat3(vec3(-5.0, -3.0, -5.0), vec3(-2.0, 2.0, -5.0), vec3(6.0, 3.0, -8.0)); + } +} diff --git a/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp b/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp new file mode 100644 index 00000000000..eea6a3df46b --- /dev/null +++ b/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp @@ -0,0 +1,114 @@ +#version 450 +layout(local_size_x = 64) in; + +layout(set = 0, binding = 0) buffer SSBO_A +{ + float data[]; +} ssbo_a; + +layout(set = 0, binding = 0) buffer SSBO_B +{ + uvec2 data[]; +} ssbo_b; + +layout(set = 0, binding = 0) readonly buffer SSBO_BRO +{ + uvec2 data[]; +} ssbo_b_readonly; + +layout(set = 0, binding = 1) uniform UBO_C +{ + float data[1024]; +} ubo_c; + +layout(set = 0, binding = 1) uniform UBO_D +{ + uvec2 data[1024]; +} ubo_d; + +layout(set = 0, binding = 2) buffer SSBO_As +{ + float data[]; +} ssbo_as[4]; + +layout(set = 0, binding = 2) buffer SSBO_Bs +{ + uvec2 data[1024]; +} ssbo_bs[4]; + +layout(set = 0, binding = 2) readonly buffer SSBO_BsRO +{ + uvec2 data[1024]; +} ssbo_bs_readonly[4]; + +layout(set = 0, binding = 3) uniform UBO_Cs +{ + float data[1024]; +} ubo_cs[4]; + +layout(set = 0, binding = 3) uniform UBO_Ds +{ + uvec2 data[1024]; +} ubo_ds[4]; + +layout(set = 2, binding = 0) buffer SSBO_E +{ + float data[]; +} ssbo_e; + +layout(set = 2, binding = 0) buffer SSBO_F +{ + uvec2 data[]; +} ssbo_f; + +layout(set = 2, binding = 1) uniform UBO_G +{ + float data[1024]; +} ubo_g; + +layout(set = 2, binding = 1) uniform UBO_H +{ 
+ uvec2 data[1024]; +} ubo_h; + +layout(set = 2, binding = 0) readonly buffer SSBO_I +{ + uvec2 data[]; +} ssbo_i; + +layout(push_constant) uniform Registers +{ + float reg; +}; + +void func0() +{ + ssbo_a.data[gl_GlobalInvocationID.x] = ubo_c.data[gl_WorkGroupID.x] + reg; + ssbo_b.data[gl_GlobalInvocationID.x] = + ubo_d.data[gl_WorkGroupID.y] + ssbo_b_readonly.data[gl_GlobalInvocationID.x]; +} + +void func1() +{ + ssbo_as[gl_WorkGroupID.x].data[gl_GlobalInvocationID.x] = ubo_cs[gl_WorkGroupID.x].data[0]; +} + +void func2() +{ + ssbo_bs[gl_WorkGroupID.x].data[gl_GlobalInvocationID.x] = + ubo_ds[gl_WorkGroupID.x].data[0] + ssbo_bs_readonly[gl_WorkGroupID.x].data[gl_GlobalInvocationID.x]; +} + +void func3() +{ + ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x]; + ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y] + ssbo_i.data[gl_GlobalInvocationID.x]; +} + +void main() +{ + func0(); + func1(); + func2(); + func3(); +} diff --git a/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp b/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp new file mode 100644 index 00000000000..eea6a3df46b --- /dev/null +++ b/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp @@ -0,0 +1,114 @@ +#version 450 +layout(local_size_x = 64) in; + +layout(set = 0, binding = 0) buffer SSBO_A +{ + float data[]; +} ssbo_a; + +layout(set = 0, binding = 0) buffer SSBO_B +{ + uvec2 data[]; +} ssbo_b; + +layout(set = 0, binding = 0) readonly buffer SSBO_BRO +{ + uvec2 data[]; +} ssbo_b_readonly; + +layout(set = 0, binding = 1) uniform UBO_C +{ + float data[1024]; +} ubo_c; + +layout(set = 0, binding = 1) uniform UBO_D +{ + uvec2 data[1024]; +} ubo_d; + +layout(set = 0, binding = 2) buffer SSBO_As +{ + float data[]; +} ssbo_as[4]; + +layout(set = 0, binding = 2) buffer SSBO_Bs +{ + uvec2 data[1024]; +} ssbo_bs[4]; + +layout(set = 0, binding = 2) readonly buffer SSBO_BsRO +{ + uvec2 data[1024]; 
+} ssbo_bs_readonly[4]; + +layout(set = 0, binding = 3) uniform UBO_Cs +{ + float data[1024]; +} ubo_cs[4]; + +layout(set = 0, binding = 3) uniform UBO_Ds +{ + uvec2 data[1024]; +} ubo_ds[4]; + +layout(set = 2, binding = 0) buffer SSBO_E +{ + float data[]; +} ssbo_e; + +layout(set = 2, binding = 0) buffer SSBO_F +{ + uvec2 data[]; +} ssbo_f; + +layout(set = 2, binding = 1) uniform UBO_G +{ + float data[1024]; +} ubo_g; + +layout(set = 2, binding = 1) uniform UBO_H +{ + uvec2 data[1024]; +} ubo_h; + +layout(set = 2, binding = 0) readonly buffer SSBO_I +{ + uvec2 data[]; +} ssbo_i; + +layout(push_constant) uniform Registers +{ + float reg; +}; + +void func0() +{ + ssbo_a.data[gl_GlobalInvocationID.x] = ubo_c.data[gl_WorkGroupID.x] + reg; + ssbo_b.data[gl_GlobalInvocationID.x] = + ubo_d.data[gl_WorkGroupID.y] + ssbo_b_readonly.data[gl_GlobalInvocationID.x]; +} + +void func1() +{ + ssbo_as[gl_WorkGroupID.x].data[gl_GlobalInvocationID.x] = ubo_cs[gl_WorkGroupID.x].data[0]; +} + +void func2() +{ + ssbo_bs[gl_WorkGroupID.x].data[gl_GlobalInvocationID.x] = + ubo_ds[gl_WorkGroupID.x].data[0] + ssbo_bs_readonly[gl_WorkGroupID.x].data[gl_GlobalInvocationID.x]; +} + +void func3() +{ + ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x]; + ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y] + ssbo_i.data[gl_GlobalInvocationID.x]; +} + +void main() +{ + func0(); + func1(); + func2(); + func3(); +} diff --git a/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp b/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp new file mode 100644 index 00000000000..fba72ad0d2d --- /dev/null +++ b/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp @@ -0,0 +1,58 @@ +#version 460 +#extension GL_EXT_ray_query : require +#extension GL_EXT_ray_tracing : require +#extension GL_EXT_ray_flags_primitive_culling : require +layout(primitive_culling); + +layout(set = 0, binding = 0) uniform accelerationStructureEXT AS0; +layout(set = 0, binding = 
1) uniform accelerationStructureEXT AS1; + +layout(set = 0, binding = 2) uniform Params +{ + uint ray_flags; + uint cull_mask; + vec3 origin; + float tmin; + vec3 dir; + float tmax; + float thit; +}; + +rayQueryEXT q2[2]; + +void main() +{ + rayQueryEXT q; + bool res; + uint type; + float fval; + vec3 fvals; + int ival; + mat4x3 matrices; + + rayQueryInitializeEXT(q, AS0, ray_flags, cull_mask, origin, tmin, dir, tmax); + rayQueryInitializeEXT(q2[1], AS1, ray_flags, cull_mask, origin, tmin, dir, tmax); + + res = rayQueryProceedEXT(q); + rayQueryTerminateEXT(q2[0]); + rayQueryGenerateIntersectionEXT(q, thit); + rayQueryConfirmIntersectionEXT(q2[1]); + fval = rayQueryGetRayTMinEXT(q); + fvals = rayQueryGetWorldRayDirectionEXT(q); + fvals = rayQueryGetWorldRayOriginEXT(q); + type = rayQueryGetIntersectionTypeEXT(q2[1], true); + type = rayQueryGetIntersectionTypeEXT(q2[0], false); + res = rayQueryGetIntersectionCandidateAABBOpaqueEXT(q2[1]); + fval = rayQueryGetIntersectionTEXT(q2[1], true); + fval = rayQueryGetIntersectionTEXT(q2[1], false); + ival = rayQueryGetIntersectionInstanceCustomIndexEXT(q, true); + ival = rayQueryGetIntersectionInstanceIdEXT(q2[0], false); + ival = rayQueryGetIntersectionGeometryIndexEXT(q2[1], false); + ival = rayQueryGetIntersectionPrimitiveIndexEXT(q, true); + fvals.xy = rayQueryGetIntersectionBarycentricsEXT(q2[0], false); + res = rayQueryGetIntersectionFrontFaceEXT(q, true); + fvals = rayQueryGetIntersectionObjectRayDirectionEXT(q, false); + fvals = rayQueryGetIntersectionObjectRayOriginEXT(q2[0], true); + matrices = rayQueryGetIntersectionObjectToWorldEXT(q, false); + matrices = rayQueryGetIntersectionWorldToObjectEXT(q2[1], true); +} diff --git a/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/shaders-msl/comp/scalar-std450-distance-length-normalize.comp index 37414737fd2..63546322981 100644 --- a/shaders-msl/comp/scalar-std450-distance-length-normalize.comp +++ 
b/shaders-msl/comp/scalar-std450-distance-length-normalize.comp @@ -8,6 +8,7 @@ layout(std430, set = 0, binding = 0) buffer SSBO float c; float d; float e; + float f; }; void main() @@ -15,4 +16,5 @@ void main() c = distance(a, b); d = length(a); e = normalize(a); + f = distance(a-1, b-2); } diff --git a/shaders-msl/comp/shared-matrix-array-of-array.comp b/shaders-msl/comp/shared-matrix-array-of-array.comp new file mode 100644 index 00000000000..3bbd4c0f0c3 --- /dev/null +++ b/shaders-msl/comp/shared-matrix-array-of-array.comp @@ -0,0 +1,65 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct S1 { + mediump mat4x3 a[2]; + lowp float b; + lowp vec2 c[3]; +}; +struct S2 { + highp ivec4 a; + bool b[3][1][3]; +}; + +bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; } +bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); } +bool compare_vec3 (highp vec3 a, highp vec3 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); } +bool compare_mat4x3 (highp mat4x3 a, highp mat4x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2])&&compare_vec3(a[3], b[3]); } +bool compare_ivec4 (highp ivec4 a, highp ivec4 b) { return a == b; } +bool compare_bool (bool a, bool b) { return a == b; } + +shared S1 s1; +shared S2 s2; + +void main (void) { + s1.a[0] = mat4x3(0.0, 2.0, -8.0, 6.0, 7.0, 5.0, -6.0, 1.0, 9.0, -4.0, -3.0, 4.0); + s1.a[1] = mat4x3(4.0, 9.0, -9.0, -8.0, -9.0, 8.0, 0.0, 4.0, -4.0, 7.0, 2.0, -1.0); + s1.b = 7.0; + s1.c[0] = vec2(-5.0, -4.0); + s1.c[1] = vec2(3.0, -5.0); + s1.c[2] = vec2(-3.0, -1.0); + s2.a = ivec4(1, 0, -3, 1); + s2.b[0][0][0] = true; + s2.b[0][0][1] = false; + s2.b[0][0][2] = false; + s2.b[1][0][0] = true; + s2.b[1][0][1] = false; + s2.b[1][0][2] = true; + s2.b[2][0][0] = false; + s2.b[2][0][1] = true; + s2.b[2][0][2] = true; + + barrier(); + 
memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_mat4x3(mat4x3(0.0, 2.0, -8.0, 6.0, 7.0, 5.0, -6.0, 1.0, 9.0, -4.0, -3.0, 4.0), s1.a[0]); + allOk = allOk && compare_mat4x3(mat4x3(4.0, 9.0, -9.0, -8.0, -9.0, 8.0, 0.0, 4.0, -4.0, 7.0, 2.0, -1.0), s1.a[1]); + allOk = allOk && compare_float(7.0, s1.b); + allOk = allOk && compare_vec2(vec2(-5.0, -4.0), s1.c[0]); + allOk = allOk && compare_vec2(vec2(3.0, -5.0), s1.c[1]); + allOk = allOk && compare_vec2(vec2(-3.0, -1.0), s1.c[2]); + allOk = allOk && compare_ivec4(ivec4(1, 0, -3, 1), s2.a); + allOk = allOk && compare_bool(true, s2.b[0][0][0]); + allOk = allOk && compare_bool(false, s2.b[0][0][1]); + allOk = allOk && compare_bool(false, s2.b[0][0][2]); + allOk = allOk && compare_bool(true, s2.b[1][0][0]); + allOk = allOk && compare_bool(false, s2.b[1][0][1]); + allOk = allOk && compare_bool(true, s2.b[1][0][2]); + allOk = allOk && compare_bool(false, s2.b[2][0][0]); + allOk = allOk && compare_bool(true, s2.b[2][0][1]); + allOk = allOk && compare_bool(true, s2.b[2][0][2]); + if (allOk) + passed++; + +} diff --git a/shaders-msl/comp/shared-matrix-cast.comp b/shaders-msl/comp/shared-matrix-cast.comp new file mode 100644 index 00000000000..7e46fed7ae2 --- /dev/null +++ b/shaders-msl/comp/shared-matrix-cast.comp @@ -0,0 +1,33 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct S1 { + mediump vec4 a; + highp mat3x2 b; + bvec4 c; +}; + +bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; } +bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); } +bool compare_vec4 (highp vec4 a, highp vec4 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); } +bool compare_mat3x2 (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); } +bool compare_bvec4 (bvec4 
a, bvec4 b) { return a == b; } + +shared S1 s1; + +void main (void) { + s1.a = vec4(1.0, -5.0, -9.0, -5.0); + s1.b = mat3x2(1.0, -7.0, 1.0, 2.0, 8.0, 7.0); + s1.c = bvec4(false, true, false, false); + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_vec4(vec4(1.0, -5.0, -9.0, -5.0), s1.a); + allOk = allOk && compare_mat3x2(mat3x2(1.0, -7.0, 1.0, 2.0, 8.0, 7.0), s1.b); + allOk = allOk && compare_bvec4(bvec4(false, true, false, false), s1.c); + if (allOk) + passed++; + +} diff --git a/shaders-msl/comp/shared-matrix-nested-struct-array.comp b/shaders-msl/comp/shared-matrix-nested-struct-array.comp new file mode 100644 index 00000000000..59ab24d8480 --- /dev/null +++ b/shaders-msl/comp/shared-matrix-nested-struct-array.comp @@ -0,0 +1,87 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct sA +{ + mediump mat2x3 mA; +}; +struct sB +{ + mediump mat2 mA; + mediump mat3x2 mB; + highp uvec3 mC; +}; +struct sC +{ + sA mA; + sB mB; +}; +struct sD +{ + sC mA; +}; +struct sE +{ + lowp mat3x2 mA; + lowp mat4x3 mB; +}; +struct sF +{ + sE mA; +}; +struct sG +{ + sF mA; +}; +struct sH +{ + bvec3 mA[2]; +}; +struct S1 { + sD a; + sG b; + sH c[2]; +}; + +bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; } +bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); } +bool compare_vec3 (highp vec3 a, highp vec3 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); } +bool compare_mat2 (highp mat2 a, highp mat2 b) { return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1]); } +bool compare_mat2x3 (highp mat2x3 a, highp mat2x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1]); } +bool compare_mat3x2 (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); } +bool compare_mat4x3 (highp mat4x3 a, highp mat4x3 
b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2])&&compare_vec3(a[3], b[3]); } +bool compare_uvec3 (highp uvec3 a, highp uvec3 b) { return a == b; } +bool compare_bvec3 (bvec3 a, bvec3 b) { return a == b; } + +shared S1 s1; + +void main (void) { + s1.a.mA.mA.mA = mat2x3(6.0, 8.0, 8.0, 0.0, -4.0, -5.0); + s1.a.mA.mB.mA = mat2(9.0, -4.0, -6.0, -1.0); + s1.a.mA.mB.mB = mat3x2(-1.0, -2.0, 1.0, 6.0, 5.0, 7.0); + s1.a.mA.mB.mC = uvec3(3u, 1u, 5u); + s1.b.mA.mA.mA = mat3x2(8.0, 3.0, 0.0, 2.0, 1.0, 8.0); + s1.b.mA.mA.mB = mat4x3(0.0, 9.0, -1.0, -1.0, -7.0, 7.0, -4.0, -3.0, 1.0, -4.0, -9.0, 1.0); + s1.c[0].mA[0] = bvec3(true, false, false); + s1.c[0].mA[1] = bvec3(true, false, false); + s1.c[1].mA[0] = bvec3(false, false, false); + s1.c[1].mA[1] = bvec3(false, false, false); + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_mat2x3(mat2x3(6.0, 8.0, 8.0, 0.0, -4.0, -5.0), s1.a.mA.mA.mA); + allOk = allOk && compare_mat2(mat2(9.0, -4.0, -6.0, -1.0), s1.a.mA.mB.mA); + allOk = allOk && compare_mat3x2(mat3x2(-1.0, -2.0, 1.0, 6.0, 5.0, 7.0), s1.a.mA.mB.mB); + allOk = allOk && compare_uvec3(uvec3(3u, 1u, 5u), s1.a.mA.mB.mC); + allOk = allOk && compare_mat3x2(mat3x2(8.0, 3.0, 0.0, 2.0, 1.0, 8.0), s1.b.mA.mA.mA); + allOk = allOk && compare_mat4x3(mat4x3(0.0, 9.0, -1.0, -1.0, -7.0, 7.0, -4.0, -3.0, 1.0, -4.0, -9.0, 1.0), s1.b.mA.mA.mB); + allOk = allOk && compare_bvec3(bvec3(true, false, false), s1.c[0].mA[0]); + allOk = allOk && compare_bvec3(bvec3(true, false, false), s1.c[0].mA[1]); + allOk = allOk && compare_bvec3(bvec3(false, false, false), s1.c[1].mA[0]); + allOk = allOk && compare_bvec3(bvec3(false, false, false), s1.c[1].mA[1]); + if (allOk) + passed++; + +} diff --git a/shaders-msl/comp/shared-matrix-nested-struct.comp b/shaders-msl/comp/shared-matrix-nested-struct.comp new file mode 100644 index 00000000000..c481f54a860 --- /dev/null +++ b/shaders-msl/comp/shared-matrix-nested-struct.comp @@ -0,0 +1,141 @@ 
+#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct sA +{ + highp mat4 mA; + bvec3 mB; + bvec4 mC; +}; +struct sB +{ + bvec2 mA; +}; +struct sC +{ + highp float mA; + mediump uvec4 mB; + mediump float mC; +}; +struct sD +{ + sA mA; + sB mB; + sC mC; +}; +struct sE +{ + sD mA; +}; +struct sF +{ + lowp uvec3 mA; + bool mB; +}; +struct sG +{ + sF mA; + highp mat3x2 mB; +}; +struct sH +{ + sG mA; + mediump vec2 mB; +}; +struct sI +{ + mediump mat2 mA; + bvec3 mB; + bvec4 mC; +}; +struct sJ +{ + sI mA; + bvec3 mB; +}; +struct sK +{ + bvec2 mA; + sJ mB; + mediump ivec2 mC; +}; +struct S1 { + lowp uint a; + mediump vec4 b; +}; +struct S2 { + sE a; + highp ivec3 b; + sH c; + sK d; +}; + +bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; } +bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); } +bool compare_vec4 (highp vec4 a, highp vec4 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); } +bool compare_mat2 (highp mat2 a, highp mat2 b) { return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1]); } +bool compare_mat3x2 (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); } +bool compare_mat4 (highp mat4 a, highp mat4 b) { return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1])&&compare_vec4(a[2], b[2])&&compare_vec4(a[3], b[3]); } +bool compare_ivec2 (highp ivec2 a, highp ivec2 b) { return a == b; } +bool compare_ivec3 (highp ivec3 a, highp ivec3 b) { return a == b; } +bool compare_uint (highp uint a, highp uint b) { return a == b; } +bool compare_uvec3 (highp uvec3 a, highp uvec3 b) { return a == b; } +bool compare_uvec4 (highp uvec4 a, highp uvec4 b) { return a == b; } +bool compare_bool (bool a, bool b) { return a == b; } +bool compare_bvec2 (bvec2 a, bvec2 b) { return a == b; } +bool 
compare_bvec3 (bvec3 a, bvec3 b) { return a == b; } +bool compare_bvec4 (bvec4 a, bvec4 b) { return a == b; } + +shared S1 s1; +shared S2 s2; + +void main (void) { + s1.a = 0u; + s1.b = vec4(8.0, 8.0, 0.0, -4.0); + s2.a.mA.mA.mA = mat4(-5.0, 9.0, -4.0, -6.0, -1.0, -1.0, -2.0, 1.0, 6.0, 5.0, 7.0, -2.0, -4.0, -9.0, 8.0, 3.0); + s2.a.mA.mA.mB = bvec3(true, false, false); + s2.a.mA.mA.mC = bvec4(true, true, true, false); + s2.a.mA.mB.mA = bvec2(true, true); + s2.a.mA.mC.mA = 7.0; + s2.a.mA.mC.mB = uvec4(8u, 6u, 2u, 0u); + s2.a.mA.mC.mC = -9.0; + s2.b = ivec3(1, -4, 0); + s2.c.mA.mA.mA = uvec3(4u, 9u, 1u); + s2.c.mA.mA.mB = false; + s2.c.mA.mB = mat3x2(3.0, -5.0, -1.0, -5.0, -1.0, -9.0); + s2.c.mB = vec2(-6.0, -9.0); + s2.d.mA = bvec2(true, false); + s2.d.mB.mA.mA = mat2(-2.0, 3.0, 7.0, 2.0); + s2.d.mB.mA.mB = bvec3(false, false, false); + s2.d.mB.mA.mC = bvec4(false, false, false, true); + s2.d.mB.mB = bvec3(true, false, false); + s2.d.mC = ivec2(-9, 0); + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_uint(0u, s1.a); + allOk = allOk && compare_vec4(vec4(8.0, 8.0, 0.0, -4.0), s1.b); + allOk = allOk && compare_mat4(mat4(-5.0, 9.0, -4.0, -6.0, -1.0, -1.0, -2.0, 1.0, 6.0, 5.0, 7.0, -2.0, -4.0, -9.0, 8.0, 3.0), s2.a.mA.mA.mA); + allOk = allOk && compare_bvec3(bvec3(true, false, false), s2.a.mA.mA.mB); + allOk = allOk && compare_bvec4(bvec4(true, true, true, false), s2.a.mA.mA.mC); + allOk = allOk && compare_bvec2(bvec2(true, true), s2.a.mA.mB.mA); + allOk = allOk && compare_float(7.0, s2.a.mA.mC.mA); + allOk = allOk && compare_uvec4(uvec4(8u, 6u, 2u, 0u), s2.a.mA.mC.mB); + allOk = allOk && compare_float(-9.0, s2.a.mA.mC.mC); + allOk = allOk && compare_ivec3(ivec3(1, -4, 0), s2.b); + allOk = allOk && compare_uvec3(uvec3(4u, 9u, 1u), s2.c.mA.mA.mA); + allOk = allOk && compare_bool(false, s2.c.mA.mA.mB); + allOk = allOk && compare_mat3x2(mat3x2(3.0, -5.0, -1.0, -5.0, -1.0, -9.0), s2.c.mA.mB); + allOk = allOk && compare_vec2(vec2(-6.0, -9.0), 
s2.c.mB); + allOk = allOk && compare_bvec2(bvec2(true, false), s2.d.mA); + allOk = allOk && compare_mat2(mat2(-2.0, 3.0, 7.0, 2.0), s2.d.mB.mA.mA); + allOk = allOk && compare_bvec3(bvec3(false, false, false), s2.d.mB.mA.mB); + allOk = allOk && compare_bvec4(bvec4(false, false, false, true), s2.d.mB.mA.mC); + allOk = allOk && compare_bvec3(bvec3(true, false, false), s2.d.mB.mB); + allOk = allOk && compare_ivec2(ivec2(-9, 0), s2.d.mC); + if (allOk) + passed++; + +} diff --git a/shaders-msl/comp/shared-struct-bool-cast.comp b/shaders-msl/comp/shared-struct-bool-cast.comp new file mode 100644 index 00000000000..d6479b3e446 --- /dev/null +++ b/shaders-msl/comp/shared-struct-bool-cast.comp @@ -0,0 +1,35 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct S1 { + mediump ivec3 a; + highp uvec2 b; + bvec4 c; + mediump uint d; +}; + +bool compare_ivec3 (highp ivec3 a, highp ivec3 b) { return a == b; } +bool compare_uint (highp uint a, highp uint b) { return a == b; } +bool compare_uvec2 (highp uvec2 a, highp uvec2 b) { return a == b; } +bool compare_bvec4 (bvec4 a, bvec4 b) { return a == b; } + +shared S1 s1; + +void main (void) { + s1.a = ivec3(6, 8, 8); + s1.b = uvec2(4u, 4u); + s1.c = bvec4(false, false, false, true); + s1.d = 6u; + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_ivec3(ivec3(6, 8, 8), s1.a); + allOk = allOk && compare_uvec2(uvec2(4u, 4u), s1.b); + allOk = allOk && compare_bvec4(bvec4(false, false, false, true), s1.c); + allOk = allOk && compare_uint(6u, s1.d); + if (allOk) + passed++; + +} diff --git a/shaders-msl/comp/threadgroup-boolean-workaround.comp b/shaders-msl/comp/threadgroup-boolean-workaround.comp new file mode 100644 index 00000000000..8dce77a252c --- /dev/null +++ b/shaders-msl/comp/threadgroup-boolean-workaround.comp @@ -0,0 +1,21 @@ +#version 450 +layout(local_size_x = 4) in; + +shared bvec4 foo[4]; + +layout(binding = 0) buffer SSBO +{ + 
vec4 values[]; +}; + +void in_function() +{ + foo[gl_LocalInvocationIndex] = notEqual(values[gl_GlobalInvocationID.x], vec4(10.0)); + barrier(); + values[gl_GlobalInvocationID.x] = mix(vec4(40.0), vec4(30.0), foo[gl_LocalInvocationIndex ^ 3]); +} + +void main() +{ + in_function(); +} diff --git a/shaders-msl/comp/type_casting_i64.msl22.comp b/shaders-msl/comp/type_casting_i64.msl22.comp new file mode 100644 index 00000000000..45e682e586a --- /dev/null +++ b/shaders-msl/comp/type_casting_i64.msl22.comp @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_gpu_shader_int64 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(constant_id = 0) const int base_val = 0; +layout(constant_id = 1) const int64_t shift_val = 0; +const int offset = base_val >> shift_val; + +layout(set = 0, binding = 0, std430) buffer src_buff_t +{ + int m0[]; +} src_buff; + +layout(set = 0, binding = 1, std430) buffer dst_buff_t +{ + int m0[]; +} dst_buff; + +void main() +{ + dst_buff.m0[gl_GlobalInvocationID.x] = src_buff.m0[gl_GlobalInvocationID.x] + offset; +} + diff --git a/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc b/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc new file mode 100644 index 00000000000..a258afb367b --- /dev/null +++ b/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc @@ -0,0 +1,32 @@ +#version 450 +layout(vertices = 1) out; + +in gl_PerVertex +{ + vec4 gl_Position; +} gl_in[gl_MaxPatchVertices]; + +out gl_PerVertex +{ + vec4 gl_Position; +} gl_out[1]; + +layout(location = 0) patch out vec3 vFoo; + +void set_position() +{ + gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position; +} + +void main() +{ + gl_TessLevelInner[0] = 8.9; + gl_TessLevelInner[1] = 6.9; + gl_TessLevelOuter[0] = 8.9; + gl_TessLevelOuter[1] = 6.9; + gl_TessLevelOuter[2] = 3.9; + gl_TessLevelOuter[3] = 4.9; + vFoo = vec3(1.0); + + set_position(); +} diff --git 
a/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc b/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc new file mode 100644 index 00000000000..78d0d00cb68 --- /dev/null +++ b/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc @@ -0,0 +1,22 @@ +#version 450 + +struct Boo +{ + vec3 a; + uvec3 b; +}; + +layout(vertices = 4) out; +layout(location = 0) out Boo vVertex[]; +layout(location = 0) in Boo vInput[]; + +void main() +{ + vVertex[gl_InvocationID] = vInput[gl_InvocationID]; + gl_TessLevelOuter[0] = 1.0; + gl_TessLevelOuter[1] = 2.0; + gl_TessLevelOuter[2] = 3.0; + gl_TessLevelOuter[3] = 4.0; + gl_TessLevelInner[0] = 1.0; + gl_TessLevelInner[1] = 2.0; +} diff --git a/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert b/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert new file mode 100644 index 00000000000..9c0f1d5f369 --- /dev/null +++ b/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert @@ -0,0 +1,10 @@ +#version 450 + +void main() +{ + gl_Position = vec4(10.0); + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 4.0; + //gl_CullDistance[0] = 4.0; + //gl_CullDistance[1] = 9.0; +} diff --git a/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert b/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert new file mode 100644 index 00000000000..fadd1e73bfd --- /dev/null +++ b/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert @@ -0,0 +1,11 @@ +#version 460 + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + gl_Position = vec4(gl_BaseVertex, gl_BaseInstance, 0, 1); +} diff --git a/shaders-msl/frag/array-component-io.frag b/shaders-msl/frag/array-component-io.frag new file mode 100644 index 00000000000..8d88249e78f --- /dev/null +++ b/shaders-msl/frag/array-component-io.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 1, component = 0) 
out float A[2]; +layout(location = 1, component = 2) out vec2 B[2]; +layout(location = 0, component = 1) out float C[3]; +layout(location = 0, component = 3) out float D; + +layout(location = 1, component = 0) flat in float InA[2]; +layout(location = 1, component = 2) flat in vec2 InB[2]; +layout(location = 0, component = 1) flat in float InC[3]; +layout(location = 3, component = 1) sample in float InD; +layout(location = 4, component = 2) noperspective in float InE; +layout(location = 5, component = 3) centroid in float InF; + +void main() +{ + A = InA; + B = InB; + C = InC; + D = InD + InE + InF; +} diff --git a/shaders-msl/frag/array-of-array-lut.frag b/shaders-msl/frag/array-of-array-lut.frag new file mode 100644 index 00000000000..c401a3fe372 --- /dev/null +++ b/shaders-msl/frag/array-of-array-lut.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) out float vOutput; +layout(location = 0) flat in int vIndex1; +layout(location = 1) flat in int vIndex2; + +const float FOO[2][3] = float[][](float[](1.0, 2.0, 3.0), float[](4.0, 5.0, 6.0)); + +void main() +{ + vOutput = FOO[vIndex1][vIndex2]; +} diff --git a/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag b/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag new file mode 100644 index 00000000000..ec25ceb1c8f --- /dev/null +++ b/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag @@ -0,0 +1,34 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler2D uSampler[4]; +layout(set = 0, binding = 1) uniform sampler2D uSamp; + +layout(set = 0, binding = 2) uniform UBO +{ + uint index; +} uUBO; + +layout(set = 0, binding = 3) uniform UBO2 +{ + uint index2; +}; + +layout(location = 0) in vec2 vUV; + +layout(location = 0) out vec4 FragColor; + +vec4 sample_in_func() +{ + return texture(uSampler[uUBO.index], vUV); +} + +vec4 sample_single_in_func(sampler2D s) +{ + return 
texture(s, vUV); +} + +void main() +{ + FragColor = sample_in_func(); + FragColor += sample_single_in_func(uSampler[index2]); +} diff --git a/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag b/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag new file mode 100644 index 00000000000..ec25ceb1c8f --- /dev/null +++ b/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag @@ -0,0 +1,34 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler2D uSampler[4]; +layout(set = 0, binding = 1) uniform sampler2D uSamp; + +layout(set = 0, binding = 2) uniform UBO +{ + uint index; +} uUBO; + +layout(set = 0, binding = 3) uniform UBO2 +{ + uint index2; +}; + +layout(location = 0) in vec2 vUV; + +layout(location = 0) out vec4 FragColor; + +vec4 sample_in_func() +{ + return texture(uSampler[uUBO.index], vUV); +} + +vec4 sample_single_in_func(sampler2D s) +{ + return texture(s, vUV); +} + +void main() +{ + FragColor = sample_in_func(); + FragColor += sample_single_in_func(uSampler[index2]); +} diff --git a/shaders-msl/frag/basic.force-sample.frag b/shaders-msl/frag/basic.force-sample.frag new file mode 100644 index 00000000000..dd9a8f85074 --- /dev/null +++ b/shaders-msl/frag/basic.force-sample.frag @@ -0,0 +1,13 @@ +#version 310 es +precision mediump float; + +layout(location = 0) in vec4 vColor; +layout(location = 1) in vec2 vTex; +layout(binding = 0) uniform sampler2D uTex; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vColor * texture(uTex, vTex); +} + diff --git a/shaders-msl/frag/bitcasting.1d-as-2d.frag b/shaders-msl/frag/bitcasting.1d-as-2d.frag new file mode 100644 index 00000000000..adaa749f754 --- /dev/null +++ b/shaders-msl/frag/bitcasting.1d-as-2d.frag @@ -0,0 +1,23 @@ +#version 450 + +layout(binding = 0) uniform sampler1D TextureBase; +layout(binding = 1) uniform sampler1D TextureDetail; + +layout(location = 0) in vec4 VertGeom; + +layout(location = 
0) out vec4 FragColor0; +layout(location = 1) out vec4 FragColor1; + +void main() +{ + vec4 texSample0 = texture(TextureBase, VertGeom.x); + vec4 texSample1 = textureOffset(TextureDetail, VertGeom.x, 3); + + ivec4 iResult0 = floatBitsToInt(texSample0); + ivec4 iResult1 = floatBitsToInt(texSample1); + FragColor0 = (intBitsToFloat(iResult0) * intBitsToFloat(iResult1)); + + uvec4 uResult0 = floatBitsToUint(texSample0); + uvec4 uResult1 = floatBitsToUint(texSample1); + FragColor1 = (uintBitsToFloat(uResult0) * uintBitsToFloat(uResult1)); +} diff --git a/shaders-msl/frag/clip-distance-varying.frag b/shaders-msl/frag/clip-distance-varying.frag new file mode 100644 index 00000000000..df49bd515c6 --- /dev/null +++ b/shaders-msl/frag/clip-distance-varying.frag @@ -0,0 +1,10 @@ +#version 450 + +in float gl_ClipDistance[2]; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0 - gl_ClipDistance[0] - gl_ClipDistance[1]); +} diff --git a/shaders-msl/frag/cull-distance-varying.frag b/shaders-msl/frag/cull-distance-varying.frag new file mode 100644 index 00000000000..8bade07e1dd --- /dev/null +++ b/shaders-msl/frag/cull-distance-varying.frag @@ -0,0 +1,10 @@ +#version 450 + +in float gl_CullDistance[2]; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0 - gl_CullDistance[0] - gl_CullDistance[1]); +} diff --git a/shaders-msl/frag/depth-out-early-frag-tests.frag b/shaders-msl/frag/depth-out-early-frag-tests.frag new file mode 100644 index 00000000000..4208d79a061 --- /dev/null +++ b/shaders-msl/frag/depth-out-early-frag-tests.frag @@ -0,0 +1,11 @@ +#version 430 +layout(depth_less) out float gl_FragDepth; +layout(early_fragment_tests) in; + +layout(location = 0) out vec4 color_out; + +void main() +{ + color_out = vec4(1.0, 0.0, 0.0, 1.0); + gl_FragDepth = 0.699999988079071044921875; +} diff --git a/shaders-msl/frag/depth-out-no-early-frag-tests.frag b/shaders-msl/frag/depth-out-no-early-frag-tests.frag new file mode 
100644 index 00000000000..84502079985 --- /dev/null +++ b/shaders-msl/frag/depth-out-no-early-frag-tests.frag @@ -0,0 +1,10 @@ +#version 430 +layout(depth_less) out float gl_FragDepth; + +layout(location = 0) out vec4 color_out; + +void main() +{ + color_out = vec4(1.0, 0.0, 0.0, 1.0); + gl_FragDepth = 0.699999988079071044921875; +} diff --git a/shaders-msl/frag/disable-frag-output.frag-output.frag b/shaders-msl/frag/disable-frag-output.frag-output.frag new file mode 100644 index 00000000000..7e149b86404 --- /dev/null +++ b/shaders-msl/frag/disable-frag-output.frag-output.frag @@ -0,0 +1,25 @@ +#version 450 +#extension GL_ARB_shader_stencil_export : require + +layout(location = 0) out vec4 buf0; +layout(location = 1) out vec4 buf1; +layout(location = 2) out vec4 buf2; +layout(location = 3) out vec4 buf3; +layout(location = 4) out vec4 buf4; +layout(location = 5) out vec4 buf5; +layout(location = 6) out vec4 buf6; +layout(location = 7) out vec4 buf7; + +void main() { + buf0 = vec4(0, 0, 0, 1); + buf1 = vec4(1, 0, 0, 1); + buf2 = vec4(0, 1, 0, 1); + buf3 = vec4(0, 0, 1, 1); + buf4 = vec4(1, 0, 1, 0.5); + buf5 = vec4(0.25, 0.25, 0.25, 0.25); + buf6 = vec4(0.75, 0.75, 0.75, 0.75); + buf7 = vec4(1, 1, 1, 1); + gl_FragDepth = 0.9; + gl_FragStencilRefARB = 127; +} + diff --git a/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag b/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag new file mode 100644 index 00000000000..9799a1392f8 --- /dev/null +++ b/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag @@ -0,0 +1,33 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : enable + +layout(set=0, binding=0, std430) buffer foo_t +{ + float x; + uint y; +} foo; + +layout(r32ui, set=0, binding=1) uniform uimage2D bar; + +layout(location=0) out vec4 fragColor; + +vec4 frag_body() { + foo.x = 1.0f; + atomicExchange(foo.y, 0); + if (int(gl_FragCoord.x) == 3) + demote; + imageStore(bar, ivec2(gl_FragCoord.xy), uvec4(1)); + atomicAdd(foo.y, 
42); + imageAtomicOr(bar, ivec2(gl_FragCoord.xy), 0x3e); + atomicAnd(foo.y, 0xffff); + atomicXor(foo.y, 0xffffff00); + atomicMin(foo.y, 1); + imageAtomicMax(bar, ivec2(gl_FragCoord.xy), 100); + imageAtomicCompSwap(bar, ivec2(gl_FragCoord.xy), 100, 42); + return vec4(1.0f, float(helperInvocationEXT()), 0.0f, 1.0f); +} + +void main() { + fragColor = frag_body(); +} + diff --git a/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag b/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag new file mode 100644 index 00000000000..a7f37a5a700 --- /dev/null +++ b/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag @@ -0,0 +1,17 @@ +#version 450 + +layout(binding=0, set=0, std430) buffer foo +{ + int x; +}; + +layout(location=0) out vec4 fragColor; + +void main(void) +{ + if (gl_FragCoord.y == 7) + discard; + for (x = 0; x < gl_FragCoord.x; ++x) + ; + fragColor = vec4(x, 0, 0, 1); +} diff --git a/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag b/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag new file mode 100644 index 00000000000..8b40e60c19d --- /dev/null +++ b/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag @@ -0,0 +1,32 @@ +#version 450 + +layout(set=0, binding=0, std430) buffer foo_t +{ + float x; + uint y; +} foo; + +layout(r32ui, set=0, binding=1) uniform uimage2D bar; + +layout(location=0) out vec4 fragColor; + +vec4 frag_body() { + foo.x = 1.0f; + atomicExchange(foo.y, 0); + if (int(gl_FragCoord.x) == 3) + discard; + imageStore(bar, ivec2(gl_FragCoord.xy), uvec4(1)); + atomicAdd(foo.y, 42); + imageAtomicOr(bar, ivec2(gl_FragCoord.xy), 0x3e); + atomicAnd(foo.y, 0xffff); + atomicXor(foo.y, 0xffffff00); + atomicMin(foo.y, 1); + imageAtomicMax(bar, ivec2(gl_FragCoord.xy), 100); + imageAtomicCompSwap(bar, ivec2(gl_FragCoord.xy), 100, 42); + return vec4(1.0f, 0.0f, 0.0f, 1.0f); +} + +void main() { + fragColor = frag_body(); +} + diff --git 
a/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag b/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag new file mode 100644 index 00000000000..28d2b4ae8df --- /dev/null +++ b/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag @@ -0,0 +1,26 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; +layout(set = 0, binding = 0) uniform sampler2D uSamplers[10000]; +layout(set = 2, binding = 0) uniform sampler2D uSampler; + +layout(set = 1, binding = 0) uniform UBO +{ + vec4 v; +} vs[10000]; + +vec4 samp_array() +{ + return texture(uSamplers[9999], vUV) + vs[5000].v; +} + +vec4 samp_single() +{ + return texture(uSampler, vUV); +} + +void main() +{ + FragColor = samp_array() + samp_single(); +} diff --git a/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag b/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag new file mode 100644 index 00000000000..a7529b18838 --- /dev/null +++ b/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag @@ -0,0 +1,19 @@ +#version 450 + +struct Foo +{ + float a; + float b; +}; + +layout(location = 1) in Foo foos[4]; +layout(location = 10) in Foo bars[4]; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor.x = foos[0].a; + FragColor.y = foos[1].b; + FragColor.z = foos[2].a; + FragColor.w = bars[3].b.x; +} \ No newline at end of file diff --git a/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag b/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag new file mode 100644 index 00000000000..b3d44c94364 --- /dev/null +++ b/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform subpassInputMS uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform subpassInputMS uSubpass1; +layout(location = 0) out vec4 
FragColor; + +vec4 load_subpasses(mediump subpassInputMS uInput) +{ + return subpassLoad(uInput, gl_SampleID); +} + +void main() +{ + FragColor = subpassLoad(uSubpass0, 1) + subpassLoad(uSubpass1, 2) + load_subpasses(uSubpass0); +} diff --git a/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag b/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag new file mode 100644 index 00000000000..b3d44c94364 --- /dev/null +++ b/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform subpassInputMS uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform subpassInputMS uSubpass1; +layout(location = 0) out vec4 FragColor; + +vec4 load_subpasses(mediump subpassInputMS uInput) +{ + return subpassLoad(uInput, gl_SampleID); +} + +void main() +{ + FragColor = subpassLoad(uSubpass0, 1) + subpassLoad(uSubpass1, 2) + load_subpasses(uSubpass0); +} diff --git a/shaders-msl/frag/input-attachment.arrayed-subpass.frag b/shaders-msl/frag/input-attachment.arrayed-subpass.frag new file mode 100644 index 00000000000..877d0525a48 --- /dev/null +++ b/shaders-msl/frag/input-attachment.arrayed-subpass.frag @@ -0,0 +1,16 @@ +#version 310 es +precision mediump float; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1; +layout(location = 0) out vec4 FragColor; + +vec4 load_subpasses(mediump subpassInput uInput) +{ + return subpassLoad(uInput); +} + +void main() +{ + FragColor = subpassLoad(uSubpass0) + load_subpasses(uSubpass1); +} diff --git a/shaders-msl/frag/input-attachment.multiview.frag b/shaders-msl/frag/input-attachment.multiview.frag new file mode 100644 index 00000000000..877d0525a48 --- /dev/null +++ b/shaders-msl/frag/input-attachment.multiview.frag @@ -0,0 +1,16 @@ +#version 310 es +precision mediump float; + 
+layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1; +layout(location = 0) out vec4 FragColor; + +vec4 load_subpasses(mediump subpassInput uInput) +{ + return subpassLoad(uInput); +} + +void main() +{ + FragColor = subpassLoad(uSubpass0) + load_subpasses(uSubpass1); +} diff --git a/shaders-msl/frag/modf-access-tracking-function.frag b/shaders-msl/frag/modf-access-tracking-function.frag new file mode 100644 index 00000000000..c1f1a1266f1 --- /dev/null +++ b/shaders-msl/frag/modf-access-tracking-function.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 v; +layout(location = 0) out vec4 vo0; +layout(location = 1) out vec4 vo1; + +vec4 modf_inner() +{ + return modf(v, vo1); +} + +void main() +{ + vo0 = modf_inner(); +} diff --git a/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag new file mode 100644 index 00000000000..ceac8cc50e4 --- /dev/null +++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2, rgba8) uniform readonly image2D img3; +layout(binding = 3) coherent buffer Buffer +{ + int foo; + uint bar; +}; +layout(binding = 4) buffer Buffer2 +{ + uint quux; +}; + +layout(binding = 5, rgba8) uniform writeonly image2D img4; +layout(binding = 6) buffer Buffer3 +{ + int baz; +}; + +void main() +{ + // Deliberately outside the critical section to test usage tracking. 
+ baz = 0; + imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, quux); + endInvocationInterlockARB(); +} diff --git a/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag new file mode 100644 index 00000000000..ceac8cc50e4 --- /dev/null +++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2, rgba8) uniform readonly image2D img3; +layout(binding = 3) coherent buffer Buffer +{ + int foo; + uint bar; +}; +layout(binding = 4) buffer Buffer2 +{ + uint quux; +}; + +layout(binding = 5, rgba8) uniform writeonly image2D img4; +layout(binding = 6) buffer Buffer3 +{ + int baz; +}; + +void main() +{ + // Deliberately outside the critical section to test usage tracking. 
+ baz = 0; + imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, quux); + endInvocationInterlockARB(); +} diff --git a/shaders-msl/frag/post-depth-coverage.ios.msl2.frag b/shaders-msl/frag/post-depth-coverage.ios.msl2.frag new file mode 100644 index 00000000000..4f134b4f3bb --- /dev/null +++ b/shaders-msl/frag/post-depth-coverage.ios.msl2.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_ARB_post_depth_coverage : require + +layout(post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(gl_SampleMaskIn[0]); +} diff --git a/shaders-msl/frag/post-depth-coverage.msl23.frag b/shaders-msl/frag/post-depth-coverage.msl23.frag new file mode 100644 index 00000000000..4f134b4f3bb --- /dev/null +++ b/shaders-msl/frag/post-depth-coverage.msl23.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_ARB_post_depth_coverage : require + +layout(post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(gl_SampleMaskIn[0]); +} diff --git a/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag b/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag new file mode 100644 index 00000000000..d2d94fcbd95 --- /dev/null +++ b/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag @@ -0,0 +1,31 @@ +#version 460 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_EXT_ray_query : enable + +layout(location = 0) in vec4 inPos; +layout(location = 0) out vec4 outColor; + +layout(binding = 0) uniform accelerationStructureEXT topLevelAS; + +uint doRay(vec3 rayOrigin, vec3 rayDirection, float rayDistance) { + rayQueryEXT rayQuery; + rayQueryInitializeEXT(rayQuery, topLevelAS, gl_RayFlagsTerminateOnFirstHitEXT, 0xFF, + rayOrigin, 0.001, rayDirection, rayDistance); + + 
while(rayQueryProceedEXT(rayQuery)) + ; + + return rayQueryGetIntersectionTypeEXT(rayQuery, true); +} + +void main() { + vec3 rayOrigin = vec3(inPos.xy*4.0-vec2(2.0),1.0); + vec3 rayDirection = vec3(0,0,-1); + float rayDistance = 2.0; + + if(doRay(rayOrigin,rayDirection,rayDistance) == gl_RayQueryCommittedIntersectionNoneEXT) + discard; + + outColor = inPos; +} diff --git a/shaders-msl/frag/read-cull-clip-distance-in-function.frag b/shaders-msl/frag/read-cull-clip-distance-in-function.frag new file mode 100644 index 00000000000..0b82dc2df92 --- /dev/null +++ b/shaders-msl/frag/read-cull-clip-distance-in-function.frag @@ -0,0 +1,20 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +in float gl_CullDistance[2]; +in float gl_ClipDistance[2]; + +vec4 read_in_func() +{ + return vec4( + gl_CullDistance[0], + gl_CullDistance[1], + gl_ClipDistance[0], + gl_ClipDistance[1]); +} + +void main() +{ + FragColor = read_in_func(); +} diff --git a/shaders-msl/frag/return-value-after-discard-terminator.frag b/shaders-msl/frag/return-value-after-discard-terminator.frag new file mode 100644 index 00000000000..2ab410cb1b1 --- /dev/null +++ b/shaders-msl/frag/return-value-after-discard-terminator.frag @@ -0,0 +1,17 @@ +#version 450 + +layout(set = 0, binding = 0, std430) buffer buff_t +{ + int m0[1024]; +} buff; + +layout(location = 0) out vec4 frag_clr; + +void main() +{ + ivec2 frag_coord = ivec2(ivec4(gl_FragCoord).xy); + int buff_idx = (frag_coord.y * 32) + frag_coord.x; + frag_clr = vec4(0.0, 0.0, 1.0, 1.0); + buff.m0[buff_idx] = 1; + discard; +} diff --git a/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag b/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag new file mode 100644 index 00000000000..9a855ac7a80 --- /dev/null +++ b/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag @@ -0,0 +1,29 @@ +#version 450 + +layout(set = 0, binding = 0) uniform texture2D uTexture; +layout(set = 0, binding = 1) uniform sampler uSampler; 
+layout(set = 0, binding = 2) uniform samplerShadow uSamplerShadow; + +layout(location = 0) out float FragColor; +layout(location = 0) in vec3 vUV; + +float sample_normal2(texture2D tex) +{ + return texture(sampler2D(tex, uSampler), vUV.xy).x; +} + +float sample_normal(texture2D tex) +{ + return sample_normal2(tex); +} + +float sample_comp(texture2D tex) +{ + return texture(sampler2DShadow(tex, uSamplerShadow), vUV); +} + +void main() +{ + FragColor = sample_normal(uTexture); + FragColor += sample_comp(uTexture); +} diff --git a/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag b/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag new file mode 100644 index 00000000000..b78ee61e81f --- /dev/null +++ b/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0); + gl_SampleMask[0] = gl_SampleMaskIn[0]; +} + diff --git a/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag b/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag new file mode 100644 index 00000000000..b78ee61e81f --- /dev/null +++ b/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0); + gl_SampleMask[0] = gl_SampleMaskIn[0]; +} + diff --git a/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag b/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag new file mode 100644 index 00000000000..c3eaf5e19fb --- /dev/null +++ b/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag @@ -0,0 +1,8 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0); +} diff --git a/shaders-msl/frag/sample-mask.fixed-sample-mask.frag b/shaders-msl/frag/sample-mask.fixed-sample-mask.frag new file mode 100644 index 
00000000000..33ff0b2e695 --- /dev/null +++ b/shaders-msl/frag/sample-mask.fixed-sample-mask.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0); + gl_SampleMask[0] = 0; +} + diff --git a/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag b/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag new file mode 100644 index 00000000000..202dba0bdfa --- /dev/null +++ b/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler2DArray tex; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = texture(tex, vec3(gl_FragCoord.xy, float(gl_SampleID))); +} diff --git a/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag b/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag new file mode 100644 index 00000000000..b131fb032c5 --- /dev/null +++ b/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler2DArray tex; + +layout(location = 0) sample in float foo; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = texture(tex, vec3(gl_FragCoord.xy, foo)); +} diff --git a/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag b/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag new file mode 100644 index 00000000000..c8c3be96df6 --- /dev/null +++ b/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler2D tex; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = texture(tex, gl_FragCoord.xy - gl_SamplePosition); +} diff --git a/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag b/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag new file mode 100644 index 00000000000..a6b47e4bbed --- /dev/null +++ b/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag @@ -0,0 +1,10 @@ +#version 
450 + +layout(set = 0, binding = 0) uniform sampler2D tex; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = texture(tex, gl_FragCoord.xy); +} diff --git a/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag b/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag new file mode 100644 index 00000000000..f4526f39d06 --- /dev/null +++ b/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) flat in float vTex; +layout(binding = 0) uniform sampler1D uSampler; + +void main() +{ + FragColor += texture(uSampler, vTex, 2.0) + + textureLod(uSampler, vTex, 3.0) + + textureGrad(uSampler, vTex, 5.0, 8.0); +} diff --git a/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag b/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag new file mode 100644 index 00000000000..158c7600311 --- /dev/null +++ b/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(binding = 0) uniform texture1DArray uTex; +layout(binding = 1) uniform samplerShadow uShadow; +layout(location = 0) in vec3 vUV; +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = texture(sampler1DArrayShadow(uTex, uShadow), vUV, 1.0); +} diff --git a/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag b/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag new file mode 100644 index 00000000000..5c1c8937fb2 --- /dev/null +++ b/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(binding = 0) uniform texture2DArray uTex; +layout(binding = 1) uniform samplerShadow uShadow; +layout(location = 0) in vec4 vUV; +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = textureGrad(sampler2DArrayShadow(uTex, uShadow), vUV, vec2(0.0), vec2(0.0)) + textureGrad(sampler2DArrayShadow(uTex, uShadow), vUV, vec2(1.0), vec2(1.0)); +} diff --git 
a/shaders-msl/frag/subgroup-globals-extract.msl22.frag b/shaders-msl/frag/subgroup-globals-extract.msl22.frag new file mode 100644 index 00000000000..f763163dc2e --- /dev/null +++ b/shaders-msl/frag/subgroup-globals-extract.msl22.frag @@ -0,0 +1,30 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require + +layout(location = 0) out uvec2 FragColor; + +uint sub1() { + return subgroupBallotFindLSB(uvec4(1,2,3,4)); +} + +uint sub2() { + return subgroupBallotFindMSB(uvec4(1,2,3,4)); +} + +uint sub3() { + return subgroupBallotBitCount(uvec4(1,2,3,4)); +} + +uint sub4() { + return subgroupBallotInclusiveBitCount(uvec4(1,2,3,4)); +} + +uint sub5() { + return subgroupBallotExclusiveBitCount(uvec4(1,2,3,4)); +} + +void main() +{ + FragColor.x = sub1() + sub2() + sub3() + sub4() + sub5(); +} diff --git a/shaders-msl/frag/switch-unreachable-break.frag b/shaders-msl/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..4be90f86b5f --- /dev/null +++ b/shaders-msl/frag/switch-unreachable-break.frag @@ -0,0 +1,30 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vInput; + +layout(set = 0, binding = 0) uniform UBO +{ + int cond; + int cond2; +}; + +void main() +{ + FragColor = vec4(10.0); + switch (cond) + { + case 1: + if (cond2 < 50) + break; + else + discard; + + break; + + default: + FragColor = vec4(20.0); + break; + } +} + diff --git a/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag b/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag new file mode 100644 index 00000000000..8e15e36e354 --- /dev/null +++ b/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag @@ -0,0 +1,10 @@ +#version 450 +layout(location = 0) out vec4 FragColor; +layout(binding = 0) uniform sampler2D uTexture; +layout(binding = 1) uniform sampler1D uTexture2; + +void main() +{ + FragColor = texelFetchOffset(uTexture, ivec2(gl_FragCoord.xy), 0, ivec2(1, 1)); + FragColor += 
texelFetchOffset(uTexture2, int(gl_FragCoord.x), 0, int(-1)); +} diff --git a/shaders-msl/frag/texture-cube-array.frag b/shaders-msl/frag/texture-cube-array.frag new file mode 100644 index 00000000000..91a55f933bc --- /dev/null +++ b/shaders-msl/frag/texture-cube-array.frag @@ -0,0 +1,16 @@ +#version 450 + +layout(set = 0, binding = 0) uniform samplerCube cubeSampler; +layout(set = 0, binding = 1) uniform samplerCubeArray cubeArraySampler; +layout(set = 0, binding = 2) uniform sampler2DArray texArraySampler; + +layout(location = 0) in vec4 vUV; +layout(location = 0) out vec4 FragColor; + +void main() +{ + vec4 a = texture(cubeSampler, vUV.xyz); + vec4 b = texture(cubeArraySampler, vUV); + vec4 c = texture(texArraySampler, vUV.xyz); + FragColor = a + b + c; +} diff --git a/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag b/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag new file mode 100644 index 00000000000..91a55f933bc --- /dev/null +++ b/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag @@ -0,0 +1,16 @@ +#version 450 + +layout(set = 0, binding = 0) uniform samplerCube cubeSampler; +layout(set = 0, binding = 1) uniform samplerCubeArray cubeArraySampler; +layout(set = 0, binding = 2) uniform sampler2DArray texArraySampler; + +layout(location = 0) in vec4 vUV; +layout(location = 0) out vec4 FragColor; + +void main() +{ + vec4 a = texture(cubeSampler, vUV.xyz); + vec4 b = texture(cubeArraySampler, vUV); + vec4 c = texture(texArraySampler, vUV.xyz); + FragColor = a + b + c; +} diff --git a/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag b/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag new file mode 100644 index 00000000000..900c5b006c9 --- /dev/null +++ b/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag @@ -0,0 +1,17 @@ +#version 450 + +#extension GL_AMD_gpu_shader_int16 : require + +layout(location = 0) flat in int16_t a; +layout(location = 1) flat in ivec2 b; +layout(location = 2) flat in uint16_t c[2]; 
+layout(location = 4) flat in uvec4 e[2]; +layout(location = 6) in vec2 d; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(float(int(a)), float(b.x), vec2(uint(c[1]), float(e[0].w)) + d); +} + diff --git a/shaders-msl/intel/shader-integer-functions2.asm.comp b/shaders-msl/intel/shader-integer-functions2.asm.comp new file mode 100644 index 00000000000..9189794ef78 --- /dev/null +++ b/shaders-msl/intel/shader-integer-functions2.asm.comp @@ -0,0 +1,137 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 97 +; Schema: 0 + OpCapability Shader + OpCapability IntegerFunctions2INTEL + OpExtension "SPV_INTEL_shader_integer_functions2" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %main "main" + OpName %foo "foo" + OpMemberName %foo 0 "a" + OpMemberName %foo 1 "b" + OpMemberName %foo 2 "c" + OpMemberName %foo 3 "d" + OpName %_ "" + OpMemberDecorate %foo 0 Offset 0 + OpMemberDecorate %foo 1 Offset 4 + OpMemberDecorate %foo 2 Offset 8 + OpMemberDecorate %foo 3 Offset 12 + OpDecorate %foo Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %6 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %foo = OpTypeStruct %uint %uint %int %int +%_ptr_StorageBuffer_foo = OpTypePointer StorageBuffer %foo + %_ = OpVariable %_ptr_StorageBuffer_foo StorageBuffer + %int_0 = OpConstant %int 0 +%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 +%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int + %int_3 = OpConstant %int 3 + %main = OpFunction %void None %6 + %15 = OpLabel + %16 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %17 = OpLoad %uint %16 + %18 = OpUCountLeadingZerosINTEL %uint %17 + %19 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %19 %18 + %20 = 
OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %21 = OpLoad %uint %20 + %22 = OpUCountTrailingZerosINTEL %uint %21 + %23 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %23 %22 + %24 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + %25 = OpLoad %int %24 + %26 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_3 + %27 = OpLoad %int %26 + %28 = OpAbsISubINTEL %uint %25 %27 + %29 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %29 %28 + %30 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %31 = OpLoad %uint %30 + %32 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_1 + %33 = OpLoad %uint %32 + %34 = OpAbsUSubINTEL %uint %31 %33 + %35 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %35 %34 + %37 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + %38 = OpLoad %int %37 + %39 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_3 + %40 = OpLoad %int %39 + %41 = OpIAddSatINTEL %int %38 %40 + %42 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + OpStore %42 %41 + %43 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %44 = OpLoad %uint %43 + %45 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_1 + %46 = OpLoad %uint %45 + %47 = OpUAddSatINTEL %uint %44 %46 + %48 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %48 %47 + %49 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + %50 = OpLoad %int %49 + %51 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_3 + %52 = OpLoad %int %51 + %53 = OpIAverageINTEL %int %50 %52 + %54 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + OpStore %54 %53 + %55 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %56 = OpLoad %uint %55 + %57 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_1 + %58 = OpLoad %uint %57 + %59 = OpUAverageINTEL %uint %56 %58 + %60 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + %62 = OpLoad %int %61 + %63 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_3 + %64 = OpLoad %int %63 + 
%65 = OpIAverageRoundedINTEL %int %62 %64 + %66 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + OpStore %66 %65 + %67 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %68 = OpLoad %uint %67 + %69 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_1 + %70 = OpLoad %uint %69 + %71 = OpUAverageRoundedINTEL %uint %68 %70 + %72 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %72 %71 + %73 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + %74 = OpLoad %int %73 + %75 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_3 + %76 = OpLoad %int %75 + %77 = OpISubSatINTEL %int %74 %76 + %78 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + OpStore %78 %77 + %79 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %80 = OpLoad %uint %79 + %81 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_1 + %82 = OpLoad %uint %81 + %83 = OpUSubSatINTEL %uint %80 %82 + %84 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %84 %83 + %85 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + %86 = OpLoad %int %85 + %87 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_3 + %88 = OpLoad %int %87 + %89 = OpIMul32x16INTEL %int %86 %88 + %90 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + OpStore %90 %89 + %91 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %92 = OpLoad %uint %91 + %93 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_1 + %94 = OpLoad %uint %93 + %95 = OpUMul32x16INTEL %uint %92 %94 + %96 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %96 %95 + OpReturn + OpFunctionEnd diff --git a/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc new file mode 100644 index 00000000000..e69a7a1697d --- /dev/null +++ b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc @@ -0,0 +1,17 @@ +#version 450 + +layout(vertices = 4) out; +layout(location = 0) out vec4 Foo[][2]; +layout(location = 0) in vec4 iFoo[][2]; +layout(location = 2) patch out vec4 
pFoo[2]; +layout(location = 2) in vec4 ipFoo[]; + +void main() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + Foo[gl_InvocationID] = iFoo[gl_InvocationID]; + if (gl_InvocationID == 0) + { + pFoo = vec4[](ipFoo[0], ipFoo[1]); + } +} diff --git a/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..e69a7a1697d --- /dev/null +++ b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc @@ -0,0 +1,17 @@ +#version 450 + +layout(vertices = 4) out; +layout(location = 0) out vec4 Foo[][2]; +layout(location = 0) in vec4 iFoo[][2]; +layout(location = 2) patch out vec4 pFoo[2]; +layout(location = 2) in vec4 ipFoo[]; + +void main() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + Foo[gl_InvocationID] = iFoo[gl_InvocationID]; + if (gl_InvocationID == 0) + { + pFoo = vec4[](ipFoo[0], ipFoo[1]); + } +} diff --git a/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc new file mode 100644 index 00000000000..e69a7a1697d --- /dev/null +++ b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc @@ -0,0 +1,17 @@ +#version 450 + +layout(vertices = 4) out; +layout(location = 0) out vec4 Foo[][2]; +layout(location = 0) in vec4 iFoo[][2]; +layout(location = 2) patch out vec4 pFoo[2]; +layout(location = 2) in vec4 ipFoo[]; + +void main() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + Foo[gl_InvocationID] = iFoo[gl_InvocationID]; + if (gl_InvocationID == 0) + { + pFoo = vec4[](ipFoo[0], ipFoo[1]); + } +} diff --git a/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..e69a7a1697d --- /dev/null +++ b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc @@ -0,0 +1,17 @@ +#version 450 + +layout(vertices = 4) out; +layout(location = 0) out vec4 
Foo[][2]; +layout(location = 0) in vec4 iFoo[][2]; +layout(location = 2) patch out vec4 pFoo[2]; +layout(location = 2) in vec4 ipFoo[]; + +void main() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + Foo[gl_InvocationID] = iFoo[gl_InvocationID]; + if (gl_InvocationID == 0) + { + pFoo = vec4[](ipFoo[0], ipFoo[1]); + } +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert b/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert new file mode 100644 index 00000000000..2d8fdf19a22 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out V +{ + vec4 a; + vec4 b; + vec4 c; + vec4 d; +}; + +void main() +{ + gl_Position = vec4(1.0); + a = vec4(2.0); + b = vec4(3.0); +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc b/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..955f2c41872 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc @@ -0,0 +1,28 @@ +#version 450 + +layout(vertices = 4) out; +patch out P +{ + layout(location = 0) float a; + layout(location = 2) float b; +}; + +out C +{ + layout(location = 1) float a; + layout(location = 3) float b; +} c[]; + +void write_in_function() +{ + a = 1.0; + b = 2.0; + c[gl_InvocationID].a = 3.0; + c[gl_InvocationID].b = 4.0; + gl_out[gl_InvocationID].gl_Position = vec4(1.0); +} + +void main() +{ + write_in_function(); +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc b/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc new file mode 100644 index 00000000000..955f2c41872 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc @@ -0,0 +1,28 @@ +#version 450 + +layout(vertices = 4) out; +patch out P +{ + layout(location = 0) float a; + layout(location = 2) float 
b; +}; + +out C +{ + layout(location = 1) float a; + layout(location = 3) float b; +} c[]; + +void write_in_function() +{ + a = 1.0; + b = 2.0; + c[gl_InvocationID].a = 3.0; + c[gl_InvocationID].b = 4.0; + gl_out[gl_InvocationID].gl_Position = vec4(1.0); +} + +void main() +{ + write_in_function(); +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-0.vert b/shaders-msl/masking/write-outputs-block.mask-location-0.vert new file mode 100644 index 00000000000..2d8fdf19a22 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-0.vert @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out V +{ + vec4 a; + vec4 b; + vec4 c; + vec4 d; +}; + +void main() +{ + gl_Position = vec4(1.0); + a = vec4(2.0); + b = vec4(3.0); +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert b/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert new file mode 100644 index 00000000000..2d8fdf19a22 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out V +{ + vec4 a; + vec4 b; + vec4 c; + vec4 d; +}; + +void main() +{ + gl_Position = vec4(1.0); + a = vec4(2.0); + b = vec4(3.0); +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc b/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..955f2c41872 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc @@ -0,0 +1,28 @@ +#version 450 + +layout(vertices = 4) out; +patch out P +{ + layout(location = 0) float a; + layout(location = 2) float b; +}; + +out C +{ + layout(location = 1) float a; + layout(location = 3) float b; +} c[]; + +void write_in_function() +{ + a = 1.0; + b = 2.0; + c[gl_InvocationID].a = 3.0; + c[gl_InvocationID].b = 4.0; + gl_out[gl_InvocationID].gl_Position = vec4(1.0); +} + +void main() +{ + write_in_function(); +} diff --git 
a/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc b/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc new file mode 100644 index 00000000000..955f2c41872 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc @@ -0,0 +1,28 @@ +#version 450 + +layout(vertices = 4) out; +patch out P +{ + layout(location = 0) float a; + layout(location = 2) float b; +}; + +out C +{ + layout(location = 1) float a; + layout(location = 3) float b; +} c[]; + +void write_in_function() +{ + a = 1.0; + b = 2.0; + c[gl_InvocationID].a = 3.0; + c[gl_InvocationID].b = 4.0; + gl_out[gl_InvocationID].gl_Position = vec4(1.0); +} + +void main() +{ + write_in_function(); +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-1.vert b/shaders-msl/masking/write-outputs-block.mask-location-1.vert new file mode 100644 index 00000000000..2d8fdf19a22 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-1.vert @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out V +{ + vec4 a; + vec4 b; + vec4 c; + vec4 d; +}; + +void main() +{ + gl_Position = vec4(1.0); + a = vec4(2.0); + b = vec4(3.0); +} diff --git a/shaders-msl/masking/write-outputs.mask-clip-distance.vert b/shaders-msl/masking/write-outputs.mask-clip-distance.vert new file mode 100644 index 00000000000..35b462fdf8b --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-clip-distance.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert b/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert new file mode 100644 index 
00000000000..35b462fdf8b --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc b/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..c291fef03a0 --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc @@ -0,0 +1,26 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID][0] = 2.0; + if (gl_InvocationID == 0) + { + v1 = vec4(2.0); + v1[3] = 4.0; + } + gl_out[gl_InvocationID].gl_Position = vec4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; + gl_out[gl_InvocationID].gl_Position[2] = 5.0; + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc b/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = 
vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-0.tesc b/shaders-msl/masking/write-outputs.mask-location-0.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-0.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-0.vert b/shaders-msl/masking/write-outputs.mask-location-0.vert new file mode 100644 index 00000000000..35b462fdf8b --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-0.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert b/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert new file mode 100644 index 00000000000..35b462fdf8b --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float 
gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc b/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..c291fef03a0 --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc @@ -0,0 +1,26 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID][0] = 2.0; + if (gl_InvocationID == 0) + { + v1 = vec4(2.0); + v1[3] = 4.0; + } + gl_out[gl_InvocationID].gl_Position = vec4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; + gl_out[gl_InvocationID].gl_Position[2] = 5.0; + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc b/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-1.tesc 
b/shaders-msl/masking/write-outputs.mask-location-1.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-1.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-1.vert b/shaders-msl/masking/write-outputs.mask-location-1.vert new file mode 100644 index 00000000000..35b462fdf8b --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-1.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert b/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert new file mode 100644 index 00000000000..35b462fdf8b --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + 
write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc b/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-point-size.tesc b/shaders-msl/masking/write-outputs.mask-point-size.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-point-size.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-point-size.vert b/shaders-msl/masking/write-outputs.mask-point-size.vert new file mode 100644 index 00000000000..35b462fdf8b --- /dev/null +++ 
b/shaders-msl/masking/write-outputs.mask-point-size.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc b/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-position.tesc b/shaders-msl/masking/write-outputs.mask-position.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-position.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + 
gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc b/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc new file mode 100644 index 00000000000..0fc300d6886 --- /dev/null +++ b/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc @@ -0,0 +1,64 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(vertices = 5) out; + +layout(location = 0) patch out highp vec2 in_te_positionScale; +layout(location = 1) patch out highp vec2 in_te_positionOffset; + +struct S +{ + highp int x; + highp vec4 y; + highp float z[2]; +}; +layout(location = 2) patch out TheBlock +{ + highp float blockFa[3]; + S blockSa[2]; + highp float blockF; +} tcBlock[2]; + +layout(location = 0) in highp float in_tc_attr[]; + +void main (void) +{ + { + highp float v = 1.3; + + // Assign values to output tcBlock + for (int i0 = 0; i0 < 2; ++i0) + { + for (int i1 = 0; i1 < 3; ++i1) + { + tcBlock[i0].blockFa[i1] = v; + v += 0.4; + } + for (int i1 = 0; i1 < 2; ++i1) + { + tcBlock[i0].blockSa[i1].x = int(v); + v += 0.4; + tcBlock[i0].blockSa[i1].y = vec4(v, v+0.8, v+1.6, v+2.4); + v += 0.4; + for (int i2 = 0; i2 < 2; ++i2) + { + tcBlock[i0].blockSa[i1].z[i2] = v; + v += 0.4; + } + } + tcBlock[i0].blockF = v; + v += 0.4; + } + } + + gl_TessLevelInner[0] = in_tc_attr[0]; + gl_TessLevelInner[1] = in_tc_attr[1]; + + gl_TessLevelOuter[0] = in_tc_attr[2]; + gl_TessLevelOuter[1] = in_tc_attr[3]; + gl_TessLevelOuter[2] = in_tc_attr[4]; + gl_TessLevelOuter[3] = in_tc_attr[5]; + + in_te_positionScale = vec2(in_tc_attr[6], in_tc_attr[7]); + in_te_positionOffset = vec2(in_tc_attr[8], in_tc_attr[9]); +} diff --git a/shaders-msl/tesc/basic.multi-patch.tesc b/shaders-msl/tesc/basic.multi-patch.tesc new file mode 100644 index 00000000000..0a41f98c830 --- /dev/null +++ b/shaders-msl/tesc/basic.multi-patch.tesc @@ -0,0 +1,17 @@ +#version 310 es +#extension 
GL_EXT_tessellation_shader : require + +layout(location = 0) patch out vec3 vFoo; + +layout(vertices = 1) out; + +void main() +{ + gl_TessLevelInner[0] = 8.9; + gl_TessLevelInner[1] = 6.9; + gl_TessLevelOuter[0] = 8.9; + gl_TessLevelOuter[1] = 6.9; + gl_TessLevelOuter[2] = 3.9; + gl_TessLevelOuter[3] = 4.9; + vFoo = vec3(1.0); +} diff --git a/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc b/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc new file mode 100644 index 00000000000..aec8db7fac8 --- /dev/null +++ b/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc @@ -0,0 +1,70 @@ +#version 450 +layout(vertices = 4) out; + +struct Meep +{ + float a; + float b; +}; + +layout(location = 0) out float a[][2]; +layout(location = 2) out float b[]; +layout(location = 3) out mat2 m[]; +layout(location = 5) out Meep meep[]; +layout(location = 7) out Meep meeps[][2]; + +layout(location = 11) out Block +{ + float a[2]; + float b; + mat2 m; + Meep meep; + Meep meeps[2]; +} B[]; + +layout(location = 0) in float in_a[][2]; +layout(location = 2) in float in_b[]; +layout(location = 3) in mat2 in_m[]; +layout(location = 5) in Meep in_meep[]; +layout(location = 7) in Meep in_meeps[][2]; + +layout(location = 11) in Block +{ + float a[2]; + float b; + mat2 m; + Meep meep; + Meep meeps[2]; +} in_B[]; + +void write_in_func() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + + a[gl_InvocationID][0] = in_a[gl_InvocationID][0]; + a[gl_InvocationID][1] = in_a[gl_InvocationID][1]; + b[gl_InvocationID] = in_b[gl_InvocationID]; + m[gl_InvocationID] = in_m[gl_InvocationID]; + meep[gl_InvocationID].a = in_meep[gl_InvocationID].a; + meep[gl_InvocationID].b = in_meep[gl_InvocationID].b; + meeps[gl_InvocationID][0].a = in_meeps[gl_InvocationID][0].a; + meeps[gl_InvocationID][0].b = in_meeps[gl_InvocationID][0].b; + meeps[gl_InvocationID][1].a = in_meeps[gl_InvocationID][1].a; + meeps[gl_InvocationID][1].b = in_meeps[gl_InvocationID][1].b; + 
+ B[gl_InvocationID].a[0] = in_B[gl_InvocationID].a[0]; + B[gl_InvocationID].a[1] = in_B[gl_InvocationID].a[1]; + B[gl_InvocationID].b = in_B[gl_InvocationID].b; + B[gl_InvocationID].m = in_B[gl_InvocationID].m; + B[gl_InvocationID].meep.a = in_B[gl_InvocationID].meep.a; + B[gl_InvocationID].meep.b = in_B[gl_InvocationID].meep.b; + B[gl_InvocationID].meeps[0].a = in_B[gl_InvocationID].meeps[0].a; + B[gl_InvocationID].meeps[0].b = in_B[gl_InvocationID].meeps[0].b; + B[gl_InvocationID].meeps[1].a = in_B[gl_InvocationID].meeps[1].a; + B[gl_InvocationID].meeps[1].b = in_B[gl_InvocationID].meeps[1].b; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/tesc/complex-control-point-inout-types.tesc b/shaders-msl/tesc/complex-control-point-inout-types.tesc new file mode 100644 index 00000000000..b7bb21d6b35 --- /dev/null +++ b/shaders-msl/tesc/complex-control-point-inout-types.tesc @@ -0,0 +1,68 @@ +#version 450 +layout(vertices = 4) out; + +struct Meep +{ + float a; + float b; +}; + +layout(location = 0) out float a[][2]; +layout(location = 2) out float b[]; +layout(location = 3) out mat2 m[]; +layout(location = 5) out Meep meep[]; +layout(location = 7) out Meep meeps[][2]; + +layout(location = 11) out Block +{ + float a[2]; + float b; + mat2 m; + Meep meep; + Meep meeps[2]; +} B[]; + +layout(location = 0) in float in_a[][2]; +layout(location = 2) in float in_b[]; +layout(location = 3) in mat2 in_m[]; +layout(location = 5) in Meep in_meep[]; + +layout(location = 11) in Block +{ + float a[2]; + float b; + mat2 m; + // Non-multi-patch path cannot support structs inside structs. 
+} in_B[]; + +void write_in_func() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + + a[gl_InvocationID][0] = in_a[gl_InvocationID][0]; + a[gl_InvocationID][1] = in_a[gl_InvocationID][1]; + b[gl_InvocationID] = in_b[gl_InvocationID]; + m[gl_InvocationID] = in_m[gl_InvocationID]; + meep[gl_InvocationID].a = in_meep[gl_InvocationID].a; + meep[gl_InvocationID].b = in_meep[gl_InvocationID].b; + meeps[gl_InvocationID][0].a = 1.0; + meeps[gl_InvocationID][0].b = 2.0; + meeps[gl_InvocationID][1].a = 3.0; + meeps[gl_InvocationID][1].b = 4.0; + + B[gl_InvocationID].a[0] = in_B[gl_InvocationID].a[0]; + B[gl_InvocationID].a[1] = in_B[gl_InvocationID].a[1]; + B[gl_InvocationID].b = in_B[gl_InvocationID].b; + B[gl_InvocationID].m = in_B[gl_InvocationID].m; + B[gl_InvocationID].meep.a = 10.0; + B[gl_InvocationID].meep.b = 20.0; + B[gl_InvocationID].meeps[0].a = 5.0; + B[gl_InvocationID].meeps[0].b = 6.0; + B[gl_InvocationID].meeps[1].a = 7.0; + B[gl_InvocationID].meeps[1].b = 8.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/tesc/complex-patch-out-types.tesc b/shaders-msl/tesc/complex-patch-out-types.tesc new file mode 100644 index 00000000000..fd56ae46bd0 --- /dev/null +++ b/shaders-msl/tesc/complex-patch-out-types.tesc @@ -0,0 +1,55 @@ +#version 450 +layout(vertices = 4) out; + +struct Meep +{ + float a; + float b; +}; + +layout(location = 0) patch out float a[2]; +layout(location = 2) patch out float b; +layout(location = 3) patch out mat2 m; +layout(location = 5) patch out Meep meep; +layout(location = 7) patch out Meep meeps[2]; + +layout(location = 11) patch out Block +{ + float a[2]; + float b; + mat2 m; + Meep meep; + Meep meeps[2]; +} B; + +void write_in_func() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + + a[0] = 1.0; + a[1] = 2.0; + b = 3.0; + m = mat2(2.0); + meep.a = 4.0; + meep.b = 5.0; + meeps[0].a = 6.0; + meeps[0].b = 7.0; + meeps[1].a = 8.0; + meeps[1].b = 9.0; + + B.a[0] = 1.0; + B.a[1] = 2.0; + B.b = 3.0; + B.m = 
mat2(4.0); + B.meep.a = 4.0; + B.meep.b = 5.0; + B.meeps[0].a = 6.0; + B.meeps[0].b = 7.0; + B.meeps[1].a = 8.0; + B.meeps[1].b = 9.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc b/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc new file mode 100644 index 00000000000..36b16681e28 --- /dev/null +++ b/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc @@ -0,0 +1,12 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) in mat4 vInputs[gl_MaxPatchVertices]; +layout(location = 0) out mat4 vOutputs[4]; + +void main() +{ + mat4 tmp[gl_MaxPatchVertices] = vInputs; + vOutputs[gl_InvocationID] = tmp[gl_InvocationID]; +} diff --git a/shaders-msl/tesc/load-control-point-array-of-matrix.tesc b/shaders-msl/tesc/load-control-point-array-of-matrix.tesc new file mode 100644 index 00000000000..36b16681e28 --- /dev/null +++ b/shaders-msl/tesc/load-control-point-array-of-matrix.tesc @@ -0,0 +1,12 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) in mat4 vInputs[gl_MaxPatchVertices]; +layout(location = 0) out mat4 vOutputs[4]; + +void main() +{ + mat4 tmp[gl_MaxPatchVertices] = vInputs; + vOutputs[gl_InvocationID] = tmp[gl_InvocationID]; +} diff --git a/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc b/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc new file mode 100644 index 00000000000..4b4d5bfcd13 --- /dev/null +++ b/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc @@ -0,0 +1,21 @@ +#version 450 + +layout(vertices = 4) out; + +struct VertexData +{ + mat4 a; + vec4 b[2]; + vec4 c; +}; + +layout(location = 0) in VertexData vInputs[gl_MaxPatchVertices]; +layout(location = 0) out vec4 vOutputs[4]; + +void main() +{ + VertexData tmp[gl_MaxPatchVertices] = vInputs; + VertexData tmp_single = vInputs[gl_InvocationID ^ 1]; + + vOutputs[gl_InvocationID] = 
tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1] + tmp[gl_InvocationID].c + tmp_single.c; +} diff --git a/shaders-msl/tesc/load-control-point-array-of-struct.tesc b/shaders-msl/tesc/load-control-point-array-of-struct.tesc new file mode 100644 index 00000000000..4b4d5bfcd13 --- /dev/null +++ b/shaders-msl/tesc/load-control-point-array-of-struct.tesc @@ -0,0 +1,21 @@ +#version 450 + +layout(vertices = 4) out; + +struct VertexData +{ + mat4 a; + vec4 b[2]; + vec4 c; +}; + +layout(location = 0) in VertexData vInputs[gl_MaxPatchVertices]; +layout(location = 0) out vec4 vOutputs[4]; + +void main() +{ + VertexData tmp[gl_MaxPatchVertices] = vInputs; + VertexData tmp_single = vInputs[gl_InvocationID ^ 1]; + + vOutputs[gl_InvocationID] = tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1] + tmp[gl_InvocationID].c + tmp_single.c; +} diff --git a/shaders-msl/tesc/load-control-point-array.multi-patch.tesc b/shaders-msl/tesc/load-control-point-array.multi-patch.tesc new file mode 100644 index 00000000000..1a5924b895f --- /dev/null +++ b/shaders-msl/tesc/load-control-point-array.multi-patch.tesc @@ -0,0 +1,12 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) in vec4 vInputs[gl_MaxPatchVertices]; +layout(location = 0) out vec4 vOutputs[4]; + +void main() +{ + vec4 tmp[gl_MaxPatchVertices] = vInputs; + vOutputs[gl_InvocationID] = tmp[gl_InvocationID]; +} diff --git a/shaders-msl/tesc/load-control-point-array.tesc b/shaders-msl/tesc/load-control-point-array.tesc new file mode 100644 index 00000000000..1a5924b895f --- /dev/null +++ b/shaders-msl/tesc/load-control-point-array.tesc @@ -0,0 +1,12 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) in vec4 vInputs[gl_MaxPatchVertices]; +layout(location = 0) out vec4 vOutputs[4]; + +void main() +{ + vec4 tmp[gl_MaxPatchVertices] = vInputs; + vOutputs[gl_InvocationID] = tmp[gl_InvocationID]; +} diff --git a/shaders-msl/tesc/matrix-output.multi-patch.tesc 
b/shaders-msl/tesc/matrix-output.multi-patch.tesc new file mode 100644 index 00000000000..0d23861980c --- /dev/null +++ b/shaders-msl/tesc/matrix-output.multi-patch.tesc @@ -0,0 +1,28 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(vertices = 3) out; + +layout(location = 0) in highp float in_tc_attr[]; +layout(location = 0) out highp float in_te_attr[]; + +layout(location = 1) out mediump mat4x3 in_te_data0[]; +layout(location = 5) out mediump mat4x3 in_te_data1[]; + +void main (void) +{ + mat4x3 d = mat4x3(gl_InvocationID); + in_te_data0[gl_InvocationID] = d; + barrier(); + in_te_data1[gl_InvocationID] = d + in_te_data0[(gl_InvocationID + 1) % 3]; + + in_te_attr[gl_InvocationID] = in_tc_attr[gl_InvocationID]; + + gl_TessLevelInner[0] = 1.0; + gl_TessLevelInner[1] = 1.0; + + gl_TessLevelOuter[0] = 1.0; + gl_TessLevelOuter[1] = 1.0; + gl_TessLevelOuter[2] = 1.0; + gl_TessLevelOuter[3] = 1.0; +} diff --git a/shaders-msl/tesc/reload-tess-level.multi-patch.tesc b/shaders-msl/tesc/reload-tess-level.multi-patch.tesc new file mode 100644 index 00000000000..c3f0195cc76 --- /dev/null +++ b/shaders-msl/tesc/reload-tess-level.multi-patch.tesc @@ -0,0 +1,17 @@ +#version 450 +layout(vertices = 4) out; + +void main() +{ + if (gl_InvocationID == 0) + { + gl_TessLevelOuter[0] = 2.0; + gl_TessLevelOuter[1] = 3.0; + gl_TessLevelOuter[2] = 4.0; + gl_TessLevelOuter[3] = 5.0; + gl_TessLevelInner[0] = mix(gl_TessLevelOuter[0], gl_TessLevelOuter[3], 0.5); + gl_TessLevelInner[1] = mix(gl_TessLevelOuter[2], gl_TessLevelOuter[1], 0.5); + } + + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; +} diff --git a/shaders-msl/tesc/reload-tess-level.tesc b/shaders-msl/tesc/reload-tess-level.tesc new file mode 100644 index 00000000000..c3f0195cc76 --- /dev/null +++ b/shaders-msl/tesc/reload-tess-level.tesc @@ -0,0 +1,17 @@ +#version 450 +layout(vertices = 4) out; + +void main() +{ + if (gl_InvocationID == 0) + { + gl_TessLevelOuter[0] = 2.0; + 
gl_TessLevelOuter[1] = 3.0; + gl_TessLevelOuter[2] = 4.0; + gl_TessLevelOuter[3] = 5.0; + gl_TessLevelInner[0] = mix(gl_TessLevelOuter[0], gl_TessLevelOuter[3], 0.5); + gl_TessLevelInner[1] = mix(gl_TessLevelOuter[2], gl_TessLevelOuter[1], 0.5); + } + + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; +} diff --git a/shaders-msl/tesc/struct-output.multi-patch.tesc b/shaders-msl/tesc/struct-output.multi-patch.tesc new file mode 100644 index 00000000000..a1511a475a3 --- /dev/null +++ b/shaders-msl/tesc/struct-output.multi-patch.tesc @@ -0,0 +1,36 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(vertices = 3) out; + +layout(location = 0) in highp float in_tc_attr[]; +layout(location = 0) out highp float in_te_attr[]; + +struct te_data +{ + mediump float a; + mediump float b; + mediump uint c; +}; + +layout(location = 1) out te_data in_te_data0[]; +layout(location = 4) out te_data in_te_data1[]; + +void main (void) +{ + te_data d = te_data(float(gl_InvocationID), float(gl_InvocationID + 1), uint(gl_InvocationID)); + in_te_data0[gl_InvocationID] = d; + barrier(); + te_data e = in_te_data0[(gl_InvocationID + 1) % 3]; + in_te_data1[gl_InvocationID] = te_data(d.a + e.a, d.b + e.b, d.c + e.c); + + in_te_attr[gl_InvocationID] = in_tc_attr[gl_InvocationID]; + + gl_TessLevelInner[0] = 1.0; + gl_TessLevelInner[1] = 1.0; + + gl_TessLevelOuter[0] = 1.0; + gl_TessLevelOuter[1] = 1.0; + gl_TessLevelOuter[2] = 1.0; + gl_TessLevelOuter[3] = 1.0; +} diff --git a/shaders-msl/tesc/water_tess.multi-patch.tesc b/shaders-msl/tesc/water_tess.multi-patch.tesc new file mode 100644 index 00000000000..3ecdc3d1a96 --- /dev/null +++ b/shaders-msl/tesc/water_tess.multi-patch.tesc @@ -0,0 +1,115 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(vertices = 1) out; +layout(location = 0) in vec2 vPatchPosBase[]; + +layout(std140) uniform UBO +{ + vec4 uScale; + highp vec3 uCamPos; + vec2 uPatchSize; + vec2 
uMaxTessLevel; + float uDistanceMod; + vec4 uFrustum[6]; +}; + +layout(location = 1) patch out vec2 vOutPatchPosBase; +layout(location = 2) patch out vec4 vPatchLods; + +float lod_factor(vec2 pos_) +{ + vec2 pos = pos_ * uScale.xy; + vec3 dist_to_cam = uCamPos - vec3(pos.x, 0.0, pos.y); + float level = log2((length(dist_to_cam) + 0.0001) * uDistanceMod); + return clamp(level, 0.0, uMaxTessLevel.x); +} + +float tess_level(float lod) +{ + return uMaxTessLevel.y * exp2(-lod); +} + +vec4 tess_level(vec4 lod) +{ + return uMaxTessLevel.y * exp2(-lod); +} + +// Guard band for vertex displacement. +#define GUARD_BAND 10.0 +bool frustum_cull(vec2 p0) +{ + vec2 min_xz = (p0 - GUARD_BAND) * uScale.xy; + vec2 max_xz = (p0 + uPatchSize + GUARD_BAND) * uScale.xy; + + vec3 bb_min = vec3(min_xz.x, -GUARD_BAND, min_xz.y); + vec3 bb_max = vec3(max_xz.x, +GUARD_BAND, max_xz.y); + vec3 center = 0.5 * (bb_min + bb_max); + float radius = 0.5 * length(bb_max - bb_min); + + vec3 f0 = vec3( + dot(uFrustum[0], vec4(center, 1.0)), + dot(uFrustum[1], vec4(center, 1.0)), + dot(uFrustum[2], vec4(center, 1.0))); + + vec3 f1 = vec3( + dot(uFrustum[3], vec4(center, 1.0)), + dot(uFrustum[4], vec4(center, 1.0)), + dot(uFrustum[5], vec4(center, 1.0))); + + return !(any(lessThanEqual(f0, vec3(-radius))) || any(lessThanEqual(f1, vec3(-radius)))); +} + +void compute_tess_levels(vec2 p0) +{ + vOutPatchPosBase = p0; + + float l00 = lod_factor(p0 + vec2(-0.5, -0.5) * uPatchSize); + float l10 = lod_factor(p0 + vec2(+0.5, -0.5) * uPatchSize); + float l20 = lod_factor(p0 + vec2(+1.5, -0.5) * uPatchSize); + float l01 = lod_factor(p0 + vec2(-0.5, +0.5) * uPatchSize); + float l11 = lod_factor(p0 + vec2(+0.5, +0.5) * uPatchSize); + float l21 = lod_factor(p0 + vec2(+1.5, +0.5) * uPatchSize); + float l02 = lod_factor(p0 + vec2(-0.5, +1.5) * uPatchSize); + float l12 = lod_factor(p0 + vec2(+0.5, +1.5) * uPatchSize); + float l22 = lod_factor(p0 + vec2(+1.5, +1.5) * uPatchSize); + + vec4 lods = vec4( + dot(vec4(l01, 
l11, l02, l12), vec4(0.25)), + dot(vec4(l00, l10, l01, l11), vec4(0.25)), + dot(vec4(l10, l20, l11, l21), vec4(0.25)), + dot(vec4(l11, l21, l12, l22), vec4(0.25))); + + vPatchLods = lods; + + vec4 outer_lods = min(lods.xyzw, lods.yzwx); + vec4 levels = tess_level(outer_lods); + gl_TessLevelOuter[0] = levels.x; + gl_TessLevelOuter[1] = levels.y; + gl_TessLevelOuter[2] = levels.z; + gl_TessLevelOuter[3] = levels.w; + + float min_lod = min(min(lods.x, lods.y), min(lods.z, lods.w)); + float inner = tess_level(min(min_lod, l11)); + gl_TessLevelInner[0] = inner; + gl_TessLevelInner[1] = inner; +} + +void main() +{ + vec2 p0 = vPatchPosBase[0]; + if (!frustum_cull(p0)) + { + gl_TessLevelOuter[0] = -1.0; + gl_TessLevelOuter[1] = -1.0; + gl_TessLevelOuter[2] = -1.0; + gl_TessLevelOuter[3] = -1.0; + gl_TessLevelInner[0] = -1.0; + gl_TessLevelInner[1] = -1.0; + } + else + { + compute_tess_levels(p0); + } +} + diff --git a/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese b/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese new file mode 100644 index 00000000000..a32c13096a5 --- /dev/null +++ b/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese @@ -0,0 +1,19 @@ +#version 450 +#extension GL_EXT_tessellation_shader : require + +layout(triangles) in; +layout(location = 0) in struct { + float dummy; + vec4 variableInStruct; +} testStructArray[][3]; +layout(location = 0) out float outResult; +void main(void) +{ + gl_Position = vec4(gl_TessCoord.xy * 2.0 - 1.0, 0.0, 1.0); + float result; + result = float(abs(testStructArray[0][2].variableInStruct.x - -4.0) < 0.001) * + float(abs(testStructArray[0][2].variableInStruct.y - -9.0) < 0.001) * + float(abs(testStructArray[0][2].variableInStruct.z - 3.0) < 0.001) * + float(abs(testStructArray[0][2].variableInStruct.w - 7.0) < 0.001); + outResult = result; +} diff --git a/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese b/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese new file mode 100644 index 
00000000000..629415a8736 --- /dev/null +++ b/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese @@ -0,0 +1,31 @@ +#version 450 +layout(triangles, ccw, equal_spacing) in; + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[1]; +}; + +struct t35 +{ + vec2 m0; + vec4 m1; +}; + +layout(location = 0) in t36 +{ + vec2 m0; + t35 m1; +} v40[32]; + +layout(location = 0) out float v80; + +void main() +{ + gl_Position = vec4((gl_TessCoord.xy * 2.0) - vec2(1.0), 0.0, 1.0); + float v34 = ((float(abs(v40[0].m1.m1.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(v40[0].m1.m1.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(v40[0].m1.m1.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(v40[0].m1.m1.w - 7.0) < 0.001000000047497451305389404296875); + v80 = v34; +} diff --git a/shaders-msl/tese/in-block-with-nested-struct.tese b/shaders-msl/tese/in-block-with-nested-struct.tese new file mode 100644 index 00000000000..629415a8736 --- /dev/null +++ b/shaders-msl/tese/in-block-with-nested-struct.tese @@ -0,0 +1,31 @@ +#version 450 +layout(triangles, ccw, equal_spacing) in; + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[1]; +}; + +struct t35 +{ + vec2 m0; + vec4 m1; +}; + +layout(location = 0) in t36 +{ + vec2 m0; + t35 m1; +} v40[32]; + +layout(location = 0) out float v80; + +void main() +{ + gl_Position = vec4((gl_TessCoord.xy * 2.0) - vec2(1.0), 0.0, 1.0); + float v34 = ((float(abs(v40[0].m1.m1.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(v40[0].m1.m1.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(v40[0].m1.m1.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(v40[0].m1.m1.w - 7.0) < 0.001000000047497451305389404296875); + v80 = v34; +} diff --git a/shaders-msl/tese/input-types.raw-tess-in.tese b/shaders-msl/tese/input-types.raw-tess-in.tese new file mode 
100644 index 00000000000..3157953fdd5 --- /dev/null +++ b/shaders-msl/tese/input-types.raw-tess-in.tese @@ -0,0 +1,75 @@ +#version 450 + +layout(ccw, quads, fractional_even_spacing) in; + +// Try to use the whole taxonomy of input methods. + +// Per-vertex vector. +layout(location = 0) in vec4 vColor[]; +// Per-patch vector. +layout(location = 1) patch in vec4 vColors; +// Per-patch vector array. +layout(location = 2) patch in vec4 vColorsArray[2]; + +// I/O blocks, per patch and per control point. +layout(location = 4) in Block +{ + vec4 a; + vec4 b; +} blocks[]; + +layout(location = 6) patch in PatchBlock +{ + vec4 a; + vec4 b; +} patch_block; + +// Composites. +struct Foo +{ + vec4 a; + vec4 b; +}; +layout(location = 8) patch in Foo vFoo; +//layout(location = 10) patch in Foo vFooArray[2]; // FIXME: Handling of array-of-struct input is broken! + +// Per-control point struct. +layout(location = 14) in Foo vFoos[]; + +void set_from_function() +{ + gl_Position = blocks[0].a; + gl_Position += blocks[0].b; + gl_Position += blocks[1].a; + gl_Position += blocks[1].b; + gl_Position += patch_block.a; + gl_Position += patch_block.b; + gl_Position += vColor[0]; + gl_Position += vColor[1]; + gl_Position += vColors; + + Foo foo = vFoo; + gl_Position += foo.a; + gl_Position += foo.b; + + /*foo = vFooArray[0]; + gl_Position += foo.a; + gl_Position += foo.b; + + foo = vFooArray[1]; + gl_Position += foo.a; + gl_Position += foo.b;*/ + + foo = vFoos[0]; + gl_Position += foo.a; + gl_Position += foo.b; + + foo = vFoos[1]; + gl_Position += foo.a; + gl_Position += foo.b; +} + +void main() +{ + set_from_function(); +} diff --git a/shaders-msl/tese/load-control-point-array-of-matrix.tese b/shaders-msl/tese/load-control-point-array-of-matrix.tese new file mode 100644 index 00000000000..479b3e651b7 --- /dev/null +++ b/shaders-msl/tese/load-control-point-array-of-matrix.tese @@ -0,0 +1,13 @@ +#version 450 + +layout(cw, quads) in; +layout(location = 0) in mat4 vInputs[gl_MaxPatchVertices]; 
+layout(location = 4) patch in vec4 vBoo[4]; +layout(location = 8) patch in int vIndex; + +void main() +{ + mat4 tmp[gl_MaxPatchVertices] = vInputs; + gl_Position = tmp[0][vIndex] + tmp[1][vIndex] + vBoo[vIndex]; + +} diff --git a/shaders-msl/tese/load-control-point-array.tese b/shaders-msl/tese/load-control-point-array.tese new file mode 100644 index 00000000000..4fa0bb1242a --- /dev/null +++ b/shaders-msl/tese/load-control-point-array.tese @@ -0,0 +1,13 @@ +#version 450 + +layout(cw, quads) in; +layout(location = 0) in vec4 vInputs[gl_MaxPatchVertices]; +layout(location = 1) patch in vec4 vBoo[4]; +layout(location = 5) patch in int vIndex; + +void main() +{ + vec4 tmp[gl_MaxPatchVertices] = vInputs; + gl_Position = tmp[0] + tmp[1] + vBoo[vIndex]; + +} diff --git a/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese b/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese new file mode 100644 index 00000000000..0c289ac5bd2 --- /dev/null +++ b/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese @@ -0,0 +1,17 @@ +#version 450 +layout(quads) in; + +vec4 read_tess_levels() +{ + return vec4( + gl_TessLevelOuter[0], + gl_TessLevelOuter[1], + gl_TessLevelOuter[2], + gl_TessLevelOuter[3]) + + vec2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +void main() +{ + gl_Position = read_tess_levels(); +} diff --git a/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese b/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese new file mode 100644 index 00000000000..0c289ac5bd2 --- /dev/null +++ b/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese @@ -0,0 +1,17 @@ +#version 450 +layout(quads) in; + +vec4 read_tess_levels() +{ + return vec4( + gl_TessLevelOuter[0], + gl_TessLevelOuter[1], + gl_TessLevelOuter[2], + gl_TessLevelOuter[3]) + + vec2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +void main() +{ + gl_Position = read_tess_levels(); +} diff --git a/shaders-msl/tese/read-tess-level-in-func.msl2.tese 
b/shaders-msl/tese/read-tess-level-in-func.msl2.tese new file mode 100644 index 00000000000..8cf1f1a8f75 --- /dev/null +++ b/shaders-msl/tese/read-tess-level-in-func.msl2.tese @@ -0,0 +1,17 @@ +#version 450 +layout(triangles) in; + +vec4 read_tess_levels() +{ + return vec4( + gl_TessLevelOuter[0], + gl_TessLevelOuter[1], + gl_TessLevelOuter[2], + gl_TessLevelOuter[3]) + + vec2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +void main() +{ + gl_Position = read_tess_levels(); +} diff --git a/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese b/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese new file mode 100644 index 00000000000..8cf1f1a8f75 --- /dev/null +++ b/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese @@ -0,0 +1,17 @@ +#version 450 +layout(triangles) in; + +vec4 read_tess_levels() +{ + return vec4( + gl_TessLevelOuter[0], + gl_TessLevelOuter[1], + gl_TessLevelOuter[2], + gl_TessLevelOuter[3]) + + vec2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +void main() +{ + gl_Position = read_tess_levels(); +} diff --git a/shaders-msl/tese/water_tess.raw-tess-in.tese b/shaders-msl/tese/water_tess.raw-tess-in.tese new file mode 100644 index 00000000000..32d6bc9391b --- /dev/null +++ b/shaders-msl/tese/water_tess.raw-tess-in.tese @@ -0,0 +1,65 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +precision highp int; + +layout(cw, quads, fractional_even_spacing) in; + +layout(location = 0) patch in vec2 vOutPatchPosBase; +layout(location = 1) patch in vec4 vPatchLods; + +layout(binding = 1, std140) uniform UBO +{ + mat4 uMVP; + vec4 uScale; + vec2 uInvScale; + vec3 uCamPos; + vec2 uPatchSize; + vec2 uInvHeightmapSize; +}; +layout(binding = 0) uniform mediump sampler2D uHeightmapDisplacement; + +layout(location = 0) highp out vec3 vWorld; +layout(location = 1) highp out vec4 vGradNormalTex; + +vec2 lerp_vertex(vec2 tess_coord) +{ + return vOutPatchPosBase + tess_coord * uPatchSize; +} + +mediump 
vec2 lod_factor(vec2 tess_coord) +{ + mediump vec2 x = mix(vPatchLods.yx, vPatchLods.zw, tess_coord.x); + mediump float level = mix(x.x, x.y, tess_coord.y); + mediump float floor_level = floor(level); + mediump float fract_level = level - floor_level; + return vec2(floor_level, fract_level); +} + +mediump vec3 sample_height_displacement(vec2 uv, vec2 off, mediump vec2 lod) +{ + return mix( + textureLod(uHeightmapDisplacement, uv + 0.5 * off, lod.x).xyz, + textureLod(uHeightmapDisplacement, uv + 1.0 * off, lod.x + 1.0).xyz, + lod.y); +} + +void main() +{ + vec2 tess_coord = gl_TessCoord.xy; + vec2 pos = lerp_vertex(tess_coord); + mediump vec2 lod = lod_factor(tess_coord); + + vec2 tex = pos * uInvHeightmapSize.xy; + pos *= uScale.xy; + + mediump float delta_mod = exp2(lod.x); + vec2 off = uInvHeightmapSize.xy * delta_mod; + + vGradNormalTex = vec4(tex + 0.5 * uInvHeightmapSize.xy, tex * uScale.zw); + vec3 height_displacement = sample_height_displacement(tex, off, lod); + + pos += height_displacement.yz; + vWorld = vec3(pos.x, height_displacement.x, pos.y); + gl_Position = uMVP * vec4(vWorld, 1.0); +} + diff --git a/shaders-msl/vert/array-component-io.for-tess.vert b/shaders-msl/vert/array-component-io.for-tess.vert new file mode 100644 index 00000000000..257ac848d16 --- /dev/null +++ b/shaders-msl/vert/array-component-io.for-tess.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 1, component = 0) out float A[2]; +layout(location = 1, component = 2) out vec2 B[2]; +layout(location = 0, component = 1) out float C[3]; +layout(location = 0, component = 3) out float D; + +layout(location = 1, component = 0) in float InA[2]; +layout(location = 1, component = 2) in vec2 InB[2]; +layout(location = 0, component = 1) in float InC[3]; +layout(location = 0, component = 3) in float InD; +layout(location = 4) in vec4 Pos; + +void main() +{ + gl_Position = Pos; + A = InA; + B = InB; + C = InC; + D = InD; +} diff --git a/shaders-msl/vert/array-component-io.vert 
b/shaders-msl/vert/array-component-io.vert new file mode 100644 index 00000000000..257ac848d16 --- /dev/null +++ b/shaders-msl/vert/array-component-io.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 1, component = 0) out float A[2]; +layout(location = 1, component = 2) out vec2 B[2]; +layout(location = 0, component = 1) out float C[3]; +layout(location = 0, component = 3) out float D; + +layout(location = 1, component = 0) in float InA[2]; +layout(location = 1, component = 2) in vec2 InB[2]; +layout(location = 0, component = 1) in float InC[3]; +layout(location = 0, component = 3) in float InD; +layout(location = 4) in vec4 Pos; + +void main() +{ + gl_Position = Pos; + A = InA; + B = InB; + C = InC; + D = InD; +} diff --git a/shaders-msl/vert/basic.for-tess.vert b/shaders-msl/vert/basic.for-tess.vert new file mode 100644 index 00000000000..8191dc2d0fc --- /dev/null +++ b/shaders-msl/vert/basic.for-tess.vert @@ -0,0 +1,17 @@ +#version 310 es + +layout(std140) uniform UBO +{ + uniform mat4 uMVP; +}; + +layout(location = 0) in vec4 aVertex; +layout(location = 1) in vec3 aNormal; + +layout(location = 0) out vec3 vNormal; + +void main() +{ + gl_Position = uMVP * aVertex; + vNormal = aNormal; +} diff --git a/shaders-msl/vert/buffer_device_address.msl2.vert b/shaders-msl/vert/buffer_device_address.msl2.vert new file mode 100644 index 00000000000..ffc88713060 --- /dev/null +++ b/shaders-msl/vert/buffer_device_address.msl2.vert @@ -0,0 +1,83 @@ +/* Copyright (c) 2021, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#version 450 + +// Allows buffer_reference. +#extension GL_EXT_buffer_reference : require + +// Since we did not enable vertexPipelineStoresAndAtomics, we must mark everything readonly. +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer Position +{ + vec2 positions[]; +}; + +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer PositionReferences +{ + // Represents an array of pointers, where each pointer points to its own VBO (Position). + // The size of a pointer (VkDeviceAddress) is always 8 in Vulkan. + Position buffers[]; +}; + +layout(push_constant) uniform Registers +{ + mat4 view_projection; + + // This is a pointer to an array of pointers, essentially: + // const VBO * const *vbos + PositionReferences references; +} registers; + +// Flat shading looks a little cooler here :) +layout(location = 0) flat out vec4 out_color; + +void main() +{ + int slice = gl_InstanceIndex; + + // One VBO per instance, load the VBO pointer. + // The cool thing here is that a compute shader could hypothetically + // write the pointer list where vertices are stored. + // With vertex attributes we do not have the luxury to modify VBO bindings on the GPU. + // The best we can do is to just modify the vertexOffset in an indirect draw call, + // but that's not always flexible enough, and enforces a very specific engine design to work. + // We can even modify the attribute layout per slice here, since we can just cast the pointer + // to something else if we want. 
+ restrict Position positions = registers.references.buffers[slice]; + + // Load the vertex based on VertexIndex instead of an attribute. Fully flexible. + // Only downside is that we do not get format conversion for free like we do with normal vertex attributes. + vec2 pos = positions.positions[gl_VertexIndex] * 2.5; + + // Place the quad meshes on screen and center it. + pos += 3.0 * (vec2(slice % 8, slice / 8) - 3.5); + + // Normal projection. + gl_Position = registers.view_projection * vec4(pos, 0.0, 1.0); + + // Color the vertex. Use a combination of a wave and checkerboard, completely arbitrary. + int index_x = gl_VertexIndex % 16; + int index_y = gl_VertexIndex / 16; + + float r = 0.5 + 0.3 * sin(float(index_x)); + float g = 0.5 + 0.3 * sin(float(index_y)); + + int checkerboard = (index_x ^ index_y) & 1; + r *= float(checkerboard) * 0.8 + 0.2; + g *= float(checkerboard) * 0.8 + 0.2; + + out_color = vec4(r, g, 0.15, 1.0); +} diff --git a/shaders-msl/vert/clip-distance-block.no-user-varying.vert b/shaders-msl/vert/clip-distance-block.no-user-varying.vert new file mode 100644 index 00000000000..93ed31150c8 --- /dev/null +++ b/shaders-msl/vert/clip-distance-block.no-user-varying.vert @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 Position; +out gl_PerVertex +{ + vec4 gl_Position; + float gl_ClipDistance[2]; +}; + +void main() +{ + gl_Position = Position; + gl_ClipDistance[0] = Position.x; + gl_ClipDistance[1] = Position.y; +} diff --git a/shaders-msl/vert/clip-distance-block.vert b/shaders-msl/vert/clip-distance-block.vert new file mode 100644 index 00000000000..93ed31150c8 --- /dev/null +++ b/shaders-msl/vert/clip-distance-block.vert @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 Position; +out gl_PerVertex +{ + vec4 gl_Position; + float gl_ClipDistance[2]; +}; + +void main() +{ + gl_Position = Position; + gl_ClipDistance[0] = Position.x; + gl_ClipDistance[1] = Position.y; +} diff --git 
a/shaders-msl/vert/float-math.invariant-float-math.vert b/shaders-msl/vert/float-math.invariant-float-math.vert new file mode 100644 index 00000000000..caa8639a895 --- /dev/null +++ b/shaders-msl/vert/float-math.invariant-float-math.vert @@ -0,0 +1,25 @@ +#version 450 + +layout(set = 0, binding = 0) uniform Matrices +{ + mat4 vpMatrix; + mat4 wMatrix; + mat4x3 wMatrix4x3; + mat3x4 wMatrix3x4; +}; + +layout(location = 0) in vec3 InPos; +layout(location = 1) in vec3 InNormal; + +layout(location = 0) out vec3 OutNormal; +layout(location = 1) out vec4 OutWorldPos[4]; + +void main() +{ + gl_Position = vpMatrix * wMatrix * vec4(InPos, 1); + OutWorldPos[0] = wMatrix * vec4(InPos, 1); + OutWorldPos[1] = vec4(InPos, 1) * wMatrix; + OutWorldPos[2] = wMatrix3x4 * InPos; + OutWorldPos[3] = InPos * wMatrix4x3; + OutNormal = (wMatrix * vec4(InNormal, 0)).xyz; +} diff --git a/shaders-msl/vert/float-math.vert b/shaders-msl/vert/float-math.vert new file mode 100644 index 00000000000..caa8639a895 --- /dev/null +++ b/shaders-msl/vert/float-math.vert @@ -0,0 +1,25 @@ +#version 450 + +layout(set = 0, binding = 0) uniform Matrices +{ + mat4 vpMatrix; + mat4 wMatrix; + mat4x3 wMatrix4x3; + mat3x4 wMatrix3x4; +}; + +layout(location = 0) in vec3 InPos; +layout(location = 1) in vec3 InNormal; + +layout(location = 0) out vec3 OutNormal; +layout(location = 1) out vec4 OutWorldPos[4]; + +void main() +{ + gl_Position = vpMatrix * wMatrix * vec4(InPos, 1); + OutWorldPos[0] = wMatrix * vec4(InPos, 1); + OutWorldPos[1] = vec4(InPos, 1) * wMatrix; + OutWorldPos[2] = wMatrix3x4 * InPos; + OutWorldPos[3] = InPos * wMatrix4x3; + OutNormal = (wMatrix * vec4(InNormal, 0)).xyz; +} diff --git a/shaders-msl/vert/implicit-position-1.vert b/shaders-msl/vert/implicit-position-1.vert new file mode 100644 index 00000000000..54300d8daa7 --- /dev/null +++ b/shaders-msl/vert/implicit-position-1.vert @@ -0,0 +1,6 @@ +#version 450 +layout(location = 0) out vec4 V; +void main() +{ + V = vec4(1.0); +} diff --git 
a/shaders-msl/vert/implicit-position-2.vert b/shaders-msl/vert/implicit-position-2.vert new file mode 100644 index 00000000000..9996ddaad21 --- /dev/null +++ b/shaders-msl/vert/implicit-position-2.vert @@ -0,0 +1,4 @@ +#version 450 +void main() +{ +} diff --git a/shaders-msl/vert/interface-block-single-element-array.vert b/shaders-msl/vert/interface-block-single-element-array.vert new file mode 100644 index 00000000000..993484fba82 --- /dev/null +++ b/shaders-msl/vert/interface-block-single-element-array.vert @@ -0,0 +1,17 @@ +#version 460 + +layout(location = 0) out TDPickVertex +{ +vec4 c; +vec3 uv[1]; +} oTDVert; + +layout(location = 0) in vec3 P; +layout(location = 1) in vec3 uv[1]; + +void main() +{ +gl_Position = vec4(P, 1.0); +oTDVert.uv[0] = uv[0]; +oTDVert.c = vec4(1.); +} \ No newline at end of file diff --git a/shaders-msl/vert/leaf-function.for-tess.vert b/shaders-msl/vert/leaf-function.for-tess.vert new file mode 100644 index 00000000000..cdb60fae31c --- /dev/null +++ b/shaders-msl/vert/leaf-function.for-tess.vert @@ -0,0 +1,22 @@ +#version 310 es + +layout(std140) uniform UBO +{ + uniform mat4 uMVP; +}; + +layout(location = 0) in vec4 aVertex; +layout(location = 1) in vec3 aNormal; + +layout(location = 0) out vec3 vNormal; + +void set_output() +{ + gl_Position = uMVP * aVertex; + vNormal = aNormal; +} + +void main() +{ + set_output(); +} diff --git a/shaders-msl/vert/no-contraction.vert b/shaders-msl/vert/no-contraction.vert new file mode 100644 index 00000000000..206fbf0de80 --- /dev/null +++ b/shaders-msl/vert/no-contraction.vert @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in vec4 vB; +layout(location = 2) in vec4 vC; + +void main() +{ + precise vec4 mul = vA * vB; + precise vec4 add = vA + vB; + precise vec4 sub = vA - vB; + precise vec4 mad = vA * vB + vC; + precise vec4 summed = mul + add + sub + mad; + gl_Position = summed; +} diff --git a/shaders-msl/vert/no-disable-vertex-out.frag-output.vert 
b/shaders-msl/vert/no-disable-vertex-out.frag-output.vert new file mode 100644 index 00000000000..7ea3790a02a --- /dev/null +++ b/shaders-msl/vert/no-disable-vertex-out.frag-output.vert @@ -0,0 +1,16 @@ +#version 400 +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable +layout(std140, binding = 0) uniform buf { + mat4 MVP; + vec4 position[12*3]; + vec4 attr[12*3]; +} ubuf; +layout (location = 0) out vec4 texcoord; +layout (location = 1) out vec3 frag_pos; +void main() +{ + texcoord = ubuf.attr[gl_VertexIndex]; + gl_Position = ubuf.MVP * ubuf.position[gl_VertexIndex]; + frag_pos = gl_Position.xyz; +} diff --git a/shaders-msl/vert/no_stage_out.for-tess.vert b/shaders-msl/vert/no_stage_out.for-tess.vert new file mode 100644 index 00000000000..3c2573a628d --- /dev/null +++ b/shaders-msl/vert/no_stage_out.for-tess.vert @@ -0,0 +1,14 @@ +#version 450 + +layout(binding = 0, std430) writeonly buffer _10_12 +{ + uvec4 _m0[1024]; +} _12; + +layout(location = 0) in uvec4 _19; + +void main() +{ + _12._m0[gl_VertexIndex] = _19; +} + diff --git a/shaders-msl/vert/out-block-with-nested-struct-array.vert b/shaders-msl/vert/out-block-with-nested-struct-array.vert new file mode 100644 index 00000000000..444e7ca9730 --- /dev/null +++ b/shaders-msl/vert/out-block-with-nested-struct-array.vert @@ -0,0 +1,28 @@ +#version 450 + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[1]; +}; + +struct t21 +{ + vec4 m0; + vec4 m1; +}; + +layout(location = 0) in vec4 v17; +layout(location = 0) out t24 +{ + t21 m0[3]; +} v26; + + +void main() +{ + gl_Position = v17; + v26.m0[1].m1 = vec4(-4.0, -9.0, 3.0, 7.0); +} diff --git a/shaders-msl/vert/out-block-with-struct-array.vert b/shaders-msl/vert/out-block-with-struct-array.vert new file mode 100644 index 00000000000..2cb13b30959 --- /dev/null +++ b/shaders-msl/vert/out-block-with-struct-array.vert @@ -0,0 +1,24 @@ +#version 450 + +out 
gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[1]; +}; + +struct t21 +{ + float m0; + vec4 m1; +}; + +layout(location = 0) in vec4 v17; +layout(location = 0) out t21 v25[3]; + +void main() +{ + gl_Position = v17; + v25[2].m1 = vec4(-4.0, -9.0, 3.0, 7.0); +} diff --git a/shaders-msl/vert/packed-bool-to-uint.vert b/shaders-msl/vert/packed-bool-to-uint.vert new file mode 100644 index 00000000000..933a15e621d --- /dev/null +++ b/shaders-msl/vert/packed-bool-to-uint.vert @@ -0,0 +1,22 @@ +#version 450 core + +struct Struct +{ + bool flags[1]; +}; + +layout(set=0, binding=0, std140) uniform defaultUniformsVS +{ + Struct flags; + vec2 uquad[4]; + mat4 umatrix; +}; + +layout (location = 0) in vec4 a_position; + +void main() +{ + gl_Position = umatrix * vec4(uquad[gl_VertexIndex], a_position.z, a_position.w); + if (flags.flags[0]) + gl_Position.z = 0.0; +} diff --git a/shaders-msl/vert/packed-bool2-to-packed_uint2.vert b/shaders-msl/vert/packed-bool2-to-packed_uint2.vert new file mode 100644 index 00000000000..e3939a4519d --- /dev/null +++ b/shaders-msl/vert/packed-bool2-to-packed_uint2.vert @@ -0,0 +1,22 @@ +#version 450 core + +struct Struct +{ + bvec2 flags[1]; +}; + +layout(set=0, binding=0, std140) uniform defaultUniformsVS +{ + Struct flags; + vec2 uquad[4]; + mat4 umatrix; +}; + +layout (location = 0) in vec4 a_position; + +void main() +{ + gl_Position = umatrix * vec4(uquad[gl_VertexIndex], a_position.z, a_position.w); + if (flags.flags[0].x) + gl_Position.z = 0.0; +} diff --git a/shaders-msl/vert/return-array.force-native-array.vert b/shaders-msl/vert/return-array.force-native-array.vert new file mode 100644 index 00000000000..708460114e5 --- /dev/null +++ b/shaders-msl/vert/return-array.force-native-array.vert @@ -0,0 +1,22 @@ +#version 310 es + +layout(location = 0) in vec4 vInput0; +layout(location = 1) in vec4 vInput1; + +vec4[2] test() +{ + return vec4[](vec4(10.0), vec4(20.0)); +} + +vec4[2] 
test2() +{ + vec4 foobar[2]; + foobar[0] = vInput0; + foobar[1] = vInput1; + return foobar; +} + +void main() +{ + gl_Position = test()[0] + test2()[1]; +} diff --git a/shaders-msl/vert/signedness-mismatch.shader-inputs.vert b/shaders-msl/vert/signedness-mismatch.shader-inputs.vert new file mode 100644 index 00000000000..dc0f7e6b52e --- /dev/null +++ b/shaders-msl/vert/signedness-mismatch.shader-inputs.vert @@ -0,0 +1,14 @@ +#version 450 + +#extension GL_AMD_gpu_shader_int16 : require + +layout(location = 0) in int16_t a; +layout(location = 1) in ivec2 b; +layout(location = 2) in uint16_t c[2]; +layout(location = 4) in uvec4 d[2]; + +void main() +{ + gl_Position = vec4(float(int(a)), float(b.x), float(uint(c[1])), float(d[0].w)); +} + diff --git a/shaders-msl/vert/uniform-struct-out-of-order-offests.vert b/shaders-msl/vert/uniform-struct-out-of-order-offests.vert new file mode 100644 index 00000000000..21234f94164 --- /dev/null +++ b/shaders-msl/vert/uniform-struct-out-of-order-offests.vert @@ -0,0 +1,31 @@ +#version 450 + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[1]; +}; + +layout(set = 0, binding = 0, std140) uniform data_u_t +{ + layout(offset = 80) mediump int m0[8]; + layout(offset = 0) mediump ivec4 m1[3]; + layout(offset = 64) uvec3 m2; + layout(offset = 48) mediump uint m3; +} data_u; + +layout(location = 0) in vec4 vtx_posn; +layout(location = 0) out mediump float foo; + +void main() +{ + gl_Position = vtx_posn; + ivec4 a = data_u.m1[1]; + uvec3 b = data_u.m2; + int c = data_u.m0[4]; + foo = (a.xyz + b).y * c; +} + + diff --git a/shaders-msl/vert/uniform-struct-packing-nested.vert b/shaders-msl/vert/uniform-struct-packing-nested.vert new file mode 100644 index 00000000000..6744b783622 --- /dev/null +++ b/shaders-msl/vert/uniform-struct-packing-nested.vert @@ -0,0 +1,50 @@ +#version 450 + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float 
gl_CullDistance[1]; +}; + +struct s0 +{ + mediump mat2x3 m0; + ivec4 m1; + mat4 m2; + uvec2 m3; +}; + +struct s1 +{ + mediump mat3x4 m0; + mediump int m1; + uvec3 m2; + s0 m3; +}; + +layout(set = 0, binding = 0, std140) uniform data_u_t +{ + layout(row_major, offset = 368) mediump mat2x3 m0; + layout(offset = 0) vec2 m1[5]; + layout(row_major, offset = 128) s1 m2; + layout(row_major, offset = 80) mediump mat4x2 m3; + layout(offset = 112) ivec4 m4; +} data_u; + +layout(location = 0) in vec4 vtx_posn; +layout(location = 0) out mediump float foo; + +void main() +{ + gl_Position = vtx_posn; + vec2 a = data_u.m1[3]; + ivec4 b = data_u.m4; + mat2x3 c = data_u.m0; + mat3x4 d = data_u.m2.m0; + mat4 e = data_u.m2.m3.m2; + foo = (a.y + b.z) * c[1][2] * d[2][3] * e[3][3]; +} + + diff --git a/shaders-msl/vert/unused-position.vert b/shaders-msl/vert/unused-position.vert new file mode 100644 index 00000000000..61e30b431ce --- /dev/null +++ b/shaders-msl/vert/unused-position.vert @@ -0,0 +1,13 @@ +#version 450 + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; +}; + +void main() +{ + gl_PointSize = 1.0; +} diff --git a/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag b/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag new file mode 100644 index 00000000000..963493b871d --- /dev/null +++ b/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag @@ -0,0 +1,14 @@ +#version 310 es +#extension GL_EXT_multiview : require +precision mediump float; + +layout(location = 0) in vec4 vColor; +layout(location = 1) in vec2 vTex[4]; +layout(binding = 0) uniform sampler2D uTex; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vColor * texture(uTex, vTex[gl_ViewIndex]); +} + diff --git a/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag b/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag new file mode 100644 index 
00000000000..ba57b8c5afa --- /dev/null +++ b/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag @@ -0,0 +1,41 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 19 +; Schema: 0 + OpCapability Shader + OpCapability DemoteToHelperInvocationEXT + OpExtension "SPV_EXT_demote_to_helper_invocation" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_demote_to_helper_invocation" + OpName %main "main" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %19 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %9 = OpIsHelperInvocationEXT %bool + OpDemoteToHelperInvocationEXT + %10 = OpLogicalNot %bool %9 + OpSelectionMerge %12 None + OpBranchConditional %10 %11 %12 + %11 = OpLabel + OpStore %FragColor %19 + OpBranch %12 + %12 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag b/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag new file mode 100644 index 00000000000..18407988db1 --- /dev/null +++ b/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag @@ -0,0 +1,18 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void foo() +{ + demote; +} + +void bar() +{ + bool helper = helperInvocationEXT(); +} + +void main() +{ + foo(); + bar(); +} diff --git 
a/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag b/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag new file mode 100644 index 00000000000..8b8bb61ff7b --- /dev/null +++ b/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag @@ -0,0 +1,8 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void main() +{ + demote; + bool helper = helperInvocationEXT(); +} diff --git a/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert b/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert new file mode 100644 index 00000000000..d54931a6de9 --- /dev/null +++ b/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert @@ -0,0 +1,8 @@ +#version 450 core +#extension GL_EXT_device_group : require +#extension GL_EXT_multiview : require + +void main() +{ + gl_Position = vec4(gl_DeviceIndex, gl_ViewIndex, 0.0, 1.0); +} diff --git a/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert b/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert new file mode 100644 index 00000000000..16ed51b15ef --- /dev/null +++ b/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert @@ -0,0 +1,7 @@ +#version 450 core +#extension GL_EXT_device_group : require + +void main() +{ + gl_Position = vec4(gl_DeviceIndex); +} diff --git a/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert b/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert new file mode 100644 index 00000000000..eb1bc766f2d --- /dev/null +++ b/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert @@ -0,0 +1,14 @@ +#version 310 es +#extension GL_EXT_multiview : require + +layout(std140, binding = 0) uniform MVPs +{ + mat4 MVP[2]; +}; + +layout(location = 0) in vec4 Position; + +void main() +{ + gl_Position = MVP[gl_ViewIndex] * Position; +} diff --git a/shaders-no-opt/asm/comp/access-tracking-function-call-result.asm.comp 
b/shaders-no-opt/asm/comp/access-tracking-function-call-result.asm.comp new file mode 100644 index 00000000000..c11d4cdd0a9 --- /dev/null +++ b/shaders-no-opt/asm/comp/access-tracking-function-call-result.asm.comp @@ -0,0 +1,54 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 25 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 460 + OpName %main "main" + OpName %foo_ "foo(" + OpName %Output "Output" + OpMemberName %Output 0 "myout" + OpName %_ "" + OpMemberDecorate %Output 0 Offset 0 + OpDecorate %Output BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %7 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %9 = OpTypeFunction %int + %int_12 = OpConstant %int 12 + %bool = OpTypeBool + %true = OpConstantTrue %bool + %Output = OpTypeStruct %int +%_ptr_Uniform_Output = OpTypePointer Uniform %Output + %_ = OpVariable %_ptr_Uniform_Output Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_int = OpTypePointer Uniform %int + %main = OpFunction %void None %7 + %16 = OpLabel + %17 = OpFunctionCall %int %foo_ + OpBranch %18 + %18 = OpLabel + OpLoopMerge %19 %20 None + OpBranchConditional %true %21 %19 + %21 = OpLabel + %22 = OpAccessChain %_ptr_Uniform_int %_ %int_0 + OpStore %22 %17 + OpReturn + %20 = OpLabel + OpBranch %18 + %19 = OpLabel + %23 = OpAccessChain %_ptr_Uniform_int %_ %int_0 + OpStore %23 %17 + OpReturn + OpFunctionEnd + %foo_ = OpFunction %int None %9 + %24 = OpLabel + OpReturnValue %int_12 + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/shaders-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 00000000000..87aee2db54f --- /dev/null +++ b/shaders-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; 
Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %T "T" + OpMemberName %T 0 "a" + OpName %v "v" + OpName %T_0 "T" + OpMemberName %T_0 0 "b" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "foo" + OpName %_ "" + OpName %T_1 "T" + OpMemberName %T_1 0 "c" + OpName %SSBO2 "SSBO2" + OpMemberName %SSBO2 0 "bar" + OpName %__0 "" + OpMemberDecorate %T_0 0 Offset 0 + OpDecorate %_runtimearr_T_0 ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpMemberDecorate %T_1 0 Offset 0 + OpDecorate %_runtimearr_T_1 ArrayStride 16 + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %T = OpTypeStruct %float +%_ptr_Function_T = OpTypePointer Function %T + %float_40 = OpConstant %float 40 + %11 = OpConstantComposite %T %float_40 + %T_0 = OpTypeStruct %float +%_runtimearr_T_0 = OpTypeRuntimeArray %T_0 + %SSBO1 = OpTypeStruct %_runtimearr_T_0 +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 +%_ptr_Uniform_T_0 = OpTypePointer Uniform %T_0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %T_1 = OpTypeStruct %float +%_runtimearr_T_1 = OpTypeRuntimeArray %T_1 + %SSBO2 = OpTypeStruct %_runtimearr_T_1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %__0 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int_30 = OpConstant %int 30 +%_ptr_Uniform_T_1 = OpTypePointer Uniform %T_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable 
%_ptr_Function_T Function + OpStore %v %11 + %20 = OpLoad %T %v + %22 = OpAccessChain %_ptr_Uniform_T_0 %_ %int_0 %int_10 + %23 = OpCompositeExtract %float %20 0 + %25 = OpAccessChain %_ptr_Uniform_float %22 %int_0 + OpStore %25 %23 + %32 = OpLoad %T %v + %34 = OpAccessChain %_ptr_Uniform_T_1 %__0 %int_0 %int_30 + %35 = OpCompositeExtract %float %32 0 + %36 = OpAccessChain %_ptr_Uniform_float %34 %int_0 + OpStore %36 %35 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp b/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp index 0e1ce235d21..504a9546c5a 100644 --- a/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp +++ b/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp @@ -111,11 +111,23 @@ OpStore %ptr_f32 %s16_to_f32_signed %u16_to_f32_signed = OpConvertSToF %float %u16 OpStore %ptr_f32 %u16_to_f32_signed + + %s32_to_f32_signed = OpConvertSToF %float %s32 + OpStore %ptr_f32 %s32_to_f32_signed + %u32_to_f32_signed = OpConvertSToF %float %u32 + OpStore %ptr_f32 %u32_to_f32_signed + + ; UToF %s16_to_f32_unsigned = OpConvertUToF %float %s16 OpStore %ptr_f32 %s16_to_f32_unsigned %u16_to_f32_unsigned = OpConvertUToF %float %u16 OpStore %ptr_f32 %u16_to_f32_unsigned + %s32_to_f32_unsigned = OpConvertUToF %float %s32 + OpStore %ptr_f32 %s32_to_f32_unsigned + %u32_to_f32_unsigned = OpConvertUToF %float %u32 + OpStore %ptr_f32 %u32_to_f32_unsigned + ; FToS %f32_to_s16_signed = OpConvertFToS %short %f32 OpStore %ptr_s16 %f32_to_s16_signed diff --git a/shaders-no-opt/asm/comp/atomic-load-store.asm.comp b/shaders-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 00000000000..3f2d141a1f5 --- /dev/null +++ b/shaders-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport 
"GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %c "c" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %SSBO = OpTypeStruct %uint %uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_uint Function + %15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 + %16 = OpAtomicLoad %uint %15 %int_1 %int_0 + OpStore %c %16 + %18 = OpLoad %uint %c + %19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + OpAtomicStore %19 %int_1 %int_0 %18 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/basic.spv16.asm.comp b/shaders-no-opt/asm/comp/basic.spv16.asm.comp new file mode 100644 index 00000000000..4675c50e134 --- /dev/null +++ b/shaders-no-opt/asm/comp/basic.spv16.asm.comp @@ -0,0 +1,48 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %_ %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_float 
ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_runtimearr_float = OpTypeRuntimeArray %float + %SSBO = OpTypeStruct %_runtimearr_float +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %20 = OpLoad %uint %19 + %23 = OpAccessChain %_ptr_StorageBuffer_float %_ %int_0 %20 + %24 = OpLoad %float %23 + %25 = OpFAdd %float %24 %float_2 + %26 = OpAccessChain %_ptr_StorageBuffer_float %_ %int_0 %20 + OpStore %26 %25 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp b/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp new file mode 100644 index 00000000000..3651a4de527 --- /dev/null +++ b/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp @@ -0,0 +1,63 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 33 +; Schema: 0 + OpCapability Shader + OpCapability Float16 + OpCapability StorageBuffer16BitAccess + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute 
%main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types" + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpMemberName %SSBO 2 "c" + OpMemberName %SSBO 3 "d" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpMemberDecorate %SSBO 2 Offset 8 + OpMemberDecorate %SSBO 3 Offset 12 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %float = OpTypeFloat 32 + %SSBO = OpTypeStruct %v2half %float %float %v2half +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half + %uint = OpTypeInt 32 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_3 = OpConstant %int 3 + %int_2 = OpConstant %int 2 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Uniform_v2half %_ %int_0 + %17 = OpLoad %v2half %16 + %20 = OpBitcast %float %17 + %22 = OpAccessChain %_ptr_Uniform_float %_ %int_1 + OpStore %22 %20 + %25 = OpAccessChain %_ptr_Uniform_float %_ %int_2 + %26 = OpLoad %float %25 + %28 = OpBitcast %v2half %26 + %29 = OpAccessChain %_ptr_Uniform_v2half %_ %int_3 + OpStore %29 %28 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/shaders-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 00000000000..435fa322215 --- /dev/null +++ b/shaders-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,97 @@ +; 
SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "ints" + OpMemberName %SSBO 1 "uints" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %v4int = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 + + %int_1 = OpConstant %int 1 + %uint_11 = OpConstant %uint 11 + + %SSBO = OpTypeStruct %v4int %v4uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int +%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint + %main = OpFunction %void None %3 + %5 = OpLabel + %ints_ptr = OpAccessChain %_ptr_Uniform_v4int %_ %int_0 + %uints_ptr = OpAccessChain %_ptr_Uniform_v4uint %_ %int_1 + %ints = OpLoad %v4int %ints_ptr + %uints = OpLoad %v4uint %uints_ptr + + %ints_alt = OpVectorShuffle %v4int %ints %ints 3 2 1 0 + %uints_alt = OpVectorShuffle %v4uint %uints %uints 3 2 1 0 + + %int_to_int_popcount = OpBitCount %v4int %ints + %int_to_uint_popcount = OpBitCount %v4uint %ints + %uint_to_int_popcount = OpBitCount %v4int %uints + %uint_to_uint_popcount = OpBitCount %v4uint %uints + + ; BitReverse must have matching types w.r.t. sign, yay. + %int_to_int_reverse = OpBitReverse %v4int %ints + ;%int_to_uint_reverse = OpBitReverse %v4uint %ints + ;%uint_to_int_reverse = OpBitReverse %v4int %uints + %uint_to_uint_reverse = OpBitReverse %v4uint %uints + + ; Base and Result must match. 
+ %int_to_int_sbit = OpBitFieldSExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_sbit = OpBitFieldSExtract %v4uint %ints %offset %count + ;%uint_to_int_sbit = OpBitFieldSExtract %v4int %uints %offset %count + %uint_to_uint_sbit = OpBitFieldSExtract %v4uint %uints %uint_11 %int_1 + + ; Base and Result must match. + %int_to_int_ubit = OpBitFieldUExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_ubit = OpBitFieldUExtract %v4uint %ints %offset %count + ;%uint_to_int_ubit = OpBitFieldUExtract %v4int %uints %offset %count + %uint_to_uint_ubit = OpBitFieldUExtract %v4uint %uints %uint_11 %int_1 + + %int_to_int_insert = OpBitFieldInsert %v4int %ints %ints_alt %int_1 %uint_11 + %uint_to_uint_insert = OpBitFieldInsert %v4uint %uints %uints_alt %uint_11 %int_1 + + OpStore %ints_ptr %int_to_int_popcount + OpStore %uints_ptr %int_to_uint_popcount + OpStore %ints_ptr %uint_to_int_popcount + OpStore %uints_ptr %uint_to_uint_popcount + + OpStore %ints_ptr %int_to_int_reverse + ;OpStore %uints_ptr %int_to_uint_reverse + ;OpStore %ints_ptr %uint_to_int_reverse + OpStore %uints_ptr %uint_to_uint_reverse + + OpStore %ints_ptr %int_to_int_sbit + ;OpStore %uints_ptr %int_to_uint_sbit + ;OpStore %ints_ptr %uint_to_int_sbit + OpStore %uints_ptr %uint_to_uint_sbit + + OpStore %ints_ptr %int_to_int_ubit + ;OpStore %uints_ptr %int_to_uint_ubit + ;OpStore %ints_ptr %uint_to_int_ubit + OpStore %uints_ptr %uint_to_uint_ubit + + OpStore %ints_ptr %int_to_int_insert + OpStore %uints_ptr %uint_to_uint_insert + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/bitscan.asm.comp b/shaders-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 00000000000..e3b785cd52b --- /dev/null +++ b/shaders-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,72 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" 
+ OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "u" + OpMemberName %SSBO 1 "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + %SSBO = OpTypeStruct %uvec4 %ivec4 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uvec4 = OpTypePointer Uniform %uvec4 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_ivec4 = OpTypePointer Uniform %ivec4 + %main = OpFunction %void None %3 + %5 = OpLabel + %uptr = OpAccessChain %_ptr_Uniform_uvec4 %_ %int_0 + %iptr = OpAccessChain %_ptr_Uniform_ivec4 %_ %int_1 + %uvalue = OpLoad %uvec4 %uptr + %ivalue = OpLoad %ivec4 %iptr + + %lsb_uint_to_uint = OpExtInst %uvec4 %1 FindILsb %uvalue + %lsb_uint_to_int = OpExtInst %ivec4 %1 FindILsb %uvalue + %lsb_int_to_uint = OpExtInst %uvec4 %1 FindILsb %ivalue + %lsb_int_to_int = OpExtInst %ivec4 %1 FindILsb %ivalue + + %umsb_uint_to_uint = OpExtInst %uvec4 %1 FindUMsb %uvalue + %umsb_uint_to_int = OpExtInst %ivec4 %1 FindUMsb %uvalue + %umsb_int_to_uint = OpExtInst %uvec4 %1 FindUMsb %ivalue + %umsb_int_to_int = OpExtInst %ivec4 %1 FindUMsb %ivalue + + %smsb_uint_to_uint = OpExtInst %uvec4 %1 FindSMsb %uvalue + %smsb_uint_to_int = OpExtInst %ivec4 %1 FindSMsb %uvalue + %smsb_int_to_uint = OpExtInst %uvec4 %1 FindSMsb %ivalue + %smsb_int_to_int = OpExtInst %ivec4 %1 FindSMsb %ivalue + + OpStore %uptr %lsb_uint_to_uint + OpStore %iptr %lsb_uint_to_int + OpStore %uptr %lsb_int_to_uint + OpStore %iptr %lsb_int_to_int + + OpStore %uptr %umsb_uint_to_uint + OpStore %iptr %umsb_uint_to_int + OpStore %uptr %umsb_int_to_uint + OpStore %iptr %umsb_int_to_int + + OpStore %uptr 
%smsb_uint_to_uint + OpStore %iptr %smsb_uint_to_int + OpStore %uptr %smsb_int_to_uint + OpStore %iptr %smsb_int_to_int + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/buffer-atomic-nonuniform.vk.nocompat.asm.comp b/shaders-no-opt/asm/comp/buffer-atomic-nonuniform.vk.nocompat.asm.comp new file mode 100644 index 00000000000..132f38bf72d --- /dev/null +++ b/shaders-no-opt/asm/comp/buffer-atomic-nonuniform.vk.nocompat.asm.comp @@ -0,0 +1,53 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability StorageBufferArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbos "ssbos" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %ssbos DescriptorSet 0 + OpDecorate %ssbos Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %22 NonUniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %SSBO = OpTypeStruct %uint +%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO +%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO + %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_2 = OpConstant %uint 2 +%_ptr_Input_uint = OpTypePointer Input %uint + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_1 = 
OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %17 = OpLoad %uint %16 + %18 = OpCopyObject %uint %17 + %22 = OpAccessChain %_ptr_Uniform_uint %ssbos %18 %int_0 + %25 = OpAtomicIAdd %uint %22 %uint_1 %uint_0 %uint_1 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/buffer-device-address-ptr-casting.vk.nocompat.asm.comp b/shaders-no-opt/asm/comp/buffer-device-address-ptr-casting.vk.nocompat.asm.comp new file mode 100644 index 00000000000..ed8d0ba6f5e --- /dev/null +++ b/shaders-no-opt/asm/comp/buffer-device-address-ptr-casting.vk.nocompat.asm.comp @@ -0,0 +1,106 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 62 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_KHR_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_ARB_gpu_shader_int64" + OpSourceExtension "GL_EXT_buffer_reference" + OpSourceExtension "GL_EXT_buffer_reference_uvec2" + OpName %main "main" + OpName %SomeBuffer "SomeBuffer" + OpMemberName %SomeBuffer 0 "v" + OpMemberName %SomeBuffer 1 "a" + OpMemberName %SomeBuffer 2 "b" + OpName %Registers "Registers" + OpMemberName %Registers 0 "address" + OpMemberName %Registers 1 "address2" + OpName %registers "registers" + OpName %a "a" + OpName %b "b" + OpMemberDecorate %SomeBuffer 0 Offset 0 + OpMemberDecorate %SomeBuffer 1 Offset 16 + OpMemberDecorate %SomeBuffer 2 Offset 24 + OpDecorate %SomeBuffer Block + OpMemberDecorate %Registers 0 Offset 0 + OpMemberDecorate %Registers 1 Offset 8 + OpDecorate %Registers Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_SomeBuffer 
PhysicalStorageBuffer + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %ulong = OpTypeInt 64 0 + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %SomeBuffer = OpTypeStruct %v4float %ulong %v2uint +%_ptr_PhysicalStorageBuffer_SomeBuffer = OpTypePointer PhysicalStorageBuffer %SomeBuffer +%_ptr_Function__ptr_PhysicalStorageBuffer_SomeBuffer = OpTypePointer Function %_ptr_PhysicalStorageBuffer_SomeBuffer + %Registers = OpTypeStruct %ulong %v2uint +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %registers = OpVariable %_ptr_PushConstant_Registers PushConstant + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_PushConstant_ulong = OpTypePointer PushConstant %ulong + %int_1 = OpConstant %int 1 +%_ptr_PushConstant_v2uint = OpTypePointer PushConstant %v2uint + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %35 = OpConstantComposite %v4float %float_1 %float_2 %float_3 %float_4 +%_ptr_PhysicalStorageBuffer_v4float = OpTypePointer PhysicalStorageBuffer %v4float + %float_5 = OpConstant %float 5 + %float_6 = OpConstant %float 6 + %float_7 = OpConstant %float 7 + %float_8 = OpConstant %float 8 + %43 = OpConstantComposite %v4float %float_5 %float_6 %float_7 %float_8 +%_ptr_Function_ulong = OpTypePointer Function %ulong +%_ptr_Function_v2uint = OpTypePointer Function %v2uint +%_ptr_PhysicalStorageBuffer_ulong = OpTypePointer PhysicalStorageBuffer %ulong + %int_2 = OpConstant %int 2 +%_ptr_PhysicalStorageBuffer_v2uint = OpTypePointer PhysicalStorageBuffer %v2uint + %main = OpFunction %void None %3 + %5 = OpLabel + %a = OpVariable %_ptr_Function_ulong Function + %b = OpVariable %_ptr_Function_v2uint Function + %21 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_0 + %27 = OpAccessChain %_ptr_PushConstant_v2uint %registers %int_1 + %uint_ptr0 = OpLoad %ulong %21 + %uint_ptr1 = OpLoad %v2uint %27 + + ; ConvertUToPtr and vice versa do not 
accept vectors. + %ulong_ptr0 = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr0 + %ulong_ptr1 = OpBitcast %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr0 + %uvec2_ptr0 = OpBitcast %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr1 + + %vec4_write0 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %ulong_ptr0 %int_0 + %vec4_write1 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %ulong_ptr1 %int_0 + %vec4_write2 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %uvec2_ptr0 %int_0 + + OpStore %vec4_write0 %35 Aligned 16 + OpStore %vec4_write1 %35 Aligned 16 + OpStore %vec4_write2 %35 Aligned 16 + + %ulong_from_ptr0 = OpConvertPtrToU %ulong %ulong_ptr0 + %ulong_from_ptr1 = OpBitcast %ulong %ulong_ptr1 + %uvec2_from_ptr0 = OpBitcast %v2uint %uvec2_ptr0 + + %ptr0 = OpAccessChain %_ptr_PhysicalStorageBuffer_ulong %ulong_ptr0 %int_1 + %ptr1 = OpAccessChain %_ptr_PhysicalStorageBuffer_ulong %ulong_ptr1 %int_1 + %ptr2 = OpAccessChain %_ptr_PhysicalStorageBuffer_v2uint %uvec2_ptr0 %int_2 + + OpStore %ptr0 %ulong_from_ptr0 Aligned 8 + OpStore %ptr1 %ulong_from_ptr1 Aligned 8 + OpStore %ptr2 %uvec2_from_ptr0 Aligned 8 + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/buffer-reference-aliased-block-name.nocompat.vk.asm.comp b/shaders-no-opt/asm/comp/buffer-reference-aliased-block-name.nocompat.vk.asm.comp new file mode 100644 index 00000000000..816985a108e --- /dev/null +++ b/shaders-no-opt/asm/comp/buffer-reference-aliased-block-name.nocompat.vk.asm.comp @@ -0,0 +1,110 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 59 +; Schema: 0 + OpCapability Shader + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_EXT_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpSourceExtension 
"GL_EXT_buffer_reference" + OpName %main "main" + OpName %Registers "Registers" + OpMemberName %Registers 0 "ro" + OpMemberName %Registers 1 "rw" + OpMemberName %Registers 2 "wo" + OpName %RO "Alias" + OpMemberName %RO 0 "v" + OpName %RW "Alias" + OpMemberName %RW 0 "v" + OpName %WO "Alias" + OpMemberName %WO 0 "v" + OpName %registers "registers" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpMemberDecorate %Registers 0 Offset 0 + OpMemberDecorate %Registers 1 Offset 8 + OpMemberDecorate %Registers 2 Offset 16 + OpDecorate %Registers Block + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %RO 0 NonWritable + OpMemberDecorate %RO 0 Offset 0 + OpDecorate %RO Block + OpDecorate %_runtimearr_v4float_0 ArrayStride 16 + OpMemberDecorate %RW 0 Restrict + OpMemberDecorate %RW 0 Offset 0 + OpDecorate %RW Block + OpDecorate %_runtimearr_v4float_1 ArrayStride 16 + OpMemberDecorate %WO 0 Coherent + OpMemberDecorate %WO 0 NonReadable + OpMemberDecorate %WO 0 Offset 0 + OpDecorate %WO Block + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_RO PhysicalStorageBuffer + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_RW PhysicalStorageBuffer + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_WO PhysicalStorageBuffer + %Registers = OpTypeStruct %_ptr_PhysicalStorageBuffer_RO %_ptr_PhysicalStorageBuffer_RW %_ptr_PhysicalStorageBuffer_WO + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %RO = OpTypeStruct %_runtimearr_v4float +%_ptr_PhysicalStorageBuffer_RO = OpTypePointer PhysicalStorageBuffer %RO +%_runtimearr_v4float_0 = OpTypeRuntimeArray %v4float + %RW = OpTypeStruct %_runtimearr_v4float_0 +%_ptr_PhysicalStorageBuffer_RW = OpTypePointer PhysicalStorageBuffer %RW +%_runtimearr_v4float_1 = OpTypeRuntimeArray %v4float + %WO = 
OpTypeStruct %_runtimearr_v4float_1 +%_ptr_PhysicalStorageBuffer_WO = OpTypePointer PhysicalStorageBuffer %WO +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %registers = OpVariable %_ptr_PushConstant_Registers PushConstant + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_PushConstant__ptr_PhysicalStorageBuffer_RW = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_RW + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_PushConstant__ptr_PhysicalStorageBuffer_RO = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_RO +%_ptr_PhysicalStorageBuffer_v4float = OpTypePointer PhysicalStorageBuffer %v4float + %int_2 = OpConstant %int 2 +%_ptr_PushConstant__ptr_PhysicalStorageBuffer_WO = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_WO + %uint_64 = OpConstant %uint 64 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %23 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_RW %registers %int_1 + %24 = OpLoad %_ptr_PhysicalStorageBuffer_RW %23 + %32 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %33 = OpLoad %uint %32 + %35 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_RO %registers %int_0 + %36 = OpLoad %_ptr_PhysicalStorageBuffer_RO %35 + %37 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %38 = OpLoad %uint %37 + %40 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %36 %int_0 %38 + %41 = OpLoad %v4float %40 Aligned 16 + %42 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %24 %int_0 %33 + OpStore %42 %41 Aligned 16 + %45 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_WO %registers %int_2 + %46 = OpLoad 
%_ptr_PhysicalStorageBuffer_WO %45 + %47 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %48 = OpLoad %uint %47 + %49 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_RO %registers %int_0 + %50 = OpLoad %_ptr_PhysicalStorageBuffer_RO %49 + %51 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %52 = OpLoad %uint %51 + %53 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %50 %int_0 %52 + %54 = OpLoad %v4float %53 Aligned 16 + %55 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %46 %int_0 %48 + OpStore %55 %54 Aligned 16 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/buffer-reference-pointer-to-pod-in-buffer.asm.nocompat.vk.comp b/shaders-no-opt/asm/comp/buffer-reference-pointer-to-pod-in-buffer.asm.nocompat.vk.comp new file mode 100644 index 00000000000..8fda30e109e --- /dev/null +++ b/shaders-no-opt/asm/comp/buffer-reference-pointer-to-pod-in-buffer.asm.nocompat.vk.comp @@ -0,0 +1,44 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 25 +; Schema: 0 + OpCapability Shader + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_EXT_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_buffer_reference" + OpName %main "main" + OpName %Push "Push" + OpMemberName %Push 0 "ptr" + OpName %_ "" + OpMemberDecorate %Push 0 Offset 0 + OpDecorate %Push Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 +%_ptr_PhysicalStorageBuffer_uintPtr = OpTypePointer PhysicalStorageBuffer %v4uint + %Push = OpTypeStruct %_ptr_PhysicalStorageBuffer_uintPtr +%_ptr_PushConstant_Push = OpTypePointer PushConstant %Push + %_ = OpVariable %_ptr_PushConstant_Push PushConstant + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 
+%_ptr_PushConstant__ptr_PhysicalStorageBuffer_uintPtr = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_uintPtr + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_4 = OpConstant %uint 4 + %22 = OpConstantComposite %v4uint %uint_1 %uint_2 %uint_3 %uint_4 +%_ptr_PhysicalStorageBuffer_v4uint = OpTypePointer PhysicalStorageBuffer %v4uint + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_uintPtr %_ %int_0 + %17 = OpLoad %_ptr_PhysicalStorageBuffer_uintPtr %16 + OpStore %17 %22 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/buffer-reference-pointer-to-unused-pod-in-buffer.asm.nocompat.vk.comp b/shaders-no-opt/asm/comp/buffer-reference-pointer-to-unused-pod-in-buffer.asm.nocompat.vk.comp new file mode 100644 index 00000000000..d7ca03f6639 --- /dev/null +++ b/shaders-no-opt/asm/comp/buffer-reference-pointer-to-unused-pod-in-buffer.asm.nocompat.vk.comp @@ -0,0 +1,44 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 25 +; Schema: 0 + OpCapability Shader + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_EXT_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_buffer_reference" + OpName %main "main" + OpName %Push "Push" + OpMemberName %Push 0 "ptr" + OpName %_ "" + OpMemberDecorate %Push 0 Offset 0 + OpDecorate %Push Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 +%_ptr_PhysicalStorageBuffer_uintPtr = OpTypePointer PhysicalStorageBuffer %v4uint + %Push = OpTypeStruct %_ptr_PhysicalStorageBuffer_uintPtr +%_ptr_PushConstant_Push = OpTypePointer PushConstant %Push + %_ = OpVariable %_ptr_PushConstant_Push PushConstant + %int = 
OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_PushConstant__ptr_PhysicalStorageBuffer_uintPtr = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_uintPtr + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_4 = OpConstant %uint 4 + %22 = OpConstantComposite %v4uint %uint_1 %uint_2 %uint_3 %uint_4 +%_ptr_PhysicalStorageBuffer_v4uint = OpTypePointer PhysicalStorageBuffer %v4uint + %main = OpFunction %void None %3 + %5 = OpLabel + ;%16 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_uintPtr %_ %int_0 + ;%17 = OpLoad %_ptr_PhysicalStorageBuffer_uintPtr %16 + ; OpStore %17 %22 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp b/shaders-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 00000000000..e1dcb0ef8e2 --- /dev/null +++ b/shaders-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,81 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 49 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %gl_LocalInvocationID + OpExecutionMode %main LocalSize 4 4 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %gl_LocalInvocationID "gl_LocalInvocationID" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %25 "indexable" + OpName %38 "indexable" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + 
%3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_4 = OpConstant %uint 4 +%_arr_int_uint_4 = OpTypeArray %int %uint_4 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %25 = OpConstantComposite %_arr_int_uint_4 %int_0 %int_1 %int_2 %int_3 +%gl_LocalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Function__arr_int_uint_4 = OpTypePointer Function %_arr_int_uint_4 +%_ptr_Function_int = OpTypePointer Function %int + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %38 = OpConstantComposite %_arr_int_uint_4 %int_4 %int_5 %int_6 %int_7 + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_int = OpTypePointer Uniform %int +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_4 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_int_uint_4 Function +%indexable_0 = OpVariable %_ptr_Function__arr_int_uint_4 Function + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %27 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0 + %28 = OpLoad %uint %27 + OpStore %indexable %25 + %32 = OpAccessChain %_ptr_Function_int %indexable %28 + %33 = OpLoad %int %32 + %40 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_1 + %41 = OpLoad %uint %40 + OpStore %indexable_0 %38 + %43 = OpAccessChain %_ptr_Function_int %indexable_0 %41 + %44 = OpLoad %int %43 + %45 = OpIAdd %int %33 %44 + %47 = OpAccessChain 
%_ptr_Uniform_int %_ %int_0 %19 + OpStore %47 %45 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/copy-logical.spv14.asm.comp b/shaders-no-opt/asm/comp/copy-logical.spv14.asm.comp new file mode 100644 index 00000000000..20fa0b099b8 --- /dev/null +++ b/shaders-no-opt/asm/comp/copy-logical.spv14.asm.comp @@ -0,0 +1,69 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 48 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %ssbo + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %B1 "B1" + OpName %A "A" + OpName %C "C" + OpName %B2 "B2" + OpMemberName %A 0 "a" + OpMemberName %A 1 "b1" + OpMemberName %A 2 "b1_array" + OpMemberName %C 0 "c" + OpMemberName %C 1 "b2" + OpMemberName %C 2 "b2_array" + OpMemberName %B1 0 "elem1" + OpMemberName %B2 0 "elem2" + OpMemberName %SSBO 0 "a_block" + OpMemberName %SSBO 1 "c_block" + OpDecorate %B1Array ArrayStride 16 + OpDecorate %B2Array ArrayStride 16 + OpMemberDecorate %B1 0 Offset 0 + OpMemberDecorate %A 0 Offset 0 + OpMemberDecorate %A 1 Offset 16 + OpMemberDecorate %A 2 Offset 32 + OpMemberDecorate %B2 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %C 1 Offset 16 + OpMemberDecorate %C 2 Offset 32 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 96 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %v4float = OpTypeVector %float 4 + %B2 = OpTypeStruct %v4float + %B2Array = OpTypeArray %B2 %uint_4 + %C = OpTypeStruct %v4float %B2 %B2Array + %B1 = OpTypeStruct %v4float + %B1Array = OpTypeArray %B1 %uint_4 + %A = OpTypeStruct %v4float %B1 %B1Array + %SSBO = OpTypeStruct %A %C +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable 
%_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_C = OpTypePointer StorageBuffer %C + %int_0 = OpConstant %int 0 +%_ptr_Uniform_A = OpTypePointer StorageBuffer %A + %main = OpFunction %void None %3 + %5 = OpLabel + %22 = OpAccessChain %_ptr_Uniform_C %ssbo %int_1 + %39 = OpAccessChain %_ptr_Uniform_A %ssbo %int_0 + %23 = OpLoad %C %22 + %24 = OpCopyLogical %A %23 + OpStore %39 %24 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp b/shaders-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 00000000000..73f3ceee1ad --- /dev/null +++ b/shaders-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 26 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + ;OpEntryPoint GLCompute %main "main" %Samp %ubo %ssbo %v %w + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Samp "Samp" + OpName %UBO "UBO" + OpMemberName %UBO 0 "v" + OpName %ubo "ubo" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbo "ssbo" + OpName %v "v" + OpName %w "w" + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpDecorate %Samp DescriptorSet 0 + OpDecorate %Samp Binding 0 + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %ubo DescriptorSet 0 + OpDecorate %ubo Binding 1 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %uint_64 = OpConstant %uint 64 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 
%uint_1 %uint_1 + %float = OpTypeFloat 32 + %12 = OpTypeImage %float 2D 0 0 0 1 Unknown + %13 = OpTypeSampledImage %12 +%_ptr_UniformConstant_13 = OpTypePointer UniformConstant %13 + %Samp = OpVariable %_ptr_UniformConstant_13 UniformConstant + %UBO = OpTypeStruct %float +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %ubo = OpVariable %_ptr_Uniform_UBO Uniform + %SSBO = OpTypeStruct %float +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer +%_ptr_Private_float = OpTypePointer Private %float + %v = OpVariable %_ptr_Private_float Private +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %w = OpVariable %_ptr_Workgroup_float Workgroup + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp b/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp new file mode 100644 index 00000000000..e5ca1cbb508 --- /dev/null +++ b/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp @@ -0,0 +1,288 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 10117 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %gl_GlobalInvocationID + OpExecutionMode %1 LocalSize 1 1 1 + OpSource GLSL 430 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_arr_uint_int_16 ArrayStride 4 + OpMemberDecorate %_struct_4 0 Offset 0 + OpDecorate %_struct_4 BufferBlock + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 0 + OpDecorate %6 DescriptorSet 0 + OpDecorate %6 Binding 1 + OpDecorate %7 DescriptorSet 0 + OpDecorate %7 Binding 2 + %void = OpTypeVoid + %bool = OpTypeBool + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %float = OpTypeFloat 32 + %v2int = OpTypeVector %int 2 + %v2uint = OpTypeVector %uint 2 + %v2float = OpTypeVector %float 2 + %v3int = OpTypeVector %int 3 + %v3uint = OpTypeVector %uint 3 + 
%v3float = OpTypeVector %float 3 + %v4int = OpTypeVector %int 4 + %v4uint = OpTypeVector %uint 4 + %v4float = OpTypeVector %float 4 + %v4bool = OpTypeVector %bool 4 + %23 = OpTypeFunction %v4float %v4float + %24 = OpTypeFunction %bool + %25 = OpTypeFunction %void +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Input_int = OpTypePointer Input %int +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Input_v2int = OpTypePointer Input %v2int +%_ptr_Input_v2uint = OpTypePointer Input %v2uint +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_v4int = OpTypePointer Input %v4int +%_ptr_Input_v4uint = OpTypePointer Input %v4uint +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_int = OpTypePointer Output %int +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_ptr_Output_v2int = OpTypePointer Output %v2int +%_ptr_Output_v2uint = OpTypePointer Output %v2uint +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output_v4int = OpTypePointer Output %v4int +%_ptr_Output_v4uint = OpTypePointer Output %v4uint +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Function_v4float = OpTypePointer Function %v4float + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %float_0_5 = OpConstant %float 0.5 + %float_n1 = OpConstant %float -1 + %float_7 = OpConstant %float 7 + %float_8 = OpConstant %float 8 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %int_4 = OpConstant %int 4 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_32 = OpConstant %uint 32 + %uint_4 = OpConstant %uint 4 +%uint_2147483647 = OpConstant %uint 2147483647 + %66 = OpConstantComposite %v4float 
%float_1 %float_1 %float_1 %float_1 + %67 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %68 = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_arr_v4float_uint_32 = OpTypeArray %v4float %uint_32 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_ptr_Input__arr_v4float_uint_32 = OpTypePointer Input %_arr_v4float_uint_32 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %int_16 = OpConstant %int 16 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_arr_uint_int_16 = OpTypeArray %uint %int_16 + %_struct_4 = OpTypeStruct %_arr_uint_int_16 +%_ptr_Uniform__struct_4 = OpTypePointer Uniform %_struct_4 + %5 = OpVariable %_ptr_Uniform__struct_4 Uniform + %6 = OpVariable %_ptr_Uniform__struct_4 Uniform + %7 = OpVariable %_ptr_Uniform__struct_4 Uniform + %1 = OpFunction %void None %25 + %83 = OpLabel + %84 = OpLoad %v3uint %gl_GlobalInvocationID + %85 = OpCompositeConstruct %v4uint %84 %uint_0 + %86 = OpConvertUToF %v4float %85 + %87 = OpFunctionCall %v4float %88 %86 + OpReturn + OpFunctionEnd + %88 = OpFunction %v4float None %23 + %89 = OpFunctionParameter %v4float + %92 = OpLabel + %93 = OpVariable %_ptr_Function_int Function + OpStore %93 %int_0 + OpBranch %94 + %94 = OpLabel + %95 = OpLoad %int %93 + %96 = OpSLessThan %bool %95 %int_16 + OpLoopMerge %97 %10100 None + OpBranchConditional %96 %10101 %97 + %10101 = OpLabel + %10102 = OpLoad %int %93 + %90 = OpAccessChain %_ptr_Uniform_uint %6 %int_0 %10102 + %91 = 
OpLoad %uint %90 + %98 = OpAccessChain %_ptr_Uniform_uint %5 %int_0 %10102 + %99 = OpLoad %uint %98 + %100 = OpIAdd %uint %91 %99 + %101 = OpIAdd %uint %91 %100 + %102 = OpIAdd %uint %91 %101 + %103 = OpIAdd %uint %91 %102 + %104 = OpIAdd %uint %91 %103 + %105 = OpIAdd %uint %91 %104 + %106 = OpIAdd %uint %91 %105 + %107 = OpIAdd %uint %91 %106 + %108 = OpIAdd %uint %91 %107 + %109 = OpIAdd %uint %91 %108 + %110 = OpIAdd %uint %91 %109 + %111 = OpIAdd %uint %91 %110 + %112 = OpIAdd %uint %91 %111 + %113 = OpIAdd %uint %91 %112 + %114 = OpIAdd %uint %91 %113 + %115 = OpIAdd %uint %91 %114 + %116 = OpIAdd %uint %91 %115 + %117 = OpIAdd %uint %91 %116 + %118 = OpIAdd %uint %91 %117 + %119 = OpIAdd %uint %91 %118 + %120 = OpIAdd %uint %91 %119 + %121 = OpIAdd %uint %91 %120 + %122 = OpIAdd %uint %91 %121 + %123 = OpIAdd %uint %91 %122 + %124 = OpIAdd %uint %91 %123 + %125 = OpIAdd %uint %91 %124 + %126 = OpIAdd %uint %91 %125 + %127 = OpIAdd %uint %91 %126 + %128 = OpIAdd %uint %91 %127 + %129 = OpIAdd %uint %91 %128 + %130 = OpIAdd %uint %91 %129 + %131 = OpIAdd %uint %91 %130 + %132 = OpIAdd %uint %91 %131 + %133 = OpIAdd %uint %91 %132 + %134 = OpIAdd %uint %91 %133 + %135 = OpIAdd %uint %91 %134 + %136 = OpIAdd %uint %91 %135 + %137 = OpIAdd %uint %91 %136 + %138 = OpIAdd %uint %91 %137 + %139 = OpIAdd %uint %91 %138 + %140 = OpIAdd %uint %91 %139 + %141 = OpIAdd %uint %91 %140 + %142 = OpIAdd %uint %91 %141 + %143 = OpIAdd %uint %91 %142 + %144 = OpIAdd %uint %91 %143 + %145 = OpIAdd %uint %91 %144 + %146 = OpIAdd %uint %91 %145 + %147 = OpIAdd %uint %91 %146 + %148 = OpIAdd %uint %91 %147 + %149 = OpIAdd %uint %91 %148 + %150 = OpIAdd %uint %91 %149 + %151 = OpIAdd %uint %91 %150 + %152 = OpIAdd %uint %91 %151 + %153 = OpIAdd %uint %91 %152 + %154 = OpIAdd %uint %91 %153 + %155 = OpIAdd %uint %91 %154 + %156 = OpIAdd %uint %91 %155 + %157 = OpIAdd %uint %91 %156 + %158 = OpIAdd %uint %91 %157 + %159 = OpIAdd %uint %91 %158 + %160 = OpIAdd %uint %91 %159 + %161 = 
OpIAdd %uint %91 %160 + %162 = OpIAdd %uint %91 %161 + %163 = OpIAdd %uint %91 %162 + %164 = OpIAdd %uint %91 %163 + %165 = OpIAdd %uint %91 %164 + %166 = OpIAdd %uint %91 %165 + %167 = OpIAdd %uint %91 %166 + %168 = OpIAdd %uint %91 %167 + %169 = OpIAdd %uint %91 %168 + %170 = OpIAdd %uint %91 %169 + %171 = OpIAdd %uint %91 %170 + %172 = OpIAdd %uint %91 %171 + %173 = OpIAdd %uint %91 %172 + %174 = OpIAdd %uint %91 %173 + %175 = OpIAdd %uint %91 %174 + %176 = OpIAdd %uint %91 %175 + %177 = OpIAdd %uint %91 %176 + %178 = OpIAdd %uint %91 %177 + %179 = OpIAdd %uint %91 %178 + %180 = OpIAdd %uint %91 %179 + %181 = OpIAdd %uint %91 %180 + %182 = OpIAdd %uint %91 %181 + %183 = OpIAdd %uint %91 %182 + %184 = OpIAdd %uint %91 %183 + %185 = OpIAdd %uint %91 %184 + %186 = OpIAdd %uint %91 %185 + %187 = OpIAdd %uint %91 %186 + %188 = OpIAdd %uint %91 %187 + %189 = OpIAdd %uint %91 %188 + %190 = OpIAdd %uint %91 %189 + %191 = OpIAdd %uint %91 %190 + %192 = OpIAdd %uint %91 %191 + %193 = OpIAdd %uint %91 %192 + %194 = OpIAdd %uint %91 %193 + %195 = OpIAdd %uint %91 %194 + %196 = OpIAdd %uint %91 %195 + %197 = OpIAdd %uint %91 %196 + %198 = OpIAdd %uint %91 %197 + %199 = OpIAdd %uint %91 %198 + %200 = OpIAdd %uint %91 %199 + %201 = OpIAdd %uint %91 %200 + %202 = OpIAdd %uint %91 %201 + %203 = OpIAdd %uint %91 %202 + %204 = OpIAdd %uint %91 %203 + %205 = OpIAdd %uint %91 %204 + %206 = OpIAdd %uint %91 %205 + %207 = OpIAdd %uint %91 %206 + %208 = OpIAdd %uint %91 %207 + %209 = OpIAdd %uint %91 %208 + %210 = OpIAdd %uint %91 %209 + %211 = OpIAdd %uint %91 %210 + %212 = OpIAdd %uint %91 %211 + %213 = OpIAdd %uint %91 %212 + %214 = OpIAdd %uint %91 %213 + %215 = OpIAdd %uint %91 %214 + %216 = OpIAdd %uint %91 %215 + %217 = OpIAdd %uint %91 %216 + %218 = OpIAdd %uint %91 %217 + %219 = OpIAdd %uint %91 %218 + %220 = OpIAdd %uint %91 %219 + %221 = OpIAdd %uint %91 %220 + %222 = OpIAdd %uint %91 %221 + %223 = OpIAdd %uint %91 %222 + %224 = OpIAdd %uint %91 %223 + %225 = OpIAdd %uint 
%91 %224 + %226 = OpIAdd %uint %91 %225 + %227 = OpIAdd %uint %91 %226 + %228 = OpIAdd %uint %91 %227 + %229 = OpIAdd %uint %91 %228 + %230 = OpIAdd %uint %91 %229 + %231 = OpIAdd %uint %91 %230 + %232 = OpIAdd %uint %91 %231 + %233 = OpIAdd %uint %91 %232 + %234 = OpIAdd %uint %91 %233 + %235 = OpIAdd %uint %91 %234 + %236 = OpIAdd %uint %91 %235 + %result = OpIAdd %uint %91 %236 + %10103 = OpAccessChain %_ptr_Uniform_uint %7 %int_0 %10102 + OpStore %10103 %result + OpBranch %10100 + %10100 = OpLabel + %10104 = OpLoad %int %93 + %10105 = OpIAdd %int %10104 %int_1 + OpStore %93 %10105 + OpBranch %94 + %97 = OpLabel + OpReturnValue %89 + OpFunctionEnd + %10106 = OpFunction %bool None %24 + %10107 = OpLabel + %10108 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %10109 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1 + %10110 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %10111 = OpLoad %uint %10108 + %10112 = OpLoad %uint %10109 + %10113 = OpLoad %uint %10110 + %10114 = OpBitwiseOr %uint %10111 %10112 + %10115 = OpBitwiseOr %uint %10113 %10114 + %10116 = OpIEqual %bool %10115 %uint_0 + OpReturnValue %10116 + OpFunctionEnd diff --git a/shaders/asm/extended-debug-extinst.invalid.asm.comp b/shaders-no-opt/asm/comp/extended-debug-extinst.invalid.asm.comp similarity index 100% rename from shaders/asm/extended-debug-extinst.invalid.asm.comp rename to shaders-no-opt/asm/comp/extended-debug-extinst.invalid.asm.comp diff --git a/shaders-no-opt/asm/comp/fuzz-collapse-degenerate-loop.asm.comp b/shaders-no-opt/asm/comp/fuzz-collapse-degenerate-loop.asm.comp new file mode 100644 index 00000000000..e1efd564cb8 --- /dev/null +++ b/shaders-no-opt/asm/comp/fuzz-collapse-degenerate-loop.asm.comp @@ -0,0 +1,118 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 71 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" + OpExecutionMode %1 LocalSize 1 
1 1 + OpDecorate %_struct_2 BufferBlock + OpMemberDecorate %_struct_2 0 Offset 0 + OpDecorate %_arr_uint_uint_2 ArrayStride 4 + OpDecorate %_struct_4 BufferBlock + OpMemberDecorate %_struct_4 0 Offset 0 + OpDecorate %_arr_uint_uint_3 ArrayStride 4 + OpDecorate %_struct_6 BufferBlock + OpMemberDecorate %_struct_6 0 Offset 0 + OpDecorate %_arr_uint_uint_11 ArrayStride 4 + OpDecorate %8 DescriptorSet 0 + OpDecorate %8 Binding 0 + OpDecorate %9 DescriptorSet 0 + OpDecorate %9 Binding 1 + OpDecorate %10 DescriptorSet 0 + OpDecorate %10 Binding 2 + %void = OpTypeVoid + %12 = OpTypeFunction %void + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %true = OpConstantTrue %bool + %uint_0 = OpConstant %uint 0 + %uint_0_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_8 = OpConstant %uint 8 + %uint_9 = OpConstant %uint 9 + %uint_10 = OpConstant %uint 10 + %uint_11 = OpConstant %uint 11 + %uint_12 = OpConstant %uint 12 + %uint_13 = OpConstant %uint 13 + %uint_14 = OpConstant %uint 14 +%_arr_uint_uint_2 = OpTypeArray %uint %uint_2 + %_struct_2 = OpTypeStruct %_arr_uint_uint_2 +%_ptr_Uniform__struct_2 = OpTypePointer Uniform %_struct_2 + %9 = OpVariable %_ptr_Uniform__struct_2 Uniform +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 + %_struct_4 = OpTypeStruct %_arr_uint_uint_3 +%_ptr_Uniform__struct_4 = OpTypePointer Uniform %_struct_4 + %8 = OpVariable %_ptr_Uniform__struct_4 Uniform +%_arr_uint_uint_11 = OpTypeArray %uint %uint_11 + %_struct_6 = OpTypeStruct %_arr_uint_uint_11 +%_ptr_Uniform__struct_6 = OpTypePointer Uniform %_struct_6 + %10 = OpVariable %_ptr_Uniform__struct_6 Uniform +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %1 = OpFunction %void None %12 + %33 = OpLabel + %34 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %35 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %36 = OpVariable %_ptr_Function_uint Function %uint_0_0 + 
%37 = OpLoad %uint %34 + %38 = OpAccessChain %_ptr_Uniform_uint %10 %uint_0_0 %37 + OpStore %38 %uint_8 + %39 = OpIAdd %uint %37 %uint_1 + OpStore %34 %39 + OpBranch %40 + %40 = OpLabel + %41 = OpLoad %uint %34 + %42 = OpAccessChain %_ptr_Uniform_uint %10 %uint_0_0 %41 + OpStore %42 %uint_9 + %43 = OpIAdd %uint %41 %uint_1 + OpStore %34 %43 + %44 = OpLoad %uint %35 + %45 = OpAccessChain %_ptr_Uniform_uint %8 %uint_0_0 %44 + %46 = OpLoad %uint %45 + %47 = OpIEqual %bool %46 %uint_1 + %48 = OpIAdd %uint %44 %uint_1 + OpStore %35 %48 + OpLoopMerge %49 %50 None + OpBranchConditional %47 %51 %49 + %51 = OpLabel + %52 = OpLoad %uint %34 + %53 = OpAccessChain %_ptr_Uniform_uint %10 %uint_0_0 %52 + OpStore %53 %uint_12 + %54 = OpIAdd %uint %52 %uint_1 + OpStore %34 %54 + %55 = OpLoad %uint %36 + %56 = OpAccessChain %_ptr_Uniform_uint %9 %uint_0_0 %55 + %57 = OpLoad %uint %56 + %58 = OpIEqual %bool %57 %uint_1 + %59 = OpIAdd %uint %55 %uint_1 + OpStore %36 %59 + OpLoopMerge %60 %61 None + OpBranchConditional %58 %60 %60 + %49 = OpLabel + %62 = OpLoad %uint %34 + %63 = OpAccessChain %_ptr_Uniform_uint %10 %uint_0_0 %62 + OpStore %63 %uint_10 + %64 = OpIAdd %uint %62 %uint_1 + OpStore %34 %64 + OpReturn + %60 = OpLabel + %65 = OpLoad %uint %34 + %66 = OpAccessChain %_ptr_Uniform_uint %10 %uint_0_0 %65 + OpStore %66 %uint_13 + %67 = OpIAdd %uint %65 %uint_1 + OpStore %34 %67 + OpBranch %50 + %61 = OpLabel + OpBranch %51 + %50 = OpLabel + %68 = OpLoad %uint %34 + %69 = OpAccessChain %_ptr_Uniform_uint %10 %uint_0_0 %68 + OpStore %69 %uint_11 + %70 = OpIAdd %uint %68 %uint_1 + OpStore %34 %70 + OpBranch %40 + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/fuzz-loop-variable-dominator-with-unreachable-continue-block.asm.comp b/shaders-no-opt/asm/comp/fuzz-loop-variable-dominator-with-unreachable-continue-block.asm.comp new file mode 100644 index 00000000000..7fb41ed3f81 --- /dev/null +++ 
b/shaders-no-opt/asm/comp/fuzz-loop-variable-dominator-with-unreachable-continue-block.asm.comp @@ -0,0 +1,376 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 257 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %gl_LocalInvocationIndex %gl_WorkGroupID + OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %_struct_4 BufferBlock + OpMemberDecorate %_struct_4 0 Offset 0 + OpDecorate %_arr_uint_uint_1 ArrayStride 4 + OpDecorate %_struct_6 BufferBlock + OpMemberDecorate %_struct_6 0 Offset 0 + OpDecorate %_arr_uint_uint_2 ArrayStride 4 + OpDecorate %_struct_8 BufferBlock + OpMemberDecorate %_struct_8 0 Offset 0 + OpDecorate %_arr_uint_uint_3 ArrayStride 4 + OpDecorate %_struct_10 BufferBlock + OpMemberDecorate %_struct_10 0 Offset 0 + OpDecorate %_arr_uint_uint_37 ArrayStride 4 + OpDecorate %12 DescriptorSet 0 + OpDecorate %12 Binding 0 + OpDecorate %13 DescriptorSet 0 + OpDecorate %13 Binding 1 + OpDecorate %14 DescriptorSet 0 + OpDecorate %14 Binding 2 + OpDecorate %15 DescriptorSet 0 + OpDecorate %15 Binding 3 + OpDecorate %16 DescriptorSet 0 + OpDecorate %16 Binding 4 + OpDecorate %17 DescriptorSet 0 + OpDecorate %17 Binding 5 + OpDecorate %18 DescriptorSet 0 + OpDecorate %18 Binding 6 + OpDecorate %19 DescriptorSet 0 + OpDecorate %19 Binding 7 + OpDecorate %20 DescriptorSet 0 + OpDecorate %20 Binding 8 + OpDecorate %21 DescriptorSet 0 + OpDecorate %21 Binding 9 + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %gl_WorkGroupID BuiltIn WorkgroupId + %void = OpTypeVoid + %23 = OpTypeFunction %void + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %true = OpConstantTrue %bool + %uint_0 = OpConstant %uint 0 + %uint_666 = OpConstant %uint 666 + %uint_0_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_8 = OpConstant %uint 8 + %uint_9 = OpConstant %uint 9 + %uint_10 = OpConstant %uint 
10 + %uint_11 = OpConstant %uint 11 + %uint_12 = OpConstant %uint 12 + %uint_13 = OpConstant %uint 13 + %uint_14 = OpConstant %uint 14 + %uint_15 = OpConstant %uint 15 + %uint_16 = OpConstant %uint 16 + %uint_17 = OpConstant %uint 17 + %uint_18 = OpConstant %uint 18 + %uint_19 = OpConstant %uint 19 + %uint_20 = OpConstant %uint 20 + %uint_21 = OpConstant %uint 21 + %uint_22 = OpConstant %uint 22 + %uint_23 = OpConstant %uint 23 + %uint_24 = OpConstant %uint 24 + %uint_25 = OpConstant %uint 25 + %uint_26 = OpConstant %uint 26 + %uint_27 = OpConstant %uint 27 + %uint_28 = OpConstant %uint 28 + %uint_29 = OpConstant %uint 29 + %uint_30 = OpConstant %uint 30 + %uint_31 = OpConstant %uint 31 + %uint_32 = OpConstant %uint 32 + %uint_33 = OpConstant %uint 33 + %uint_37 = OpConstant %uint 37 +%_arr_uint_uint_1 = OpTypeArray %uint %uint_1 + %_struct_4 = OpTypeStruct %_arr_uint_uint_1 +%_ptr_Uniform__struct_4 = OpTypePointer Uniform %_struct_4 + %12 = OpVariable %_ptr_Uniform__struct_4 Uniform + %13 = OpVariable %_ptr_Uniform__struct_4 Uniform + %19 = OpVariable %_ptr_Uniform__struct_4 Uniform +%_arr_uint_uint_2 = OpTypeArray %uint %uint_2 + %_struct_6 = OpTypeStruct %_arr_uint_uint_2 +%_ptr_Uniform__struct_6 = OpTypePointer Uniform %_struct_6 + %14 = OpVariable %_ptr_Uniform__struct_6 Uniform + %15 = OpVariable %_ptr_Uniform__struct_6 Uniform + %16 = OpVariable %_ptr_Uniform__struct_6 Uniform + %17 = OpVariable %_ptr_Uniform__struct_6 Uniform + %18 = OpVariable %_ptr_Uniform__struct_6 Uniform +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 + %_struct_8 = OpTypeStruct %_arr_uint_uint_3 +%_ptr_Uniform__struct_8 = OpTypePointer Uniform %_struct_8 + %20 = OpVariable %_ptr_Uniform__struct_8 Uniform +%_arr_uint_uint_37 = OpTypeArray %uint %uint_37 + %_struct_10 = OpTypeStruct %_arr_uint_uint_37 +%_ptr_Uniform__struct_10 = OpTypePointer Uniform %_struct_10 + %21 = OpVariable %_ptr_Uniform__struct_10 Uniform +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Uniform_uint = 
OpTypePointer Uniform %uint +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_WorkGroupID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %23 + %69 = OpLabel + %70 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %71 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %72 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %73 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %74 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %75 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %76 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %77 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %78 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %79 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %80 = OpLoad %uint %gl_LocalInvocationIndex + %81 = OpAccessChain %_ptr_Input_uint %gl_WorkGroupID %uint_0_0 + %82 = OpAccessChain %_ptr_Input_uint %gl_WorkGroupID %uint_1 + %83 = OpAccessChain %_ptr_Input_uint %gl_WorkGroupID %uint_2 + %84 = OpLoad %uint %81 + %85 = OpLoad %uint %82 + %86 = OpLoad %uint %83 + %87 = OpIMul %uint %86 %uint_1 + %88 = OpIMul %uint %85 %uint_1 + %89 = OpIAdd %uint %88 %87 + %90 = OpIAdd %uint %89 %84 + %91 = OpIMul %uint %80 %uint_1 + %92 = OpIMul %uint %80 %uint_1 + %93 = OpIMul %uint %80 %uint_2 + %94 = OpIMul %uint %80 %uint_2 + %95 = OpIMul %uint %80 %uint_2 + %96 = OpIMul %uint %80 %uint_2 + %97 = OpIMul %uint %80 %uint_2 + %98 = OpIMul %uint %80 %uint_1 + %99 = OpIMul %uint %80 %uint_3 + %100 = OpIMul %uint %90 %uint_1 + %101 = OpIMul %uint %90 %uint_1 + %102 = OpIMul %uint %90 %uint_2 + %103 = OpIMul %uint %90 %uint_2 + %104 = OpIMul %uint %90 %uint_2 + %105 = OpIMul %uint %90 %uint_2 + %106 = OpIMul %uint %90 %uint_2 + %107 = OpIMul %uint %90 %uint_1 + %108 = OpIMul %uint %90 %uint_3 + %109 = OpIAdd %uint %100 %91 + %110 = OpIAdd %uint %101 
%92 + %111 = OpIAdd %uint %102 %93 + %112 = OpIAdd %uint %103 %94 + %113 = OpIAdd %uint %104 %95 + %114 = OpIAdd %uint %105 %96 + %115 = OpIAdd %uint %106 %97 + %116 = OpIAdd %uint %107 %98 + %117 = OpIAdd %uint %108 %99 + %118 = OpIMul %uint %80 %uint_37 + %119 = OpIMul %uint %90 %uint_37 + %120 = OpIAdd %uint %119 %118 + OpStore %71 %109 + OpStore %72 %110 + OpStore %73 %111 + OpStore %74 %112 + OpStore %75 %113 + OpStore %76 %114 + OpStore %77 %115 + OpStore %78 %116 + OpStore %79 %117 + OpStore %70 %120 + %121 = OpLoad %uint %70 + %122 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %121 + OpStore %122 %uint_8 + %123 = OpIAdd %uint %121 %uint_1 + %124 = OpLoad %uint %71 + %125 = OpAccessChain %_ptr_Uniform_uint %12 %uint_0_0 %124 + %126 = OpLoad %uint %125 + %127 = OpIAdd %uint %124 %uint_1 + OpStore %71 %127 + OpSelectionMerge %128 None + OpSwitch %126 %128 + %128 = OpLabel + %129 = OpPhi %uint %130 %131 %123 %69 + %132 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %129 + OpStore %132 %uint_9 + %133 = OpIAdd %uint %129 %uint_1 + OpLoopMerge %134 %131 None + OpBranch %135 + %135 = OpLabel + %136 = OpPhi %uint %uint_666 %137 %133 %128 + %138 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %136 + OpStore %138 %uint_12 + %139 = OpIAdd %uint %136 %uint_1 + OpLoopMerge %140 %137 None + OpBranch %140 + %140 = OpLabel + %141 = OpPhi %uint %139 %135 %uint_666 %142 + %143 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %141 + OpStore %143 %uint_13 + %144 = OpIAdd %uint %141 %uint_1 + %145 = OpLoad %uint %75 + OpLoopMerge %146 %142 None + OpBranch %147 + %137 = OpLabel + OpBranch %135 + %147 = OpLabel + %148 = OpPhi %uint %144 %140 + %149 = OpPhi %uint %145 %140 + %150 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %148 + OpStore %150 %uint_17 + %151 = OpIAdd %uint %148 %uint_1 + %152 = OpAccessChain %_ptr_Uniform_uint %16 %uint_0_0 %149 + %153 = OpLoad %uint %152 + %154 = OpIEqual %bool %153 %uint_1 + %155 = OpIAdd %uint %149 %uint_1 + OpStore %75 %155 + 
OpSelectionMerge %156 None + OpBranchConditional %154 %157 %156 + %157 = OpLabel + %158 = OpPhi %uint %151 %147 + %159 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %158 + OpStore %159 %uint_19 + %160 = OpIAdd %uint %158 %uint_1 + %161 = OpLoad %uint %74 + OpBranch %146 + %156 = OpLabel + OpBranch %142 + %142 = OpLabel + OpBranchConditional %true %140 %146 + %146 = OpLabel + %162 = OpPhi %uint %160 %157 %uint_666 %142 + %163 = OpPhi %uint %161 %157 %uint_666 %142 + %164 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %162 + OpStore %164 %uint_15 + %165 = OpIAdd %uint %162 %uint_1 + %166 = OpAccessChain %_ptr_Uniform_uint %15 %uint_0_0 %163 + %167 = OpLoad %uint %166 + %168 = OpIEqual %bool %167 %uint_1 + %169 = OpIAdd %uint %163 %uint_1 + OpStore %74 %169 + %170 = OpLoad %uint %76 + OpSelectionMerge %171 None + OpBranchConditional %168 %172 %173 + %173 = OpLabel + %174 = OpPhi %uint %165 %146 + %175 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %174 + OpStore %175 %uint_22 + %176 = OpIAdd %uint %174 %uint_1 + %177 = OpLoad %uint %76 + OpBranch %172 + %172 = OpLabel + %178 = OpPhi %uint %176 %173 %165 %146 + %179 = OpPhi %uint %177 %173 %170 %146 + %180 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %178 + OpStore %180 %uint_21 + %181 = OpIAdd %uint %178 %uint_1 + %182 = OpAccessChain %_ptr_Uniform_uint %17 %uint_0_0 %179 + %183 = OpLoad %uint %182 + %184 = OpIAdd %uint %179 %uint_1 + OpStore %76 %184 + OpSelectionMerge %185 None + OpSwitch %183 %185 + %185 = OpLabel + %186 = OpPhi %uint %uint_666 %187 %181 %172 + %188 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %186 + OpStore %188 %uint_23 + %189 = OpIAdd %uint %186 %uint_1 + OpLoopMerge %190 %187 None + OpBranch %190 + %190 = OpLabel + %191 = OpPhi %uint %189 %185 %192 %193 + %194 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %191 + OpStore %194 %uint_24 + %195 = OpIAdd %uint %191 %uint_1 + %196 = OpLoad %uint %79 + OpLoopMerge %197 %193 None + OpBranch %198 + %187 = OpLabel + OpBranch %185 + %198 
= OpLabel + %199 = OpPhi %uint %195 %190 + %200 = OpPhi %uint %196 %190 + %201 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %199 + OpStore %201 %uint_28 + %202 = OpIAdd %uint %199 %uint_1 + %203 = OpAccessChain %_ptr_Uniform_uint %20 %uint_0_0 %200 + %204 = OpLoad %uint %203 + %205 = OpIAdd %uint %200 %uint_1 + OpStore %79 %205 + OpSelectionMerge %206 None + OpSwitch %204 %207 1 %206 + %207 = OpLabel + %208 = OpPhi %uint %202 %198 + %209 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %208 + OpStore %209 %uint_30 + %210 = OpIAdd %uint %208 %uint_1 + %211 = OpLoad %uint %77 + OpBranch %197 + %206 = OpLabel + %212 = OpPhi %uint %202 %198 + %213 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %212 + OpStore %213 %uint_29 + %214 = OpIAdd %uint %212 %uint_1 + %215 = OpLoad %uint %78 + OpBranch %193 + %193 = OpLabel + %216 = OpPhi %uint %214 %206 + %217 = OpPhi %uint %215 %206 + %218 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %216 + OpStore %218 %uint_27 + %192 = OpIAdd %uint %216 %uint_1 + %219 = OpAccessChain %_ptr_Uniform_uint %19 %uint_0_0 %217 + %220 = OpLoad %uint %219 + %221 = OpIEqual %bool %220 %uint_1 + %222 = OpIAdd %uint %217 %uint_1 + OpStore %78 %222 + %223 = OpLoad %uint %77 + OpBranchConditional %221 %190 %197 + %197 = OpLabel + %224 = OpPhi %uint %210 %207 %192 %193 + %225 = OpPhi %uint %211 %207 %223 %193 + %226 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %224 + OpStore %226 %uint_26 + %227 = OpIAdd %uint %224 %uint_1 + %228 = OpAccessChain %_ptr_Uniform_uint %18 %uint_0_0 %225 + %229 = OpLoad %uint %228 + %230 = OpIEqual %bool %229 %uint_1 + %231 = OpIAdd %uint %225 %uint_1 + OpStore %77 %231 + %232 = OpLoad %uint %73 + OpBranchConditional %230 %131 %171 + %171 = OpLabel + OpBranch %131 + %131 = OpLabel + %233 = OpPhi %uint %uint_666 %171 %227 %197 + %234 = OpPhi %uint %uint_666 %171 %232 %197 + %235 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %233 + OpStore %235 %uint_11 + %130 = OpIAdd %uint %233 %uint_1 + %236 = OpAccessChain 
%_ptr_Uniform_uint %14 %uint_0_0 %234 + %237 = OpLoad %uint %236 + %238 = OpIEqual %bool %237 %uint_1 + %239 = OpIAdd %uint %234 %uint_1 + OpStore %73 %239 + %240 = OpLoad %uint %72 + OpBranchConditional %238 %128 %134 + %134 = OpLabel + %241 = OpPhi %uint %130 %131 + %242 = OpPhi %uint %240 %131 + %243 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %241 + OpStore %243 %uint_10 + %244 = OpIAdd %uint %241 %uint_1 + %245 = OpAccessChain %_ptr_Uniform_uint %13 %uint_0_0 %242 + %246 = OpLoad %uint %245 + %247 = OpIAdd %uint %242 %uint_1 + OpStore %72 %247 + OpSelectionMerge %248 None + OpSwitch %246 %249 1 %250 + %249 = OpLabel + %251 = OpPhi %uint %244 %134 + %252 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %251 + OpStore %252 %uint_32 + %253 = OpIAdd %uint %251 %uint_1 + OpBranch %248 + %250 = OpLabel + OpBranch %248 + %248 = OpLabel + %254 = OpPhi %uint %253 %249 %uint_666 %250 + %255 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %254 + OpStore %255 %uint_31 + %256 = OpIAdd %uint %254 %uint_2 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp b/shaders-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp new file mode 100644 index 00000000000..30db11d45bc --- /dev/null +++ b/shaders-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %ResTypeMod = OpTypeStruct %float %float +%_ptr_Function_ResTypeMod = OpTypePointer Function %ResTypeMod + %int = OpTypeInt 32 1 
+ %int_0 = OpConstant %int 0 + %float_20 = OpConstant %float 20 + %int_1 = OpConstant %int 1 +%_ptr_Function_float = OpTypePointer Function %float +%ResTypeFrexp = OpTypeStruct %float %int +%_ptr_Function_ResTypeFrexp = OpTypePointer Function %ResTypeFrexp + %float_40 = OpConstant %float 40 +%_ptr_Function_int = OpTypePointer Function %int + %SSBO = OpTypeStruct %float %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_int = OpTypePointer Uniform %int + %main = OpFunction %void None %3 + %5 = OpLabel + %modres = OpExtInst %ResTypeMod %1 ModfStruct %float_20 + %frexpres = OpExtInst %ResTypeFrexp %1 FrexpStruct %float_40 + + %modres_f = OpCompositeExtract %float %modres 0 + %modres_i = OpCompositeExtract %float %modres 1 + %frexpres_f = OpCompositeExtract %float %frexpres 0 + %frexpres_i = OpCompositeExtract %int %frexpres 1 + + %float_ptr = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %int_ptr = OpAccessChain %_ptr_Uniform_int %_ %int_1 + + OpStore %float_ptr %modres_f + OpStore %float_ptr %modres_i + OpStore %float_ptr %frexpres_f + OpStore %int_ptr %frexpres_i + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/image-atomic-nonuniform.vk.nocompat.asm.comp b/shaders-no-opt/asm/comp/image-atomic-nonuniform.vk.nocompat.asm.comp new file mode 100644 index 00000000000..5dad9dd5ed8 --- /dev/null +++ b/shaders-no-opt/asm/comp/image-atomic-nonuniform.vk.nocompat.asm.comp @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 32 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability StorageImageArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + 
OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %uImage "uImage" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %uImage DescriptorSet 0 + OpDecorate %uImage Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %30 NonUniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %7 = OpTypeImage %uint 2D 0 0 0 2 R32ui +%_runtimearr_7 = OpTypeRuntimeArray %7 +%_ptr_UniformConstant__runtimearr_7 = OpTypePointer UniformConstant %_runtimearr_7 + %uImage = OpVariable %_ptr_UniformConstant__runtimearr_7 UniformConstant + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_2 = OpConstant %uint 2 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_UniformConstant_7 = OpTypePointer UniformConstant %7 + %v2uint = OpTypeVector %uint 2 + %int = OpTypeInt 32 1 + %v2int = OpTypeVector %int 2 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 +%_ptr_Image_uint = OpTypePointer Image %uint + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %17 = OpLoad %uint %16 + %18 = OpCopyObject %uint %17 + %20 = OpAccessChain %_ptr_UniformConstant_7 %uImage %18 + %22 = OpLoad %v3uint %gl_GlobalInvocationID + %23 = OpVectorShuffle %v2uint %22 %22 0 1 + %26 = OpBitcast %v2int %23 + %30 = OpImageTexelPointer %_ptr_Image_uint %20 %26 %uint_0 + %31 = OpAtomicIAdd %uint %30 %uint_1 %uint_0 %uint_1 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp b/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp new file mode 100644 index 00000000000..2eaef4bdbee --- /dev/null +++ b/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp @@ -0,0 +1,60 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + 
OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + %spec_1 = OpSpecConstant %uint 11 + %spec_2 = OpSpecConstant %uint 12 + %spec_3 = OpSpecConstant %uint 13 + %spec_4 = OpSpecConstant %uint 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %uint_3 %spec_1 %spec_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 
%float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %27 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp b/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp new file mode 100644 index 00000000000..3031f4bb8af --- /dev/null +++ b/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp @@ -0,0 +1,76 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + ; Test that we can declare the spec constant as signed. 
+ ; Needs implicit bitcast since WorkGroupSize is uint. + %spec_1 = OpSpecConstant %int 11 + %spec_2 = OpSpecConstant %int 12 + %spec_3 = OpSpecConstant %int 13 + %spec_4 = OpSpecConstant %int 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + ; Test that we can build spec constant composites out of local size id values. + ; Needs special case handling. + %spec_3_op = OpSpecConstantOp %uint IAdd %spec_3 %uint_3 +%WorkGroupSize = OpSpecConstantComposite %v3uint %spec_3_op %spec_4 %uint_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %wg_f = OpConvertUToF %v3float %WorkGroupSize + %wg_f4 = OpVectorShuffle %v4float %wg_f %wg_f 0 1 2 2 + ; Test that we can use the spec constants directly which needs to translate to gl_WorkGroupSize.elem. + ; Needs special case handling. 
+ %res = OpFAdd %v4float %27 %wg_f4 + %f0 = OpConvertSToF %float %spec_3 + %f1 = OpConvertSToF %float %spec_4 + %f2 = OpConvertSToF %float %uint_2 + %res1 = OpVectorTimesScalar %v4float %res %f0 + %res2 = OpVectorTimesScalar %v4float %res1 %f1 + %res3 = OpVectorTimesScalar %v4float %res2 %f2 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %res3 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/loop-variable-early-read-with-initializer.asm.comp b/shaders-no-opt/asm/comp/loop-variable-early-read-with-initializer.asm.comp new file mode 100644 index 00000000000..b928099db85 --- /dev/null +++ b/shaders-no-opt/asm/comp/loop-variable-early-read-with-initializer.asm.comp @@ -0,0 +1,185 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 114 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %7 "main" + OpExecutionMode %7 LocalSize 1 1 1 + OpDecorate %_struct_21 BufferBlock + OpMemberDecorate %_struct_21 0 Offset 0 + OpDecorate %_arr_uint_uint_1 ArrayStride 4 + OpDecorate %_struct_23 BufferBlock + OpMemberDecorate %_struct_23 0 Offset 0 + OpDecorate %_arr_uint_uint_2 ArrayStride 4 + OpDecorate %_struct_25 BufferBlock + OpMemberDecorate %_struct_25 0 Offset 0 + OpDecorate %_arr_uint_uint_11 ArrayStride 4 + OpDecorate %27 DescriptorSet 0 + OpDecorate %27 Binding 0 + OpDecorate %28 DescriptorSet 0 + OpDecorate %28 Binding 1 + OpDecorate %29 DescriptorSet 0 + OpDecorate %29 Binding 2 + OpDecorate %30 DescriptorSet 0 + OpDecorate %30 Binding 3 + OpDecorate %31 DescriptorSet 0 + OpDecorate %31 Binding 4 + OpDecorate %32 DescriptorSet 0 + OpDecorate %32 Binding 5 + %void = OpTypeVoid + %2 = OpTypeFunction %void + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %true = OpConstantTrue %bool + %uint_0 = OpConstant %uint 0 + %uint_0_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_8 = OpConstant %uint 8 + %uint_9 = OpConstant %uint 
9 + %uint_10 = OpConstant %uint 10 + %uint_11 = OpConstant %uint 11 + %uint_12 = OpConstant %uint 12 + %uint_13 = OpConstant %uint 13 + %uint_14 = OpConstant %uint 14 + %uint_15 = OpConstant %uint 15 + %uint_16 = OpConstant %uint 16 + %uint_17 = OpConstant %uint 17 + %uint_18 = OpConstant %uint 18 + %uint_19 = OpConstant %uint 19 + %uint_20 = OpConstant %uint 20 +%_arr_uint_uint_1 = OpTypeArray %uint %uint_1 + %_struct_21 = OpTypeStruct %_arr_uint_uint_1 +%_ptr_Uniform__struct_21 = OpTypePointer Uniform %_struct_21 + %31 = OpVariable %_ptr_Uniform__struct_21 Uniform + %28 = OpVariable %_ptr_Uniform__struct_21 Uniform + %29 = OpVariable %_ptr_Uniform__struct_21 Uniform + %30 = OpVariable %_ptr_Uniform__struct_21 Uniform +%_arr_uint_uint_2 = OpTypeArray %uint %uint_2 + %_struct_23 = OpTypeStruct %_arr_uint_uint_2 +%_ptr_Uniform__struct_23 = OpTypePointer Uniform %_struct_23 + %27 = OpVariable %_ptr_Uniform__struct_23 Uniform +%_arr_uint_uint_11 = OpTypeArray %uint %uint_11 + %_struct_25 = OpTypeStruct %_arr_uint_uint_11 +%_ptr_Uniform__struct_25 = OpTypePointer Uniform %_struct_25 + %32 = OpVariable %_ptr_Uniform__struct_25 Uniform +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %7 = OpFunction %void None %2 + %8 = OpLabel + %54 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %55 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %56 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %57 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %58 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %59 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %60 = OpLoad %uint %54 + %61 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %60 + OpStore %61 %uint_8 + %62 = OpIAdd %uint %60 %uint_1 + OpStore %54 %62 + OpBranch %9 + %9 = OpLabel + %63 = OpLoad %uint %54 + %64 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %63 + OpStore %64 %uint_9 + %65 = OpIAdd %uint %63 %uint_1 + OpStore %54 %65 + %66 = 
OpLoad %uint %55 + %67 = OpAccessChain %_ptr_Uniform_uint %27 %uint_0_0 %66 + %68 = OpLoad %uint %67 + %69 = OpIEqual %bool %68 %uint_1 + %70 = OpIAdd %uint %66 %uint_1 + OpStore %55 %70 + OpLoopMerge %10 %11 None + OpBranchConditional %69 %12 %13 + %12 = OpLabel + %71 = OpLoad %uint %54 + %72 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %71 + OpStore %72 %uint_12 + %73 = OpIAdd %uint %71 %uint_1 + OpStore %54 %73 + OpReturn + %13 = OpLabel + %74 = OpLoad %uint %54 + %75 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %74 + OpStore %75 %uint_13 + %76 = OpIAdd %uint %74 %uint_1 + OpStore %54 %76 + %77 = OpLoad %uint %56 + %78 = OpAccessChain %_ptr_Uniform_uint %28 %uint_0_0 %77 + %79 = OpLoad %uint %78 + %80 = OpIEqual %bool %79 %uint_1 + %81 = OpIAdd %uint %77 %uint_1 + OpStore %56 %81 + OpBranchConditional %80 %11 %10 + %11 = OpLabel + %82 = OpLoad %uint %54 + %83 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %82 + OpStore %83 %uint_11 + %84 = OpIAdd %uint %82 %uint_1 + OpStore %54 %84 + OpBranch %14 + %14 = OpLabel + %85 = OpLoad %uint %54 + %86 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %85 + OpStore %86 %uint_14 + %87 = OpIAdd %uint %85 %uint_1 + OpStore %54 %87 + %88 = OpLoad %uint %57 + %89 = OpAccessChain %_ptr_Uniform_uint %29 %uint_0_0 %88 + %90 = OpLoad %uint %89 + %91 = OpIAdd %uint %88 %uint_1 + OpStore %57 %91 + OpSelectionMerge %15 None + OpSwitch %90 %16 + %16 = OpLabel + %92 = OpLoad %uint %54 + %93 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %92 + OpStore %93 %uint_16 + %94 = OpIAdd %uint %92 %uint_1 + OpStore %54 %94 + OpBranch %15 + %15 = OpLabel + %95 = OpLoad %uint %54 + %96 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %95 + OpStore %96 %uint_15 + %97 = OpIAdd %uint %95 %uint_1 + OpStore %54 %97 + %98 = OpLoad %uint %58 + %99 = OpAccessChain %_ptr_Uniform_uint %30 %uint_0_0 %98 + %100 = OpLoad %uint %99 + %101 = OpIEqual %bool %100 %uint_1 + %102 = OpIAdd %uint %98 %uint_1 + OpStore %58 %102 + OpSelectionMerge %17 None + 
OpBranchConditional %101 %18 %19 + %18 = OpLabel + OpBranch %17 + %19 = OpLabel + %103 = OpLoad %uint %54 + %104 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %103 + OpStore %104 %uint_19 + %105 = OpIAdd %uint %103 %uint_1 + OpStore %54 %105 + OpBranch %17 + %17 = OpLabel + %106 = OpLoad %uint %54 + %107 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %106 + OpStore %107 %uint_17 + %108 = OpIAdd %uint %106 %uint_1 + OpStore %54 %108 + %109 = OpLoad %uint %59 + %110 = OpAccessChain %_ptr_Uniform_uint %31 %uint_0_0 %109 + %111 = OpLoad %uint %110 + %112 = OpIEqual %bool %111 %uint_1 + %113 = OpIAdd %uint %109 %uint_1 + OpStore %59 %113 + OpBranchConditional %112 %9 %10 + %10 = OpLabel + OpBranch %20 + %20 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/loop-variable-early-read-with-undef.asm.comp b/shaders-no-opt/asm/comp/loop-variable-early-read-with-undef.asm.comp new file mode 100644 index 00000000000..ebee277b7f1 --- /dev/null +++ b/shaders-no-opt/asm/comp/loop-variable-early-read-with-undef.asm.comp @@ -0,0 +1,185 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 114 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %7 "main" + OpExecutionMode %7 LocalSize 1 1 1 + OpDecorate %_struct_21 BufferBlock + OpMemberDecorate %_struct_21 0 Offset 0 + OpDecorate %_arr_uint_uint_1 ArrayStride 4 + OpDecorate %_struct_23 BufferBlock + OpMemberDecorate %_struct_23 0 Offset 0 + OpDecorate %_arr_uint_uint_2 ArrayStride 4 + OpDecorate %_struct_25 BufferBlock + OpMemberDecorate %_struct_25 0 Offset 0 + OpDecorate %_arr_uint_uint_11 ArrayStride 4 + OpDecorate %27 DescriptorSet 0 + OpDecorate %27 Binding 0 + OpDecorate %28 DescriptorSet 0 + OpDecorate %28 Binding 1 + OpDecorate %29 DescriptorSet 0 + OpDecorate %29 Binding 2 + OpDecorate %30 DescriptorSet 0 + OpDecorate %30 Binding 3 + OpDecorate %31 DescriptorSet 0 + OpDecorate %31 Binding 4 + OpDecorate %32 DescriptorSet 0 
+ OpDecorate %32 Binding 5 + %void = OpTypeVoid + %2 = OpTypeFunction %void + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %true = OpConstantTrue %bool + %uint_0 = OpConstant %uint 0 + %uint_0_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_8 = OpConstant %uint 8 + %uint_9 = OpConstant %uint 9 + %uint_10 = OpConstant %uint 10 + %uint_11 = OpConstant %uint 11 + %uint_12 = OpConstant %uint 12 + %uint_13 = OpConstant %uint 13 + %uint_14 = OpConstant %uint 14 + %uint_15 = OpConstant %uint 15 + %uint_16 = OpConstant %uint 16 + %uint_17 = OpConstant %uint 17 + %uint_18 = OpConstant %uint 18 + %uint_19 = OpConstant %uint 19 + %uint_20 = OpConstant %uint 20 +%_arr_uint_uint_1 = OpTypeArray %uint %uint_1 + %_struct_21 = OpTypeStruct %_arr_uint_uint_1 +%_ptr_Uniform__struct_21 = OpTypePointer Uniform %_struct_21 + %31 = OpVariable %_ptr_Uniform__struct_21 Uniform + %28 = OpVariable %_ptr_Uniform__struct_21 Uniform + %29 = OpVariable %_ptr_Uniform__struct_21 Uniform + %30 = OpVariable %_ptr_Uniform__struct_21 Uniform +%_arr_uint_uint_2 = OpTypeArray %uint %uint_2 + %_struct_23 = OpTypeStruct %_arr_uint_uint_2 +%_ptr_Uniform__struct_23 = OpTypePointer Uniform %_struct_23 + %27 = OpVariable %_ptr_Uniform__struct_23 Uniform +%_arr_uint_uint_11 = OpTypeArray %uint %uint_11 + %_struct_25 = OpTypeStruct %_arr_uint_uint_11 +%_ptr_Uniform__struct_25 = OpTypePointer Uniform %_struct_25 + %32 = OpVariable %_ptr_Uniform__struct_25 Uniform +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %7 = OpFunction %void None %2 + %8 = OpLabel + %54 = OpVariable %_ptr_Function_uint Function + %55 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %56 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %57 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %58 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %59 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %60 = OpLoad %uint %54 
+ %61 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %60 + OpStore %61 %uint_8 + %62 = OpIAdd %uint %60 %uint_1 + OpStore %54 %62 + OpBranch %9 + %9 = OpLabel + %63 = OpLoad %uint %54 + %64 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %63 + OpStore %64 %uint_9 + %65 = OpIAdd %uint %63 %uint_1 + OpStore %54 %65 + %66 = OpLoad %uint %55 + %67 = OpAccessChain %_ptr_Uniform_uint %27 %uint_0_0 %66 + %68 = OpLoad %uint %67 + %69 = OpIEqual %bool %68 %uint_1 + %70 = OpIAdd %uint %66 %uint_1 + OpStore %55 %70 + OpLoopMerge %10 %11 None + OpBranchConditional %69 %12 %13 + %12 = OpLabel + %71 = OpLoad %uint %54 + %72 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %71 + OpStore %72 %uint_12 + %73 = OpIAdd %uint %71 %uint_1 + OpStore %54 %73 + OpReturn + %13 = OpLabel + %74 = OpLoad %uint %54 + %75 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %74 + OpStore %75 %uint_13 + %76 = OpIAdd %uint %74 %uint_1 + OpStore %54 %76 + %77 = OpLoad %uint %56 + %78 = OpAccessChain %_ptr_Uniform_uint %28 %uint_0_0 %77 + %79 = OpLoad %uint %78 + %80 = OpIEqual %bool %79 %uint_1 + %81 = OpIAdd %uint %77 %uint_1 + OpStore %56 %81 + OpBranchConditional %80 %11 %10 + %11 = OpLabel + %82 = OpLoad %uint %54 + %83 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %82 + OpStore %83 %uint_11 + %84 = OpIAdd %uint %82 %uint_1 + OpStore %54 %84 + OpBranch %14 + %14 = OpLabel + %85 = OpLoad %uint %54 + %86 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %85 + OpStore %86 %uint_14 + %87 = OpIAdd %uint %85 %uint_1 + OpStore %54 %87 + %88 = OpLoad %uint %57 + %89 = OpAccessChain %_ptr_Uniform_uint %29 %uint_0_0 %88 + %90 = OpLoad %uint %89 + %91 = OpIAdd %uint %88 %uint_1 + OpStore %57 %91 + OpSelectionMerge %15 None + OpSwitch %90 %16 + %16 = OpLabel + %92 = OpLoad %uint %54 + %93 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %92 + OpStore %93 %uint_16 + %94 = OpIAdd %uint %92 %uint_1 + OpStore %54 %94 + OpBranch %15 + %15 = OpLabel + %95 = OpLoad %uint %54 + %96 = OpAccessChain 
%_ptr_Uniform_uint %32 %uint_0_0 %95 + OpStore %96 %uint_15 + %97 = OpIAdd %uint %95 %uint_1 + OpStore %54 %97 + %98 = OpLoad %uint %58 + %99 = OpAccessChain %_ptr_Uniform_uint %30 %uint_0_0 %98 + %100 = OpLoad %uint %99 + %101 = OpIEqual %bool %100 %uint_1 + %102 = OpIAdd %uint %98 %uint_1 + OpStore %58 %102 + OpSelectionMerge %17 None + OpBranchConditional %101 %18 %19 + %18 = OpLabel + OpBranch %17 + %19 = OpLabel + %103 = OpLoad %uint %54 + %104 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %103 + OpStore %104 %uint_19 + %105 = OpIAdd %uint %103 %uint_1 + OpStore %54 %105 + OpBranch %17 + %17 = OpLabel + %106 = OpLoad %uint %54 + %107 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %106 + OpStore %107 %uint_17 + %108 = OpIAdd %uint %106 %uint_1 + OpStore %54 %108 + %109 = OpLoad %uint %59 + %110 = OpAccessChain %_ptr_Uniform_uint %31 %uint_0_0 %109 + %111 = OpLoad %uint %110 + %112 = OpIEqual %bool %111 %uint_1 + %113 = OpIAdd %uint %109 %uint_1 + OpStore %59 %113 + OpBranchConditional %112 %9 %10 + %10 = OpLabel + OpBranch %20 + %20 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/loop-variable-with-initializer.asm.comp b/shaders-no-opt/asm/comp/loop-variable-with-initializer.asm.comp new file mode 100644 index 00000000000..f40377b1181 --- /dev/null +++ b/shaders-no-opt/asm/comp/loop-variable-with-initializer.asm.comp @@ -0,0 +1,31 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos; 0 +; Bound: 62 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %main "main" + OpName %i "i" + %uint = OpTypeInt 32 0 + %void = OpTypeVoid + %11 = OpTypeFunction %void + %uint_0 = OpConstant %uint 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %31 = OpConstantNull %uint + %main = OpFunction %void None %11 + %14 = OpLabel + %i = OpVariable %_ptr_Function_uint Function %31 + OpStore %i %uint_0 + OpBranch %32 + %32 = OpLabel + OpLoopMerge %33 
%34 None + OpBranch %33 + %34 = OpLabel + %57 = OpLoad %uint %i + OpBranch %32 + %33 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/multi-break-switch-out-of-loop.asm.comp b/shaders-no-opt/asm/comp/multi-break-switch-out-of-loop.asm.comp new file mode 100644 index 00000000000..821370379e0 --- /dev/null +++ b/shaders-no-opt/asm/comp/multi-break-switch-out-of-loop.asm.comp @@ -0,0 +1,94 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 53 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %count "count" + OpName %i "i" + OpName %UBO "UBO" + OpMemberName %UBO 0 "v" + OpName %_ "" + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 + %bool = OpTypeBool + %UBO = OpTypeStruct %int +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %_ = OpVariable %_ptr_Uniform_UBO Uniform +%_ptr_Uniform_int = OpTypePointer Uniform %int + %int_20 = OpConstant %int 20 + %int_1 = OpConstant %int 1 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %count = OpVariable %_ptr_Function_uint Function + %i = OpVariable %_ptr_Function_int Function + OpStore %count %uint_0 + OpStore %i %int_0 + OpBranch %14 + %14 = OpLabel + OpLoopMerge %16 %17 None + OpBranch %18 + %18 = OpLabel + %19 = OpLoad %int %i + %22 = 
OpSLessThan %bool %19 %int_4 + OpBranchConditional %22 %15 %16 + %15 = OpLabel + OpSelectionMerge %24 None + OpSwitch %int_0 %23 + %23 = OpLabel + OpSelectionMerge %26 None + OpSwitch %int_0 %25 + %25 = OpLabel + OpSelectionMerge %28 None + OpSwitch %int_0 %27 + %27 = OpLabel + %33 = OpAccessChain %_ptr_Uniform_int %_ %int_0 + %34 = OpLoad %int %33 + %36 = OpIEqual %bool %34 %int_20 + OpSelectionMerge %38 None + OpBranchConditional %36 %37 %38 + %37 = OpLabel + OpBranch %16 + %38 = OpLabel + OpBranch %28 + %28 = OpLabel + OpBranch %26 + %26 = OpLabel + %42 = OpLoad %uint %count + %44 = OpIAdd %uint %42 %int_1 + OpStore %count %44 + OpBranch %24 + %24 = OpLabel + %46 = OpLoad %uint %count + %47 = OpIAdd %uint %46 %int_1 + OpStore %count %47 + OpBranch %17 + %17 = OpLabel + %48 = OpLoad %int %i + %49 = OpIAdd %int %48 %int_1 + OpStore %i %49 + OpBranch %14 + %16 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/nonuniform-bracket-handling.vk.nocompat.asm.comp b/shaders-no-opt/asm/comp/nonuniform-bracket-handling.vk.nocompat.asm.comp new file mode 100644 index 00000000000..5d5fac622a9 --- /dev/null +++ b/shaders-no-opt/asm/comp/nonuniform-bracket-handling.vk.nocompat.asm.comp @@ -0,0 +1,299 @@ +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 233 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpCapability ImageQuery + OpCapability StorageImageWriteWithoutFormat + OpCapability GroupNonUniformBallot + OpCapability RuntimeDescriptorArray + OpCapability UniformTexelBufferArrayDynamicIndexing + OpCapability StorageTexelBufferArrayDynamicIndexing + OpCapability UniformTexelBufferArrayNonUniformIndexing + OpCapability StorageTexelBufferArrayNonUniformIndexing + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_EXT_descriptor_indexing" + OpExtension "SPV_KHR_physical_storage_buffer" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" 
%gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpName %main "main" + OpName %RootConstants "RootConstants" + OpName %registers "registers" + OpName %SSBO_Offsets "SSBO_Offsets" + OpDecorate %RootConstants Block + OpMemberDecorate %RootConstants 0 Offset 0 + OpMemberDecorate %RootConstants 1 Offset 4 + OpMemberDecorate %RootConstants 2 Offset 8 + OpMemberDecorate %RootConstants 3 Offset 12 + OpMemberDecorate %RootConstants 4 Offset 16 + OpMemberDecorate %RootConstants 5 Offset 20 + OpMemberDecorate %RootConstants 6 Offset 24 + OpMemberDecorate %RootConstants 7 Offset 28 + OpDecorate %_runtimearr_v2uint ArrayStride 8 + OpMemberDecorate %SSBO_Offsets 0 Offset 0 + OpDecorate %SSBO_Offsets Block + OpDecorate %13 DescriptorSet 0 + OpDecorate %13 Binding 0 + OpDecorate %13 NonWritable + OpDecorate %13 Restrict + OpDecorate %18 DescriptorSet 1 + OpDecorate %18 Binding 0 + OpDecorate %22 DescriptorSet 4 + OpDecorate %22 Binding 0 + OpDecorate %26 DescriptorSet 4 + OpDecorate %26 Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %148 NonUniform + OpDecorate %149 NonUniform + OpDecorate %172 NonUniform + OpDecorate %173 NonUniform + OpDecorate %196 NonUniform + OpDecorate %197 NonUniform + OpDecorate %205 NonUniform + OpDecorate %212 NonUniform + %void = OpTypeVoid + %2 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%RootConstants = OpTypeStruct %uint %uint %uint %uint %uint %uint %uint %uint +%_ptr_PushConstant_RootConstants = OpTypePointer PushConstant %RootConstants + %registers = OpVariable %_ptr_PushConstant_RootConstants PushConstant + %v2uint = OpTypeVector %uint 2 +%_runtimearr_v2uint = OpTypeRuntimeArray %v2uint +%SSBO_Offsets = OpTypeStruct %_runtimearr_v2uint +%_ptr_StorageBuffer_SSBO_Offsets = OpTypePointer StorageBuffer %SSBO_Offsets + %13 = OpVariable %_ptr_StorageBuffer_SSBO_Offsets StorageBuffer + %float = OpTypeFloat 32 + %15 = OpTypeImage %float Buffer 0 0 0 1 Unknown +%_runtimearr_15 = 
OpTypeRuntimeArray %15 +%_ptr_UniformConstant__runtimearr_15 = OpTypePointer UniformConstant %_runtimearr_15 + %18 = OpVariable %_ptr_UniformConstant__runtimearr_15 UniformConstant + %19 = OpTypeImage %float Buffer 0 0 0 2 R32f +%_runtimearr_19 = OpTypeRuntimeArray %19 +%_ptr_UniformConstant__runtimearr_19 = OpTypePointer UniformConstant %_runtimearr_19 + %22 = OpVariable %_ptr_UniformConstant__runtimearr_19 UniformConstant + %23 = OpTypeImage %uint Buffer 0 0 0 2 R32ui +%_runtimearr_23 = OpTypeRuntimeArray %23 +%_ptr_UniformConstant__runtimearr_23 = OpTypePointer UniformConstant %_runtimearr_23 + %26 = OpVariable %_ptr_UniformConstant__runtimearr_23 UniformConstant +%_ptr_UniformConstant_23 = OpTypePointer UniformConstant %23 +%_ptr_PushConstant_uint = OpTypePointer PushConstant %uint + %uint_4 = OpConstant %uint 4 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 +%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint + %uint_0 = OpConstant %uint 0 +%_ptr_UniformConstant_19 = OpTypePointer UniformConstant %19 +%_ptr_UniformConstant_15 = OpTypePointer UniformConstant %15 + %uint_1 = OpConstant %uint 1 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint + %bool = OpTypeBool +%uint_4294967295 = OpConstant %uint 4294967295 + %v4float = OpTypeVector %float 4 + %uint_1024 = OpConstant %uint 1024 + %uint_2048 = OpConstant %uint 2048 +%_ptr_Image_uint = OpTypePointer Image %uint + %uint_40 = OpConstant %uint 40 + %uint_50 = OpConstant %uint 50 + %uint_70 = OpConstant %uint 70 + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %231 + %231 = OpLabel + %30 = OpAccessChain %_ptr_PushConstant_uint %registers %uint_4 + %32 = OpLoad %uint %30 + %33 = OpIAdd %uint %32 %uint_2 + %28 = OpAccessChain %_ptr_UniformConstant_23 %26 %33 + %35 = OpLoad %23 %28 + %36 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %33 + 
%39 = OpAccessChain %_ptr_StorageBuffer_v2uint %13 %uint_0 %36 + %41 = OpLoad %v2uint %39 + %44 = OpAccessChain %_ptr_PushConstant_uint %registers %uint_4 + %45 = OpLoad %uint %44 + %43 = OpAccessChain %_ptr_UniformConstant_19 %22 %45 + %46 = OpLoad %19 %43 + %47 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %45 + %48 = OpAccessChain %_ptr_StorageBuffer_v2uint %13 %uint_0 %47 + %49 = OpLoad %v2uint %48 + %52 = OpAccessChain %_ptr_PushConstant_uint %registers %uint_1 + %54 = OpLoad %uint %52 + %55 = OpIAdd %uint %54 %uint_1 + %51 = OpAccessChain %_ptr_UniformConstant_15 %18 %55 + %56 = OpLoad %15 %51 + %57 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %55 + %58 = OpAccessChain %_ptr_StorageBuffer_v2uint %13 %uint_0 %57 + %59 = OpLoad %v2uint %58 + %64 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %65 = OpLoad %uint %64 + %66 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1 + %67 = OpLoad %uint %66 + %68 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %69 = OpLoad %uint %68 + %70 = OpIAdd %uint %65 %uint_4 + %71 = OpCompositeExtract %uint %49 0 + %72 = OpCompositeExtract %uint %49 1 + %73 = OpIAdd %uint %70 %71 + %75 = OpULessThan %bool %70 %72 + %76 = OpSelect %uint %75 %73 %uint_4294967295 + %79 = OpImageRead %v4float %46 %76 + %80 = OpCompositeExtract %float %79 0 + %81 = OpCompositeExtract %float %79 1 + %82 = OpCompositeExtract %float %79 2 + %83 = OpCompositeExtract %float %79 3 + %84 = OpIAdd %uint %65 %uint_1024 + %86 = OpCompositeExtract %uint %49 0 + %87 = OpCompositeExtract %uint %49 1 + %88 = OpIAdd %uint %84 %86 + %89 = OpULessThan %bool %84 %87 + %90 = OpSelect %uint %89 %88 %uint_4294967295 + %91 = OpCompositeConstruct %v4float %80 %81 %82 %83 + OpImageWrite %46 %90 %91 + %92 = OpIAdd %uint %65 %uint_2 + %93 = OpCompositeExtract %uint %59 0 + %94 = OpCompositeExtract %uint %59 1 + %95 = OpIAdd %uint %92 %93 + %96 = OpULessThan %bool %92 %94 + %97 = OpSelect %uint %96 %95 %uint_4294967295 + %98 = 
OpImageFetch %v4float %56 %97 + %99 = OpCompositeExtract %float %98 0 + %100 = OpCompositeExtract %float %98 1 + %101 = OpCompositeExtract %float %98 2 + %102 = OpCompositeExtract %float %98 3 + %103 = OpIAdd %uint %65 %uint_2048 + %105 = OpCompositeExtract %uint %49 0 + %106 = OpCompositeExtract %uint %49 1 + %107 = OpIAdd %uint %103 %105 + %108 = OpULessThan %bool %103 %106 + %109 = OpSelect %uint %108 %107 %uint_4294967295 + %110 = OpCompositeConstruct %v4float %99 %100 %101 %102 + OpImageWrite %46 %109 %110 + %111 = OpCompositeExtract %uint %41 0 + %112 = OpCompositeExtract %uint %41 1 + %113 = OpIAdd %uint %65 %111 + %114 = OpULessThan %bool %65 %112 + %115 = OpSelect %uint %114 %113 %uint_4294967295 + %117 = OpImageTexelPointer %_ptr_Image_uint %28 %115 %uint_0 + %118 = OpAtomicIAdd %uint %117 %uint_1 %uint_0 %uint_40 + %120 = OpCompositeExtract %uint %41 0 + %121 = OpCompositeExtract %uint %41 1 + %122 = OpIAdd %uint %67 %120 + %123 = OpULessThan %bool %67 %121 + %124 = OpSelect %uint %123 %122 %uint_4294967295 + %125 = OpImageTexelPointer %_ptr_Image_uint %28 %124 %uint_0 + %126 = OpAtomicCompareExchange %uint %125 %uint_1 %uint_0 %uint_0 %uint_50 %uint_40 + %128 = OpCompositeExtract %uint %49 1 + %129 = OpConvertUToF %float %128 + %130 = OpCompositeExtract %uint %49 0 + %131 = OpCompositeExtract %uint %49 1 + %132 = OpIAdd %uint %uint_0 %130 + %133 = OpULessThan %bool %uint_0 %131 + %134 = OpSelect %uint %133 %132 %uint_4294967295 + %135 = OpCompositeConstruct %v4float %129 %129 %129 %129 + OpImageWrite %46 %134 %135 + %136 = OpCompositeExtract %uint %59 1 + %137 = OpConvertUToF %float %136 + %138 = OpCompositeExtract %uint %49 0 + %139 = OpCompositeExtract %uint %49 1 + %140 = OpIAdd %uint %uint_1 %138 + %141 = OpULessThan %bool %uint_1 %139 + %142 = OpSelect %uint %141 %140 %uint_4294967295 + %143 = OpCompositeConstruct %v4float %137 %137 %137 %137 + OpImageWrite %46 %142 %143 + %144 = OpIAdd %uint %69 %uint_0 + %146 = OpAccessChain 
%_ptr_PushConstant_uint %registers %uint_4 + %147 = OpLoad %uint %146 + %148 = OpIAdd %uint %147 %144 + %145 = OpAccessChain %_ptr_UniformConstant_19 %22 %148 + %149 = OpLoad %19 %145 + %150 = OpAccessChain %_ptr_StorageBuffer_v2uint %13 %uint_0 %148 + %151 = OpLoad %v2uint %150 + %152 = OpCompositeExtract %uint %151 0 + %153 = OpCompositeExtract %uint %151 1 + %154 = OpIAdd %uint %70 %152 + %155 = OpULessThan %bool %70 %153 + %156 = OpSelect %uint %155 %154 %uint_4294967295 + %157 = OpImageRead %v4float %149 %156 + %158 = OpCompositeExtract %float %157 0 + %159 = OpCompositeExtract %float %157 1 + %160 = OpCompositeExtract %float %157 2 + %161 = OpCompositeExtract %float %157 3 + %162 = OpCompositeExtract %uint %151 0 + %163 = OpCompositeExtract %uint %151 1 + %164 = OpIAdd %uint %84 %162 + %165 = OpULessThan %bool %84 %163 + %166 = OpSelect %uint %165 %164 %uint_4294967295 + %167 = OpCompositeConstruct %v4float %158 %159 %160 %161 + OpImageWrite %149 %166 %167 + %168 = OpIAdd %uint %69 %uint_0 + %170 = OpAccessChain %_ptr_PushConstant_uint %registers %uint_1 + %171 = OpLoad %uint %170 + %172 = OpIAdd %uint %171 %168 + %169 = OpAccessChain %_ptr_UniformConstant_15 %18 %172 + %173 = OpLoad %15 %169 + %174 = OpAccessChain %_ptr_StorageBuffer_v2uint %13 %uint_0 %172 + %175 = OpLoad %v2uint %174 + %176 = OpCompositeExtract %uint %175 0 + %177 = OpCompositeExtract %uint %175 1 + %178 = OpIAdd %uint %70 %176 + %179 = OpULessThan %bool %70 %177 + %180 = OpSelect %uint %179 %178 %uint_4294967295 + %181 = OpImageFetch %v4float %173 %180 + %182 = OpCompositeExtract %float %181 0 + %183 = OpCompositeExtract %float %181 1 + %184 = OpCompositeExtract %float %181 2 + %185 = OpCompositeExtract %float %181 3 + %186 = OpCompositeExtract %uint %151 0 + %187 = OpCompositeExtract %uint %151 1 + %188 = OpIAdd %uint %103 %186 + %189 = OpULessThan %bool %103 %187 + %190 = OpSelect %uint %189 %188 %uint_4294967295 + %191 = OpCompositeConstruct %v4float %182 %183 %184 %185 + OpImageWrite 
%149 %190 %191 + %192 = OpIAdd %uint %69 %uint_0 + %194 = OpAccessChain %_ptr_PushConstant_uint %registers %uint_4 + %195 = OpLoad %uint %194 + %196 = OpIAdd %uint %195 %192 + %193 = OpAccessChain %_ptr_UniformConstant_23 %26 %196 + %197 = OpLoad %23 %193 + %198 = OpAccessChain %_ptr_StorageBuffer_v2uint %13 %uint_0 %196 + %199 = OpLoad %v2uint %198 + %200 = OpCompositeExtract %uint %199 0 + %201 = OpCompositeExtract %uint %199 1 + %202 = OpIAdd %uint %67 %200 + %203 = OpULessThan %bool %67 %201 + %204 = OpSelect %uint %203 %202 %uint_4294967295 + %205 = OpImageTexelPointer %_ptr_Image_uint %193 %204 %uint_0 + %206 = OpAtomicIAdd %uint %205 %uint_1 %uint_0 %uint_40 + %207 = OpCompositeExtract %uint %199 0 + %208 = OpCompositeExtract %uint %199 1 + %209 = OpIAdd %uint %67 %207 + %210 = OpULessThan %bool %67 %208 + %211 = OpSelect %uint %210 %209 %uint_4294967295 + %212 = OpImageTexelPointer %_ptr_Image_uint %193 %211 %uint_0 + %213 = OpAtomicCompareExchange %uint %212 %uint_1 %uint_0 %uint_0 %uint_70 %uint_40 + %215 = OpCompositeExtract %uint %151 1 + %216 = OpConvertUToF %float %215 + %217 = OpCompositeExtract %uint %49 0 + %218 = OpCompositeExtract %uint %49 1 + %219 = OpIAdd %uint %uint_2 %217 + %220 = OpULessThan %bool %uint_2 %218 + %221 = OpSelect %uint %220 %219 %uint_4294967295 + %222 = OpCompositeConstruct %v4float %216 %216 %216 %216 + OpImageWrite %46 %221 %222 + %223 = OpCompositeExtract %uint %175 1 + %224 = OpConvertUToF %float %223 + %225 = OpCompositeExtract %uint %49 0 + %226 = OpCompositeExtract %uint %49 1 + %227 = OpIAdd %uint %uint_3 %225 + %228 = OpULessThan %bool %uint_3 %226 + %229 = OpSelect %uint %228 %227 %uint_4294967295 + %230 = OpCompositeConstruct %v4float %224 %224 %224 %224 + OpImageWrite %46 %229 %230 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/shaders-no-opt/asm/comp/phi-temporary-copy-loop-variable.asm.invalid.comp similarity index 100% rename from 
shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp rename to shaders-no-opt/asm/comp/phi-temporary-copy-loop-variable.asm.invalid.comp diff --git a/shaders-no-opt/asm/comp/ray-query-force-temporary-rtas.spv14.asm.vk.nocompat.comp b/shaders-no-opt/asm/comp/ray-query-force-temporary-rtas.spv14.asm.vk.nocompat.comp new file mode 100644 index 00000000000..d3b746a6ec9 --- /dev/null +++ b/shaders-no-opt/asm/comp/ray-query-force-temporary-rtas.spv14.asm.vk.nocompat.comp @@ -0,0 +1,81 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 43 +; Schema: 0 + OpCapability Shader + OpCapability RayQueryKHR + OpExtension "SPV_KHR_ray_query" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %_ %__0 %rq + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 460 + OpSourceExtension "GL_EXT_ray_query" + OpSourceExtension "GL_EXT_ray_tracing" + OpName %main "main" + OpName %va "va" + OpName %Buf "Buf" + OpMemberName %Buf 0 "vas" + OpName %_ "" + OpName %Registers "Registers" + OpMemberName %Registers 0 "index" + OpName %__0 "" + OpName %rq "rq" + OpDecorate %_arr_v2uint_uint_1024 ArrayStride 8 + OpMemberDecorate %Buf 0 NonWritable + OpMemberDecorate %Buf 0 Offset 0 + OpDecorate %Buf Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpMemberDecorate %Registers 0 Offset 0 + OpDecorate %Registers Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 +%_ptr_Function_v2uint = OpTypePointer Function %v2uint + %uint_1024 = OpConstant %uint 1024 +%_arr_v2uint_uint_1024 = OpTypeArray %v2uint %uint_1024 + %Buf = OpTypeStruct %_arr_v2uint_uint_1024 +%_ptr_StorageBuffer_Buf = OpTypePointer StorageBuffer %Buf + %_ = OpVariable %_ptr_StorageBuffer_Buf StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %Registers = OpTypeStruct %uint +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers 
+ %__0 = OpVariable %_ptr_PushConstant_Registers PushConstant +%_ptr_PushConstant_uint = OpTypePointer PushConstant %uint +%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint + %bool = OpTypeBool + %false = OpConstantFalse %bool + %32 = OpTypeRayQueryKHR +%_ptr_Private_32 = OpTypePointer Private %32 + %rq = OpVariable %_ptr_Private_32 Private + %36 = OpTypeAccelerationStructureKHR + %uint_0 = OpConstant %uint 0 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 + %float_0 = OpConstant %float 0 + %42 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %main = OpFunction %void None %3 + %5 = OpLabel + %va = OpVariable %_ptr_Function_v2uint Function + OpBranch %6 + %6 = OpLabel + OpLoopMerge %8 %9 None + OpBranch %7 + %7 = OpLabel + %25 = OpAccessChain %_ptr_PushConstant_uint %__0 %int_0 + %26 = OpLoad %uint %25 + %28 = OpAccessChain %_ptr_StorageBuffer_v2uint %_ %int_0 %26 + %29 = OpLoad %v2uint %28 + OpStore %va %29 + %37 = OpConvertUToAccelerationStructureKHR %36 %29 + OpBranch %9 + %9 = OpLabel + OpBranchConditional %false %6 %8 + %8 = OpLabel + OpRayQueryInitializeKHR %rq %37 %uint_0 %uint_0 %42 %float_0 %42 %float_0 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/ray-query-function-object.spv14.asm.vk.nocompat.comp b/shaders-no-opt/asm/comp/ray-query-function-object.spv14.asm.vk.nocompat.comp new file mode 100644 index 00000000000..6f4538a9a33 --- /dev/null +++ b/shaders-no-opt/asm/comp/ray-query-function-object.spv14.asm.vk.nocompat.comp @@ -0,0 +1,39 @@ + OpCapability Shader + OpCapability RayTracingKHR + OpCapability RayQueryKHR + OpExtension "SPV_KHR_ray_tracing" + OpExtension "SPV_KHR_ray_query" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %RTAS %gl_LocalInvocationIndex + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 460 + OpName %accelerationStructureNV "accelerationStructureNV" + OpName %RTAS "RTAS" + OpName %main "main" + OpName %rayQueryKHR "rayQueryKHR" + OpDecorate 
%gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %RTAS DescriptorSet 0 + OpDecorate %RTAS Binding 0 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_255 = OpConstant %uint 255 +%accelerationStructureNV = OpTypeAccelerationStructureKHR +%_ptr_UniformConstant_accelerationStructureNV = OpTypePointer UniformConstant %accelerationStructureNV +%_ptr_Input_uint = OpTypePointer Input %uint + %void = OpTypeVoid + %12 = OpTypeFunction %void +%rayQueryKHR = OpTypeRayQueryKHR +%_ptr_Function_rayQueryKHR = OpTypePointer Function %rayQueryKHR + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 + %RTAS = OpVariable %_ptr_UniformConstant_accelerationStructureNV UniformConstant +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %16 = OpUndef %float + %17 = OpUndef %v3float + %main = OpFunction %void None %12 + %18 = OpLabel + %19 = OpVariable %_ptr_Function_rayQueryKHR Function + %20 = OpLoad %accelerationStructureNV %RTAS + OpRayQueryInitializeKHR %19 %20 %uint_2 %uint_255 %17 %16 %17 %16 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp b/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp new file mode 100644 index 00000000000..b4e622baced --- /dev/null +++ b/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp @@ -0,0 +1,78 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %A "A" + OpName %B "A" + OpName %C "A" + OpName %D "A" + OpName %E "A" + OpName %F "A" + OpName %G "A" + OpName %H "A" + OpName %I "A" + OpName %J "A" + 
OpName %K "A" + OpName %L "A" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %A SpecId 0 + OpDecorate %B SpecId 1 + OpDecorate %C SpecId 2 + OpDecorate %D SpecId 3 + OpDecorate %E SpecId 4 + OpDecorate %F SpecId 5 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %A = OpSpecConstant %int 0 + %B = OpSpecConstant %int 1 + %C = OpSpecConstant %int 2 + %D = OpSpecConstant %int 3 + %E = OpSpecConstant %int 4 + %F = OpSpecConstant %int 5 + %G = OpSpecConstantOp %int ISub %A %B + %H = OpSpecConstantOp %int ISub %G %C + %I = OpSpecConstantOp %int ISub %H %D + %J = OpSpecConstantOp %int ISub %I %E + %K = OpSpecConstantOp %int ISub %J %F + %L = OpSpecConstantOp %int IAdd %K %F +%_ptr_Uniform_int = OpTypePointer Uniform %int + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %32 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %19 + OpStore %32 %L + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp b/shaders-no-opt/asm/comp/storage-buffer-basic.asm.comp similarity index 95% rename from 
shaders/asm/comp/storage-buffer-basic.invalid.asm.comp rename to shaders-no-opt/asm/comp/storage-buffer-basic.asm.comp index edb1a05e549..db9a8490df6 100644 --- a/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp +++ b/shaders-no-opt/asm/comp/storage-buffer-basic.asm.comp @@ -4,9 +4,9 @@ ; Bound: 31 ; Schema: 0 OpCapability Shader - OpCapability VariablePointers + ;OpCapability VariablePointers OpExtension "SPV_KHR_storage_buffer_storage_class" - OpExtension "SPV_KHR_variable_pointers" + ;OpExtension "SPV_KHR_variable_pointers" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %22 "main" %gl_WorkGroupID OpSource OpenCL_C 120 diff --git a/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp b/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp new file mode 100644 index 00000000000..deaae421fdd --- /dev/null +++ b/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp @@ -0,0 +1,86 @@ +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 55 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpCapability GroupNonUniform + OpCapability GroupNonUniformBallot + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 64 1 1 + OpName %main "main" + OpName %WaveMatch "WaveMatch" + OpDecorate %8 DescriptorSet 0 + OpDecorate %8 Binding 0 + OpDecorate %11 DescriptorSet 0 + OpDecorate %11 Binding 0 + OpDecorate %11 NonReadable + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + %void = OpTypeVoid + %2 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %6 = OpTypeImage %uint Buffer 0 0 0 1 Unknown +%_ptr_UniformConstant_6 = OpTypePointer UniformConstant %6 + %8 = OpVariable %_ptr_UniformConstant_6 UniformConstant + %9 = OpTypeImage %uint Buffer 0 0 0 2 R32ui +%_ptr_UniformConstant_9 = OpTypePointer UniformConstant %9 + %11 = OpVariable %_ptr_UniformConstant_9 UniformConstant + 
%v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_0 = OpConstant %uint 0 + %v4uint = OpTypeVector %uint 4 + %24 = OpTypeFunction %v4uint %uint + %uint_3 = OpConstant %uint 3 + %bool = OpTypeBool + %uint_4 = OpConstant %uint 4 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %53 + %53 = OpLabel + %12 = OpLoad %9 %11 + %13 = OpLoad %6 %8 + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %20 = OpLoad %uint %18 + %22 = OpImageFetch %v4uint %13 %20 + %23 = OpCompositeExtract %uint %22 0 + %37 = OpFunctionCall %v4uint %WaveMatch %23 + %38 = OpCompositeExtract %uint %37 0 + %39 = OpCompositeExtract %uint %37 1 + %40 = OpCompositeExtract %uint %37 2 + %41 = OpCompositeExtract %uint %37 3 + %42 = OpIMul %uint %20 %uint_4 + %44 = OpCompositeConstruct %v4uint %38 %38 %38 %38 + OpImageWrite %12 %42 %44 + %45 = OpCompositeConstruct %v4uint %39 %39 %39 %39 + %46 = OpIAdd %uint %42 %uint_1 + OpImageWrite %12 %46 %45 + %48 = OpCompositeConstruct %v4uint %40 %40 %40 %40 + %49 = OpIAdd %uint %42 %uint_2 + OpImageWrite %12 %49 %48 + %51 = OpCompositeConstruct %v4uint %41 %41 %41 %41 + %52 = OpIAdd %uint %42 %uint_3 + OpImageWrite %12 %52 %51 + OpReturn + OpFunctionEnd + %WaveMatch = OpFunction %v4uint None %24 + %25 = OpFunctionParameter %uint + %27 = OpLabel + OpBranch %28 + %28 = OpLabel + OpLoopMerge %30 %29 None + OpBranch %29 + %29 = OpLabel + %31 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %25 + %34 = OpIEqual %bool %25 %31 + %35 = OpGroupNonUniformBallot %v4uint %uint_3 %34 + OpBranchConditional %34 %30 %28 + %30 = OpLabel + OpReturnValue %35 + OpFunctionEnd diff --git a/shaders-no-opt/asm/degenerate-selection-constructs.asm.frag b/shaders-no-opt/asm/degenerate-selection-constructs.asm.frag new file mode 100644 index 
00000000000..eac8fadf74d --- /dev/null +++ b/shaders-no-opt/asm/degenerate-selection-constructs.asm.frag @@ -0,0 +1,310 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 816 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 320 + OpName %main "main" + OpName %checkSwap_f1_f1_ "checkSwap(f1;f1;" + OpName %a "a" + OpName %b "b" + OpName %gl_FragCoord "gl_FragCoord" + OpName %buf1 "buf1" + OpMemberName %buf1 0 "resolution" + OpName %_ "" + OpName %i "i" + OpName %data "data" + OpName %buf0 "buf0" + OpMemberName %buf0 0 "injectionSwitch" + OpName %__0 "" + OpName %i_0 "i" + OpName %j "j" + OpName %doSwap "doSwap" + OpName %param "param" + OpName %param_0 "param" + OpName %temp "temp" + OpName %_GLF_color "_GLF_color" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpMemberDecorate %buf1 0 Offset 0 + OpDecorate %buf1 Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpMemberDecorate %buf0 0 Offset 0 + OpDecorate %buf0 Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %bool = OpTypeBool + %9 = OpTypeFunction %bool %_ptr_Function_float %_ptr_Function_float + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_ptr_Input_float = OpTypePointer Input %float + %v2float = OpTypeVector %float 2 + %buf1 = OpTypeStruct %v2float +%_ptr_Uniform_buf1 = OpTypePointer Uniform %buf1 + %_ = OpVariable %_ptr_Uniform_buf1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %float_2 
= OpConstant %float 2 +%_ptr_Function_bool = OpTypePointer Function %bool +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %uint_10 = OpConstant %uint 10 +%_arr_float_uint_10 = OpTypeArray %float %uint_10 +%_ptr_Function__arr_float_uint_10 = OpTypePointer Function %_arr_float_uint_10 + %buf0 = OpTypeStruct %v2float +%_ptr_Uniform_buf0 = OpTypePointer Uniform %buf0 + %__0 = OpVariable %_ptr_Uniform_buf0 Uniform + %int_1 = OpConstant %int 1 + %int_9 = OpConstant %int 9 + %uint_0 = OpConstant %uint 0 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_10 = OpConstant %float 10 + %int_5 = OpConstant %int 5 + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %false = OpConstantFalse %bool + %true = OpConstantTrue %bool + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %data = OpVariable %_ptr_Function__arr_float_uint_10 Function + %i_0 = OpVariable %_ptr_Function_int Function + %j = OpVariable %_ptr_Function_int Function + %doSwap = OpVariable %_ptr_Function_bool Function + %param = OpVariable %_ptr_Function_float Function + %param_0 = OpVariable %_ptr_Function_float Function + %temp = OpVariable %_ptr_Function_float Function + OpStore %i %int_0 + OpBranch %50 + %50 = OpLabel + OpLoopMerge %52 %53 None + OpBranch %54 + %54 = OpLabel + %55 = OpLoad %int %i + %57 = OpSLessThan %bool %55 %int_10 + OpBranchConditional %57 %51 %52 + %51 = OpLabel + %62 = OpLoad %int %i + %63 = OpLoad %int %i + %64 = OpISub %int %int_10 %63 + %65 = OpConvertSToF %float %64 + %69 = OpAccessChain %_ptr_Uniform_float %__0 %int_0 %uint_1 + %70 = OpLoad %float %69 + %71 = OpFMul %float %65 %70 + %72 = OpAccessChain %_ptr_Function_float %data %62 + OpStore %72 %71 + OpBranch %53 + %53 = OpLabel + %73 = OpLoad %int %i + %75 = OpIAdd %int %73 %int_1 + OpStore %i %75 + OpBranch %50 + %52 = OpLabel + OpStore %i_0 %int_0 + OpBranch %77 + %77 
= OpLabel + OpLoopMerge %79 %80 None + OpBranch %81 + %81 = OpLabel + %82 = OpLoad %int %i_0 + %84 = OpSLessThan %bool %82 %int_9 + OpBranchConditional %84 %78 %79 + %78 = OpLabel + OpStore %j %int_0 + OpBranch %86 + %86 = OpLabel + OpLoopMerge %88 %89 None + OpBranch %90 + %90 = OpLabel + %91 = OpLoad %int %j + %92 = OpSLessThan %bool %91 %int_10 + OpBranchConditional %92 %87 %88 + %87 = OpLabel + %93 = OpLoad %int %j + %94 = OpLoad %int %i_0 + %95 = OpIAdd %int %94 %int_1 + %96 = OpSLessThan %bool %93 %95 + OpSelectionMerge %98 None + OpBranchConditional %96 %97 %98 + %97 = OpLabel + OpBranch %89 + %98 = OpLabel + %101 = OpLoad %int %i_0 + %102 = OpLoad %int %j + %104 = OpAccessChain %_ptr_Function_float %data %101 + %105 = OpLoad %float %104 + OpStore %param %105 + %107 = OpAccessChain %_ptr_Function_float %data %102 + %108 = OpLoad %float %107 + OpStore %param_0 %108 + %109 = OpFunctionCall %bool %checkSwap_f1_f1_ %param %param_0 + OpStore %doSwap %109 + %110 = OpLoad %bool %doSwap + OpSelectionMerge %112 None + OpBranchConditional %110 %111 %112 + %111 = OpLabel + %114 = OpLoad %int %i_0 + %115 = OpAccessChain %_ptr_Function_float %data %114 + %116 = OpLoad %float %115 + OpStore %temp %116 + %117 = OpLoad %int %i_0 + %118 = OpLoad %int %j + %119 = OpAccessChain %_ptr_Function_float %data %118 + %120 = OpLoad %float %119 + %121 = OpAccessChain %_ptr_Function_float %data %117 + OpStore %121 %120 + %122 = OpLoad %int %j + %123 = OpLoad %float %temp + %124 = OpAccessChain %_ptr_Function_float %data %122 + OpStore %124 %123 + OpBranch %112 + %112 = OpLabel + OpBranch %89 + %89 = OpLabel + %125 = OpLoad %int %j + %126 = OpIAdd %int %125 %int_1 + OpStore %j %126 + OpBranch %86 + %88 = OpLabel + OpBranch %80 + %80 = OpLabel + %127 = OpLoad %int %i_0 + %128 = OpIAdd %int %127 %int_1 + OpStore %i_0 %128 + OpBranch %77 + %79 = OpLabel + %130 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %131 = OpLoad %float %130 + %132 = OpAccessChain %_ptr_Uniform_float %_ 
%int_0 %uint_0 + %133 = OpLoad %float %132 + %134 = OpFDiv %float %133 %float_2 + %135 = OpFOrdLessThan %bool %131 %134 + OpSelectionMerge %137 None + OpBranchConditional %135 %136 %153 + %136 = OpLabel + %140 = OpAccessChain %_ptr_Function_float %data %int_0 + %141 = OpLoad %float %140 + %143 = OpFDiv %float %141 %float_10 + %145 = OpAccessChain %_ptr_Function_float %data %int_5 + %146 = OpLoad %float %145 + %147 = OpFDiv %float %146 %float_10 + %148 = OpAccessChain %_ptr_Function_float %data %int_9 + %149 = OpLoad %float %148 + %150 = OpFDiv %float %149 %float_10 + %152 = OpCompositeConstruct %v4float %143 %147 %150 %float_1 + OpStore %_GLF_color %152 + OpBranch %137 + %153 = OpLabel + %154 = OpAccessChain %_ptr_Function_float %data %int_5 + %155 = OpLoad %float %154 + %156 = OpFDiv %float %155 %float_10 + %157 = OpAccessChain %_ptr_Function_float %data %int_9 + %158 = OpLoad %float %157 + %159 = OpFDiv %float %158 %float_10 + %160 = OpAccessChain %_ptr_Function_float %data %int_0 + %161 = OpLoad %float %160 + %162 = OpFDiv %float %161 %float_10 + %163 = OpCompositeConstruct %v4float %156 %159 %162 %float_1 + OpStore %_GLF_color %163 + OpBranch %137 + %137 = OpLabel + OpReturn + OpFunctionEnd +%checkSwap_f1_f1_ = OpFunction %bool None %9 + %a = OpFunctionParameter %_ptr_Function_float + %b = OpFunctionParameter %_ptr_Function_float + %13 = OpLabel + %35 = OpVariable %_ptr_Function_bool Function + %20 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_1 + %21 = OpLoad %float %20 + %29 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %uint_1 + %30 = OpLoad %float %29 + %32 = OpFDiv %float %30 %float_2 + %33 = OpFOrdLessThan %bool %21 %32 + OpBranch %36 + %36 = OpLabel + OpSelectionMerge %351 None + OpBranchConditional %33 %352 %354 + %352 = OpLabel + %353 = OpLoad %float %a + OpBranch %351 + %354 = OpLabel + %355 = OpCopyObject %float %float_0 + OpBranch %351 + %351 = OpLabel + %38 = OpPhi %float %353 %352 %355 %354 + OpSelectionMerge %386 None + 
OpBranchConditional %false %385 %385 + %385 = OpLabel + OpSelectionMerge %356 None + OpBranchConditional %33 %357 %359 + %357 = OpLabel + %358 = OpLoad %float %b + OpBranch %356 + %359 = OpLabel + %360 = OpCopyObject %float %float_0 + OpBranch %356 + %356 = OpLabel + %39 = OpPhi %float %358 %357 %360 %359 + %40 = OpFOrdGreaterThan %bool %38 %39 + OpBranch %362 + %362 = OpLabel + OpSelectionMerge %479 None + OpBranchConditional %33 %480 %479 + %480 = OpLabel + OpStore %35 %40 + OpBranch %479 + %479 = OpLabel + OpBranchConditional %true %361 %386 + %361 = OpLabel + OpBranch %386 + %386 = OpLabel + OpBranch %41 + %41 = OpLabel + OpSelectionMerge %363 None + OpBranchConditional %33 %366 %364 + %364 = OpLabel + %365 = OpLoad %float %a + OpBranch %363 + %366 = OpLabel + %367 = OpCopyObject %float %float_0 + OpBranch %363 + %363 = OpLabel + %42 = OpPhi %float %365 %364 %367 %366 + OpSelectionMerge %368 None + OpBranchConditional %33 %371 %369 + %369 = OpLabel + %370 = OpLoad %float %b + OpBranch %368 + %371 = OpLabel + %372 = OpCopyObject %float %float_0 + OpBranch %368 + %368 = OpLabel + %43 = OpPhi %float %370 %369 %372 %371 + %44 = OpFOrdLessThan %bool %42 %43 + OpSelectionMerge %373 None + OpBranchConditional %33 %373 %374 + %374 = OpLabel + OpStore %35 %44 + OpBranch %373 + %373 = OpLabel + OpBranch %37 + %37 = OpLabel + %45 = OpLoad %bool %35 + OpReturnValue %45 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag b/shaders-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag new file mode 100644 index 00000000000..6782b124730 --- /dev/null +++ b/shaders-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag @@ -0,0 +1,83 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 27 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %_ + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName 
%main "main" + OpMemberName %AA 0 "foo" + OpMemberName %AB 0 "foo" + OpMemberName %A 0 "_aa" + OpMemberName %A 1 "ab" + OpMemberName %BA 0 "foo" + OpMemberName %BB 0 "foo" + OpMemberName %B 0 "_ba" + OpMemberName %B 1 "bb" + OpName %VertexData "VertexData" + OpMemberName %VertexData 0 "_a" + OpMemberName %VertexData 1 "b" + OpName %_ "" + OpMemberName %CA 0 "foo" + OpMemberName %C 0 "_ca" + OpMemberName %DA 0 "foo" + OpMemberName %D 0 "da" + OpName %UBO "UBO" + OpMemberName %UBO 0 "_c" + OpMemberName %UBO 1 "d" + OpName %__0 "" + OpMemberName %E 0 "a" + OpName %SSBO "SSBO" + ;OpMemberName %SSBO 0 "e" Test that we don't try to assign bogus aliases. + OpMemberName %SSBO 1 "_e" + OpMemberName %SSBO 2 "f" + OpName %__1 "" + OpDecorate %VertexData Block + OpDecorate %_ Location 0 + OpMemberDecorate %CA 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %DA 0 Offset 0 + OpMemberDecorate %D 0 Offset 0 + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 1 Offset 16 + OpDecorate %UBO Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpMemberDecorate %E 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpMemberDecorate %SSBO 2 Offset 8 + OpDecorate %SSBO BufferBlock + OpDecorate %__1 DescriptorSet 0 + OpDecorate %__1 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %AA = OpTypeStruct %int + %AB = OpTypeStruct %int + %A = OpTypeStruct %AA %AB + %BA = OpTypeStruct %int + %BB = OpTypeStruct %int + %B = OpTypeStruct %BA %BB + %VertexData = OpTypeStruct %A %B +%_ptr_Input_VertexData = OpTypePointer Input %VertexData + %_ = OpVariable %_ptr_Input_VertexData Input + %CA = OpTypeStruct %int + %C = OpTypeStruct %CA + %DA = OpTypeStruct %int + %D = OpTypeStruct %DA + %UBO = OpTypeStruct %C %D +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %__0 = OpVariable %_ptr_Uniform_UBO Uniform + %E = OpTypeStruct %int + %SSBO = OpTypeStruct %E %E %E +%_ptr_Uniform_SSBO = OpTypePointer 
Uniform %SSBO + %__1 = OpVariable %_ptr_Uniform_SSBO Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/array-builtin-bitcast-load-store.asm.frag b/shaders-no-opt/asm/frag/array-builtin-bitcast-load-store.asm.frag new file mode 100644 index 00000000000..387764c92c8 --- /dev/null +++ b/shaders-no-opt/asm/frag/array-builtin-bitcast-load-store.asm.frag @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google Tint Compiler; 0 +; Bound: 29 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %fragColor %gl_SampleMask + OpExecutionMode %main OriginUpperLeft + OpName %fragColor "fragColor" + OpName %uBuffer "uBuffer" + OpMemberName %uBuffer 0 "color" + OpName %x_12 "x_12" + OpName %gl_SampleMask "gl_SampleMask" + OpName %main "main" + OpDecorate %fragColor Location 0 + OpDecorate %uBuffer Block + OpMemberDecorate %uBuffer 0 Offset 0 + OpDecorate %x_12 DescriptorSet 0 + OpDecorate %x_12 Binding 0 + OpDecorate %gl_SampleMask BuiltIn SampleMask + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %_ptr_Output_v4float = OpTypePointer Output %v4float + %5 = OpConstantNull %v4float + %fragColor = OpVariable %_ptr_Output_v4float Output %5 + %uBuffer = OpTypeStruct %v4float + %_ptr_Uniform_uBuffer = OpTypePointer Uniform %uBuffer + %x_12 = OpVariable %_ptr_Uniform_uBuffer Uniform + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 + %_arr_uint_uint_1 = OpTypeArray %uint %uint_1 +%_ptr_Output__arr_uint_uint_1 = OpTypePointer Output %_arr_uint_uint_1 + %14 = OpConstantNull %_arr_uint_uint_1 + %gl_SampleMask = OpVariable %_ptr_Output__arr_uint_uint_1 Output %14 + %void = OpTypeVoid + %15 = OpTypeFunction %void + %uint_0 = OpConstant %uint 0 + %_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %_ptr_Output_uint = OpTypePointer Output %uint + %int_6 = OpConstant %int 6 + %main = 
OpFunction %void None %15 + %18 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v4float %x_12 %uint_0 + %22 = OpLoad %v4float %21 + OpStore %fragColor %22 + %26 = OpAccessChain %_ptr_Output_uint %gl_SampleMask %int_0 + %27 = OpBitcast %uint %int_6 + OpStore %26 %27 + %loaded_scalar = OpLoad %uint %26 + OpStore %26 %loaded_scalar + %loaded = OpLoad %_arr_uint_uint_1 %gl_SampleMask + OpStore %gl_SampleMask %loaded + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/collapsed-switch-phi-flush.asm.frag b/shaders-no-opt/asm/frag/collapsed-switch-phi-flush.asm.frag new file mode 100644 index 00000000000..d5a07b5497e --- /dev/null +++ b/shaders-no-opt/asm/frag/collapsed-switch-phi-flush.asm.frag @@ -0,0 +1,38 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 20 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vIndex + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %vIndex "vIndex" + OpDecorate %FragColor Location 0 + OpDecorate %vIndex Flat + OpDecorate %vIndex Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %15 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Input_int = OpTypePointer Input %int + %vIndex = OpVariable %_ptr_Input_int Input + %main = OpFunction %void None %3 + %5 = OpLabel + OpSelectionMerge %9 None + OpSwitch %int_0 %9 + %9 = OpLabel + %tmp = OpPhi %v4float %15 %5 + OpStore %FragColor %tmp + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/combined-image-sampler-dxc-min16float.asm.invalid.frag 
b/shaders-no-opt/asm/frag/combined-image-sampler-dxc-min16float.asm.invalid.frag new file mode 100644 index 00000000000..dda2f0279ca --- /dev/null +++ b/shaders-no-opt/asm/frag/combined-image-sampler-dxc-min16float.asm.invalid.frag @@ -0,0 +1,95 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 48 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %PSMain "main" %in_var_COLOR %in_var_TEXCOORD0 %out_var_SV_TARGET + OpExecutionMode %PSMain OriginUpperLeft + ; Not actually ESSL, but makes testing easier. + OpSource ESSL 310 + OpName %type_2d_image "type.2d.image" + OpName %tex "tex" + OpName %type_sampler "type.sampler" + OpName %Samp "Samp" + OpName %in_var_COLOR "in.var.COLOR" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_TARGET "out.var.SV_TARGET" + OpName %PSMain "PSMain" + OpName %PSInput "PSInput" + OpMemberName %PSInput 0 "color" + OpMemberName %PSInput 1 "uv" + OpName %param_var_input "param.var.input" + OpName %src_PSMain "src.PSMain" + OpName %input "input" + OpName %bb_entry "bb.entry" + OpName %a "a" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %in_var_COLOR Location 0 + OpDecorate %in_var_TEXCOORD0 Location 1 + OpDecorate %out_var_SV_TARGET Location 0 + OpDecorate %tex DescriptorSet 0 + OpDecorate %tex Binding 0 + OpDecorate %Samp DescriptorSet 0 + OpDecorate %Samp Binding 1 + OpDecorate %tex RelaxedPrecision + OpDecorate %a RelaxedPrecision + OpDecorate %38 RelaxedPrecision + OpDecorate %45 RelaxedPrecision + OpDecorate %47 RelaxedPrecision + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = 
OpTypePointer Input %v4float + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %21 = OpTypeFunction %void + %PSInput = OpTypeStruct %v4float %v2float +%_ptr_Function_PSInput = OpTypePointer Function %PSInput + %31 = OpTypeFunction %v4float %_ptr_Function_PSInput +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Function_v2float = OpTypePointer Function %v2float +%type_sampled_image = OpTypeSampledImage %type_2d_image + %tex = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant + %Samp = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%in_var_COLOR = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_v2float Input +%out_var_SV_TARGET = OpVariable %_ptr_Output_v4float Output + %PSMain = OpFunction %void None %21 + %22 = OpLabel +%param_var_input = OpVariable %_ptr_Function_PSInput Function + %26 = OpLoad %v4float %in_var_COLOR + %27 = OpLoad %v2float %in_var_TEXCOORD0 + %28 = OpCompositeConstruct %PSInput %26 %27 + OpStore %param_var_input %28 + %29 = OpFunctionCall %v4float %src_PSMain %param_var_input + OpStore %out_var_SV_TARGET %29 + OpReturn + OpFunctionEnd + %src_PSMain = OpFunction %v4float None %31 + %input = OpFunctionParameter %_ptr_Function_PSInput + %bb_entry = OpLabel + %a = OpVariable %_ptr_Function_v4float Function + %36 = OpAccessChain %_ptr_Function_v4float %input %int_0 + %37 = OpLoad %v4float %36 + %38 = OpLoad %type_2d_image %tex + %39 = OpLoad %type_sampler %Samp + %41 = OpAccessChain %_ptr_Function_v2float %input %int_1 + %42 = OpLoad %v2float %41 + %44 = OpSampledImage %type_sampled_image %38 %39 + %45 = OpImageSampleImplicitLod %v4float %44 %42 None + %46 = OpFMul %v4float %37 %45 + OpStore %a %46 + %47 = OpLoad %v4float %a + OpReturnValue %47 + OpFunctionEnd + diff --git a/shaders-no-opt/asm/frag/complex-opaque-handle-reuse-in-loop.asm.frag 
b/shaders-no-opt/asm/frag/complex-opaque-handle-reuse-in-loop.asm.frag new file mode 100644 index 00000000000..ccb7a60fe71 --- /dev/null +++ b/shaders-no-opt/asm/frag/complex-opaque-handle-reuse-in-loop.asm.frag @@ -0,0 +1,120 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 71 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %ps_main "main" %out_var_SV_TARGET1 + OpExecutionMode %ps_main OriginUpperLeft + OpSource HLSL 600 + OpName %type_scene "type.scene" + OpMemberName %type_scene 0 "myConsts" + OpName %MyConsts "MyConsts" + OpMemberName %MyConsts 0 "opt" + OpName %scene "scene" + OpName %type_sampler "type.sampler" + OpName %mySampler "mySampler" + OpName %type_2d_image "type.2d.image" + OpName %texTable "texTable" + OpName %out_var_SV_TARGET1 "out.var.SV_TARGET1" + OpName %ps_main "ps_main" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %out_var_SV_TARGET1 Location 1 + OpDecorate %scene DescriptorSet 0 + OpDecorate %scene Binding 3 + OpDecorate %mySampler DescriptorSet 0 + OpDecorate %mySampler Binding 2 + OpDecorate %texTable DescriptorSet 0 + OpDecorate %texTable Binding 0 + OpMemberDecorate %MyConsts 0 Offset 0 + OpMemberDecorate %type_scene 0 Offset 0 + OpDecorate %type_scene Block + %float = OpTypeFloat 32 + %float_1 = OpConstant %float 1 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 +%uint_16777215 = OpConstant %uint 16777215 + %uint_0 = OpConstant %uint 0 + %float_0 = OpConstant %float 0 + %21 = OpConstantComposite %v2float %float_0 %float_0 + %MyConsts = OpTypeStruct %uint + %type_scene = OpTypeStruct %MyConsts +%_ptr_Uniform_type_scene = OpTypePointer Uniform %type_scene +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler + %uint_1 = OpConstant %uint 1 +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown 
+%_arr_type_2d_image_uint_1 = OpTypeArray %type_2d_image %uint_1 +%_ptr_UniformConstant__arr_type_2d_image_uint_1 = OpTypePointer UniformConstant %_arr_type_2d_image_uint_1 +%_ptr_Output_uint = OpTypePointer Output %uint + %void = OpTypeVoid + %29 = OpTypeFunction %void + %v4uint = OpTypeVector %uint 4 + %v3float = OpTypeVector %float 3 +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool +%type_sampled_image = OpTypeSampledImage %type_2d_image + %v4float = OpTypeVector %float 4 + %scene = OpVariable %_ptr_Uniform_type_scene Uniform + %mySampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %texTable = OpVariable %_ptr_UniformConstant__arr_type_2d_image_uint_1 UniformConstant +%out_var_SV_TARGET1 = OpVariable %_ptr_Output_uint Output + %float_n1 = OpConstant %float -1 + %37 = OpUndef %v4uint + %ps_main = OpFunction %void None %29 + %38 = OpLabel + OpSelectionMerge %39 None + OpSwitch %uint_0 %40 + %40 = OpLabel + %41 = OpCompositeExtract %uint %37 1 + %42 = OpBitwiseAnd %uint %41 %uint_16777215 + %43 = OpAccessChain %_ptr_UniformConstant_type_2d_image %texTable %42 + %44 = OpLoad %type_2d_image %43 + %45 = OpAccessChain %_ptr_Uniform_uint %scene %int_0 %int_0 + %46 = OpLoad %uint %45 + %47 = OpINotEqual %bool %46 %uint_0 + OpSelectionMerge %48 DontFlatten + OpBranchConditional %47 %49 %50 + %50 = OpLabel + %51 = OpLoad %type_sampler %mySampler + %52 = OpSampledImage %type_sampled_image %44 %51 + %53 = OpImageSampleExplicitLod %v4float %52 %21 Lod %float_0 + %54 = OpCompositeExtract %float %53 0 + OpBranch %39 + %49 = OpLabel + OpBranch %39 + %48 = OpLabel + OpUnreachable + %39 = OpLabel + %55 = OpPhi %float %54 %50 %float_1 %49 + %56 = OpCompositeConstruct %v3float %float_n1 %float_n1 %55 + OpSelectionMerge %57 None + OpSwitch %uint_0 %58 + %58 = OpLabel + OpSelectionMerge %59 DontFlatten + OpBranchConditional %47 %60 %61 + %61 = OpLabel + %62 = 
OpLoad %type_sampler %mySampler + %63 = OpSampledImage %type_sampled_image %44 %62 + %64 = OpImageSampleExplicitLod %v4float %63 %21 Lod %float_0 + %65 = OpCompositeExtract %float %64 0 + OpBranch %57 + %60 = OpLabel + OpBranch %57 + %59 = OpLabel + OpUnreachable + %57 = OpLabel + %66 = OpPhi %float %65 %61 %float_1 %60 + %67 = OpCompositeConstruct %v3float %float_1 %float_1 %66 + %68 = OpExtInst %v3float %1 Cross %56 %67 + %69 = OpCompositeExtract %float %68 0 + %70 = OpConvertFToU %uint %69 + OpStore %out_var_SV_TARGET1 %70 + OpReturn + OpFunctionEnd + diff --git a/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-1.asm.frag b/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-1.asm.frag new file mode 100644 index 00000000000..050a3385307 --- /dev/null +++ b/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-1.asm.frag @@ -0,0 +1,80 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 65 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values0" + OpName %_ "" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %__0 "" + OpName %FragColor "FragColor" + OpDecorate %_runtimearr_float ArrayStride 4 + OpMemberDecorate %SSBO 0 NonWritable + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %_runtimearr_float_0 ArrayStride 4 + OpMemberDecorate %SSBO1 0 NonWritable + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %float_0 = OpConstant %float 0 + %11 
= OpConstantComposite %v2float %float_0 %float_0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_16 = OpConstant %int 16 + %bool = OpTypeBool +%_runtimearr_float = OpTypeRuntimeArray %float + %SSBO = OpTypeStruct %_runtimearr_float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_runtimearr_float_0 = OpTypeRuntimeArray %float + %SSBO1 = OpTypeStruct %_runtimearr_float_0 +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %__0 = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int_1 = OpConstant %int 1 +%_ptr_Output_v2float = OpTypePointer Output %v2float + %FragColor = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %17 + %17 = OpLabel + %61 = OpPhi %v2float %11 %5 %d %18 + %60 = OpPhi %int %int_0 %5 %49 %18 + %25 = OpSLessThan %bool %60 %int_16 + OpLoopMerge %19 %18 None + OpBranchConditional %25 %pre18 %19 + %pre18 = OpLabel + OpBranch %18 + %18 = OpLabel + %32 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %60 + %43 = OpAccessChain %_ptr_Uniform_float %__0 %int_0 %60 + %33 = OpLoad %float %32 + %44 = OpLoad %float %43 + %a = OpFMul %v2float %61 %61 + %b = OpCompositeInsert %v2float %33 %a 0 + %c = OpCompositeInsert %v2float %44 %b 1 + %d = OpFAdd %v2float %61 %c + %49 = OpIAdd %int %60 %int_1 + OpBranch %17 + %19 = OpLabel + OpStore %FragColor %61 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-2.asm.frag b/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-2.asm.frag new file mode 100644 index 00000000000..14f1f6efd9e --- /dev/null +++ b/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-2.asm.frag @@ -0,0 +1,82 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 65 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment 
%main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values0" + OpName %_ "" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %__0 "" + OpName %FragColor "FragColor" + OpDecorate %_runtimearr_float ArrayStride 4 + OpMemberDecorate %SSBO 0 NonWritable + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %_runtimearr_float_0 ArrayStride 4 + OpMemberDecorate %SSBO1 0 NonWritable + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v2float %float_0 %float_0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_16 = OpConstant %int 16 + %bool = OpTypeBool +%_runtimearr_float = OpTypeRuntimeArray %float + %SSBO = OpTypeStruct %_runtimearr_float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_runtimearr_float_0 = OpTypeRuntimeArray %float + %SSBO1 = OpTypeStruct %_runtimearr_float_0 +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %__0 = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int_1 = OpConstant %int 1 +%_ptr_Output_v2float = OpTypePointer Output %v2float + %FragColor = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %17 + %17 = OpLabel + %61 = OpPhi %v2float %11 %5 %d %cont + %60 = OpPhi %int %int_0 %5 %49 %cont + %25 = OpSLessThan %bool %60 %int_16 + OpLoopMerge %19 %cont None + OpBranchConditional %25 %pre18 %19 + %pre18 = OpLabel + OpBranch %18 + %18 = OpLabel + %32 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %60 + %43 = 
OpAccessChain %_ptr_Uniform_float %__0 %int_0 %60 + %33 = OpLoad %float %32 + %44 = OpLoad %float %43 + %a = OpFMul %v2float %61 %61 + %b = OpCompositeInsert %v2float %33 %a 0 + %c = OpCompositeInsert %v2float %44 %b 1 + OpBranch %cont + %cont = OpLabel + %d = OpFAdd %v2float %61 %c + %49 = OpIAdd %int %60 %int_1 + OpBranch %17 + %19 = OpLabel + OpStore %FragColor %61 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/composite-insert-inheritance.asm.frag b/shaders-no-opt/asm/frag/composite-insert-inheritance.asm.frag new file mode 100644 index 00000000000..9408e69ac09 --- /dev/null +++ b/shaders-no-opt/asm/frag/composite-insert-inheritance.asm.frag @@ -0,0 +1,127 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vInput %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %vInput "vInput" + OpName %FragColor "FragColor" + OpName %phi "PHI" + OpDecorate %vInput RelaxedPrecision + OpDecorate %vInput Location 0 + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %b0 RelaxedPrecision + OpDecorate %b1 RelaxedPrecision + OpDecorate %b2 RelaxedPrecision + OpDecorate %b3 RelaxedPrecision + OpDecorate %c1 RelaxedPrecision + OpDecorate %c3 RelaxedPrecision + OpDecorate %d4_mp RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vInput = OpVariable %_ptr_Input_v4float Input + %float_1 = OpConstant %float 1 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %float_2 = OpConstant %float 2 + %uint_1 = OpConstant %uint 1 + %float_3 = OpConstant %float 3 
+ %uint_2 = OpConstant %uint 2 + %float_4 = OpConstant %float 4 + %uint_3 = OpConstant %uint 3 + %v4float_arr2 = OpTypeArray %v4float %uint_2 + %v44float = OpTypeMatrix %v4float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v4undef = OpUndef %v4float + %v4const = OpConstantNull %v4float + %v4arrconst = OpConstantNull %v4float_arr2 + %v44const = OpConstantNull %v44float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + + %loaded0 = OpLoad %v4float %vInput + + ; Basic case (highp). + %a0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %a1 = OpCompositeInsert %v4float %float_2 %a0 1 + %a2 = OpCompositeInsert %v4float %float_3 %a1 2 + %a3 = OpCompositeInsert %v4float %float_4 %a2 3 + OpStore %FragColor %a3 + + ; Basic case (mediump). + %b0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %b1 = OpCompositeInsert %v4float %float_2 %b0 1 + %b2 = OpCompositeInsert %v4float %float_3 %b1 2 + %b3 = OpCompositeInsert %v4float %float_4 %b2 3 + OpStore %FragColor %b3 + + ; Mix relaxed precision. + %c0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %c1 = OpCompositeInsert %v4float %float_2 %c0 1 + %c2 = OpCompositeInsert %v4float %float_3 %c1 2 + %c3 = OpCompositeInsert %v4float %float_4 %c2 3 + OpStore %FragColor %c3 + + ; SSA use after insert + %d0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %d1 = OpCompositeInsert %v4float %float_2 %d0 1 + %d2 = OpCompositeInsert %v4float %float_3 %d1 2 + %d3 = OpCompositeInsert %v4float %float_4 %d2 3 + %d4 = OpFAdd %v4float %d3 %d0 + OpStore %FragColor %d4 + %d4_mp = OpFAdd %v4float %d3 %d1 + OpStore %FragColor %d4_mp + + ; Verify Insert behavior on Undef. + %e0 = OpCompositeInsert %v4float %float_1 %v4undef 0 + %e1 = OpCompositeInsert %v4float %float_2 %e0 1 + %e2 = OpCompositeInsert %v4float %float_3 %e1 2 + %e3 = OpCompositeInsert %v4float %float_4 %e2 3 + OpStore %FragColor %e3 + + ; Verify Insert behavior on Constant. 
+ %f0 = OpCompositeInsert %v4float %float_1 %v4const 0 + OpStore %FragColor %f0 + + ; Verify Insert behavior on Array. + %g0 = OpCompositeInsert %v4float_arr2 %float_1 %v4arrconst 1 2 + %g1 = OpCompositeInsert %v4float_arr2 %float_2 %g0 0 3 + %g2 = OpCompositeExtract %v4float %g1 0 + OpStore %FragColor %g2 + %g3 = OpCompositeExtract %v4float %g1 1 + OpStore %FragColor %g3 + + ; Verify Insert behavior on Matrix. + %h0 = OpCompositeInsert %v44float %float_1 %v44const 1 2 + %h1 = OpCompositeInsert %v44float %float_2 %h0 2 3 + %h2 = OpCompositeExtract %v4float %h1 0 + OpStore %FragColor %h2 + %h3 = OpCompositeExtract %v4float %h1 1 + OpStore %FragColor %h3 + %h4 = OpCompositeExtract %v4float %h1 2 + OpStore %FragColor %h4 + %h5 = OpCompositeExtract %v4float %h1 3 + OpStore %FragColor %h5 + + ; Verify that we cannot RMW PHI variables. + OpBranch %next + %next = OpLabel + %phi = OpPhi %v4float %d2 %5 + %i0 = OpCompositeInsert %v4float %float_4 %phi 3 + OpStore %FragColor %i0 + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/demote-impure-function-call.vk.nocompat.asm.frag b/shaders-no-opt/asm/frag/demote-impure-function-call.vk.nocompat.asm.frag new file mode 100644 index 00000000000..9f1a4573ddc --- /dev/null +++ b/shaders-no-opt/asm/frag/demote-impure-function-call.vk.nocompat.asm.frag @@ -0,0 +1,63 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 33 +; Schema: 0 + OpCapability Shader + OpCapability DemoteToHelperInvocationEXT + OpExtension "SPV_EXT_demote_to_helper_invocation" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vA %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_demote_to_helper_invocation" + OpName %main "main" + OpName %foobar_i1_ "foobar(i1;" + OpName %a "a" + OpName %a_0 "a" + OpName %vA "vA" + OpName %param "param" + OpName %FragColor "FragColor" + OpDecorate %vA Flat + OpDecorate %vA 
Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %10 = OpTypeFunction %v4float %_ptr_Function_int + %int_0 = OpConstant %int 0 + %bool = OpTypeBool + %float_10 = OpConstant %float 10 + %21 = OpConstantComposite %v4float %float_10 %float_10 %float_10 %float_10 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_int = OpTypePointer Input %int + %vA = OpVariable %_ptr_Input_int Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %a_0 = OpVariable %_ptr_Function_v4float Function + %param = OpVariable %_ptr_Function_int Function + %29 = OpLoad %int %vA + OpStore %param %29 + %30 = OpFunctionCall %v4float %foobar_i1_ %param + OpStore %FragColor %21 + OpReturn + OpFunctionEnd + %foobar_i1_ = OpFunction %v4float None %10 + %a = OpFunctionParameter %_ptr_Function_int + %13 = OpLabel + %14 = OpLoad %int %a + %17 = OpSLessThan %bool %14 %int_0 + OpSelectionMerge %19 None + OpBranchConditional %17 %18 %19 + %18 = OpLabel + OpDemoteToHelperInvocationEXT + OpBranch %19 + %19 = OpLabel + OpReturnValue %21 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/discard-impure-function-call.asm.frag b/shaders-no-opt/asm/frag/discard-impure-function-call.asm.frag new file mode 100644 index 00000000000..0f039166b07 --- /dev/null +++ b/shaders-no-opt/asm/frag/discard-impure-function-call.asm.frag @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 34 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vA %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %foobar_i1_ "foobar(i1;" + 
OpName %a "a" + OpName %a_0 "a" + OpName %vA "vA" + OpName %param "param" + OpName %FragColor "FragColor" + OpDecorate %vA Flat + OpDecorate %vA Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %10 = OpTypeFunction %v4float %_ptr_Function_int + %int_0 = OpConstant %int 0 + %bool = OpTypeBool + %float_10 = OpConstant %float 10 + %22 = OpConstantComposite %v4float %float_10 %float_10 %float_10 %float_10 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_int = OpTypePointer Input %int + %vA = OpVariable %_ptr_Input_int Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %a_0 = OpVariable %_ptr_Function_v4float Function + %param = OpVariable %_ptr_Function_int Function + %30 = OpLoad %int %vA + OpStore %param %30 + %31 = OpFunctionCall %v4float %foobar_i1_ %param + OpStore %FragColor %22 + OpReturn + OpFunctionEnd + %foobar_i1_ = OpFunction %v4float None %10 + %a = OpFunctionParameter %_ptr_Function_int + %13 = OpLabel + %14 = OpLoad %int %a + %17 = OpSLessThan %bool %14 %int_0 + OpSelectionMerge %19 None + OpBranchConditional %17 %18 %19 + %18 = OpLabel + OpKill + %19 = OpLabel + OpReturnValue %22 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/do-while-continue-phi.asm.invalid.frag b/shaders-no-opt/asm/frag/do-while-continue-phi.asm.invalid.frag new file mode 100644 index 00000000000..97400dfb16e --- /dev/null +++ b/shaders-no-opt/asm/frag/do-while-continue-phi.asm.invalid.frag @@ -0,0 +1,64 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 42 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord %_GLF_color + 
OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %gl_FragCoord "gl_FragCoord" + OpName %_GLF_color "_GLF_color" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %bool = OpTypeBool + %false = OpConstantFalse %bool +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %31 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %true = OpConstantTrue %bool + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %33 + %33 = OpLabel + OpLoopMerge %32 %35 None + OpBranch %6 + %6 = OpLabel + OpLoopMerge %8 %24 None + OpBranch %7 + %7 = OpLabel + %17 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %18 = OpLoad %float %17 + %22 = OpFOrdNotEqual %bool %18 %18 + OpSelectionMerge %24 None + OpBranchConditional %22 %23 %24 + %23 = OpLabel + OpBranch %8 + %24 = OpLabel + OpBranchConditional %false %6 %8 + %8 = OpLabel + %41 = OpPhi %bool %true %23 %false %24 + OpSelectionMerge %39 None + OpBranchConditional %41 %32 %39 + %39 = OpLabel + OpStore %_GLF_color %31 + OpBranch %32 + %35 = OpLabel + OpBranch %33 + %32 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/early-conditional-return-switch.asm.frag b/shaders-no-opt/asm/frag/early-conditional-return-switch.asm.frag new file mode 100644 index 00000000000..d789ce36b0d --- /dev/null +++ b/shaders-no-opt/asm/frag/early-conditional-return-switch.asm.frag @@ -0,0 +1,133 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google spiregg; 0 +; Bound: 81 +; Schema: 0 + OpCapability 
Shader + OpCapability Sampled1D + OpCapability Image1D + OpCapability SampledBuffer + OpCapability ImageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %PsTextureLoadArray "main" %gl_FragCoord %out_var_SV_TARGET + OpExecutionMode %PsTextureLoadArray OriginUpperLeft + OpSource HLSL 500 + OpName %type_2d_image "type.2d.image" + OpName %type_gCBuffarrayIndex "type.gCBuffarrayIndex" + OpMemberName %type_gCBuffarrayIndex 0 "gArrayIndex" + OpName %gCBuffarrayIndex "gCBuffarrayIndex" + OpName %g_textureArray0 "g_textureArray0" + OpName %g_textureArray1 "g_textureArray1" + OpName %g_textureArray2 "g_textureArray2" + OpName %g_textureArray3 "g_textureArray3" + OpName %out_var_SV_TARGET "out.var.SV_TARGET" + OpName %PsTextureLoadArray "PsTextureLoadArray" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %out_var_SV_TARGET Location 0 + OpDecorate %gCBuffarrayIndex DescriptorSet 0 + OpDecorate %gCBuffarrayIndex Binding 0 + OpDecorate %g_textureArray0 DescriptorSet 0 + OpDecorate %g_textureArray0 Binding 0 + OpDecorate %g_textureArray1 DescriptorSet 0 + OpDecorate %g_textureArray1 Binding 1 + OpDecorate %g_textureArray2 DescriptorSet 0 + OpDecorate %g_textureArray2 Binding 2 + OpDecorate %g_textureArray3 DescriptorSet 0 + OpDecorate %g_textureArray3 Binding 3 + OpMemberDecorate %type_gCBuffarrayIndex 0 Offset 0 + OpDecorate %type_gCBuffarrayIndex Block + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %v4float = OpTypeVector %float 4 + %18 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1 +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_gCBuffarrayIndex = OpTypeStruct %uint +%_ptr_Uniform_type_gCBuffarrayIndex = OpTypePointer Uniform %type_gCBuffarrayIndex +%_ptr_Input_v4float = OpTypePointer Input %v4float 
+%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %24 = OpTypeFunction %void +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %v3int = OpTypeVector %int 3 + %v2int = OpTypeVector %int 2 +%gCBuffarrayIndex = OpVariable %_ptr_Uniform_type_gCBuffarrayIndex Uniform +%g_textureArray0 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%g_textureArray1 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%g_textureArray2 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%g_textureArray3 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_TARGET = OpVariable %_ptr_Output_v4float Output + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool + %false = OpConstantFalse %bool + %true = OpConstantTrue %bool + %32 = OpUndef %v4float +%PsTextureLoadArray = OpFunction %void None %24 + %33 = OpLabel + %34 = OpLoad %v4float %gl_FragCoord + OpSelectionMerge %35 None + OpSwitch %uint_0 %36 + %36 = OpLabel + %37 = OpAccessChain %_ptr_Uniform_uint %gCBuffarrayIndex %int_0 + %38 = OpLoad %uint %37 + OpSelectionMerge %39 None + OpSwitch %38 %40 0 %41 1 %42 2 %43 3 %44 + %41 = OpLabel + %45 = OpCompositeExtract %float %34 0 + %46 = OpCompositeExtract %float %34 1 + %47 = OpConvertFToS %int %45 + %48 = OpConvertFToS %int %46 + %49 = OpCompositeConstruct %v3int %47 %48 %int_0 + %50 = OpVectorShuffle %v2int %49 %49 0 1 + %51 = OpLoad %type_2d_image %g_textureArray0 + %52 = OpImageFetch %v4float %51 %50 Lod %int_0 + OpBranch %39 + %42 = OpLabel + %53 = OpCompositeExtract %float %34 0 + %54 = OpCompositeExtract %float %34 1 + %55 = OpConvertFToS %int %53 + %56 = OpConvertFToS %int %54 + %57 = OpCompositeConstruct %v3int %55 %56 %int_0 + %58 = OpVectorShuffle %v2int %57 %57 0 1 + %59 = OpLoad %type_2d_image %g_textureArray1 + %60 = OpImageFetch %v4float %59 %58 Lod %int_0 + OpBranch %39 + %43 = OpLabel + %61 = OpCompositeExtract %float %34 0 + %62 
= OpCompositeExtract %float %34 1 + %63 = OpConvertFToS %int %61 + %64 = OpConvertFToS %int %62 + %65 = OpCompositeConstruct %v3int %63 %64 %int_0 + %66 = OpVectorShuffle %v2int %65 %65 0 1 + %67 = OpLoad %type_2d_image %g_textureArray2 + %68 = OpImageFetch %v4float %67 %66 Lod %int_0 + OpBranch %39 + %44 = OpLabel + %69 = OpCompositeExtract %float %34 0 + %70 = OpCompositeExtract %float %34 1 + %71 = OpConvertFToS %int %69 + %72 = OpConvertFToS %int %70 + %73 = OpCompositeConstruct %v3int %71 %72 %int_0 + %74 = OpVectorShuffle %v2int %73 %73 0 1 + %75 = OpLoad %type_2d_image %g_textureArray3 + %76 = OpImageFetch %v4float %75 %74 Lod %int_0 + OpBranch %39 + %40 = OpLabel + OpBranch %39 + %39 = OpLabel + %77 = OpPhi %v4float %52 %41 %60 %42 %68 %43 %76 %44 %32 %40 + %78 = OpPhi %bool %true %41 %true %42 %true %43 %true %44 %false %40 + OpSelectionMerge %79 None + OpBranchConditional %78 %35 %79 + %79 = OpLabel + OpBranch %35 + %35 = OpLabel + %80 = OpPhi %v4float %77 %39 %18 %79 + OpStore %out_var_SV_TARGET %80 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/empty-struct-in-struct.asm.frag b/shaders-no-opt/asm/frag/empty-struct-in-struct.asm.frag new file mode 100644 index 00000000000..a9650ddbb6b --- /dev/null +++ b/shaders-no-opt/asm/frag/empty-struct-in-struct.asm.frag @@ -0,0 +1,61 @@ +; SPIR-V +; Version: 1.2 +; Generator: Khronos; 0 +; Bound: 43 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %EntryPoint_Main "main" + OpExecutionMode %EntryPoint_Main OriginUpperLeft + OpSource Unknown 100 + OpName %EmptyStructTest "EmptyStructTest" + OpName %EmptyStruct2Test "EmptyStruct2Test" + OpName %GetValue "GetValue" + OpName %GetValue2 "GetValue" + OpName %self "self" + OpName %self2 "self" + OpName %emptyStruct "emptyStruct" + OpName %value "value" + OpName %EntryPoint_Main "EntryPoint_Main" + +%EmptyStructTest = OpTypeStruct +%EmptyStruct2Test = OpTypeStruct %EmptyStructTest +%_ptr_Function_EmptyStruct2Test = 
OpTypePointer Function %EmptyStruct2Test + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %5 = OpTypeFunction %float %_ptr_Function_EmptyStruct2Test + %6 = OpTypeFunction %float %EmptyStruct2Test + %void = OpTypeVoid +%_ptr_Function_void = OpTypePointer Function %void + %8 = OpTypeFunction %void %_ptr_Function_EmptyStruct2Test + %9 = OpTypeFunction %void + %float_0 = OpConstant %float 0 + %value4 = OpConstantNull %EmptyStruct2Test + + %GetValue = OpFunction %float None %5 + %self = OpFunctionParameter %_ptr_Function_EmptyStruct2Test + %13 = OpLabel + OpReturnValue %float_0 + OpFunctionEnd + + %GetValue2 = OpFunction %float None %6 + %self2 = OpFunctionParameter %EmptyStruct2Test + %14 = OpLabel + OpReturnValue %float_0 + OpFunctionEnd + +%EntryPoint_Main = OpFunction %void None %9 + %37 = OpLabel + %emptyStruct = OpVariable %_ptr_Function_EmptyStruct2Test Function + %18 = OpVariable %_ptr_Function_EmptyStruct2Test Function + %value = OpVariable %_ptr_Function_float Function + %value2 = OpCompositeConstruct %EmptyStructTest + %value3 = OpCompositeConstruct %EmptyStruct2Test %value2 + %22 = OpFunctionCall %float %GetValue %emptyStruct + %23 = OpFunctionCall %float %GetValue2 %value3 + %24 = OpFunctionCall %float %GetValue2 %value4 + OpStore %value %22 + OpStore %value %23 + OpStore %value %24 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.frag b/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.invalid.frag similarity index 100% rename from shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.frag rename to shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.invalid.frag diff --git a/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.frag b/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.invalid.frag similarity index 100% rename from 
shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.frag rename to shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.invalid.frag diff --git a/shaders-no-opt/asm/frag/late-expression-invalidation-2.asm.frag b/shaders-no-opt/asm/frag/late-expression-invalidation-2.asm.frag new file mode 100644 index 00000000000..a10970e9637 --- /dev/null +++ b/shaders-no-opt/asm/frag/late-expression-invalidation-2.asm.frag @@ -0,0 +1,625 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 761 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 320 + OpName %main "main" + OpName %pos "pos" + OpName %gl_FragCoord "gl_FragCoord" + OpName %buf0 "buf0" + OpMemberName %buf0 0 "resolution" + OpName %_ "" + OpName %ipos "ipos" + OpName %i "i" + OpName %map "map" + OpName %p "p" + OpName %canwalk "canwalk" + OpName %v "v" + OpName %directions "directions" + OpName %j "j" + OpName %d "d" + OpName %_GLF_color "_GLF_color" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpMemberDecorate %buf0 0 Offset 0 + OpDecorate %buf0 Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%_ptr_Function_v2float = OpTypePointer Function %v2float + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %buf0 = OpTypeStruct %v2float +%_ptr_Uniform_buf0 = OpTypePointer Uniform %buf0 + %_ = OpVariable %_ptr_Uniform_buf0 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %v2int = OpTypeVector %int 2 +%_ptr_Function_v2int = OpTypePointer Function %v2int + %uint = 
OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %float_16 = OpConstant %float 16 + %uint_1 = OpConstant %uint 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_256 = OpConstant %int 256 + %bool = OpTypeBool + %uint_256 = OpConstant %uint 256 +%_arr_int_uint_256 = OpTypeArray %int %uint_256 +%_ptr_Private__arr_int_uint_256 = OpTypePointer Private %_arr_int_uint_256 + %map = OpVariable %_ptr_Private__arr_int_uint_256 Private +%_ptr_Private_int = OpTypePointer Private %int + %int_1 = OpConstant %int 1 + %63 = OpConstantComposite %v2int %int_0 %int_0 +%_ptr_Function_bool = OpTypePointer Function %bool + %true = OpConstantTrue %bool + %int_2 = OpConstant %int 2 + %int_16 = OpConstant %int 16 + %int_14 = OpConstant %int 14 + %false = OpConstantFalse %bool + %int_8 = OpConstant %int 8 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %437 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_0 = OpConstant %float 0 + %441 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 +%mat2v4float = OpTypeMatrix %v4float 2 +%_ptr_Private_mat2v4float = OpTypePointer Private %mat2v4float + %556 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %557 = OpConstantComposite %mat2v4float %556 %556 + %558 = OpVariable %_ptr_Private_mat2v4float Private %557 + %760 = OpConstantNull %bool + %main = OpFunction %void None %3 + %5 = OpLabel + %pos = OpVariable %_ptr_Function_v2float Function + %ipos = OpVariable %_ptr_Function_v2int Function + %i = OpVariable %_ptr_Function_int Function + %p = OpVariable %_ptr_Function_v2int Function + %canwalk = OpVariable %_ptr_Function_bool Function + %v = OpVariable %_ptr_Function_int Function + %directions = OpVariable %_ptr_Function_int Function + %j = OpVariable %_ptr_Function_int Function + %d = OpVariable %_ptr_Function_int Function + %13 = 
OpLoad %v4float %gl_FragCoord + %14 = OpVectorShuffle %v2float %13 %13 0 1 + %564 = OpISub %int %int_256 %int_14 + %21 = OpAccessChain %_ptr_Uniform_v2float %_ %int_0 + %22 = OpLoad %v2float %21 + %566 = OpSNegate %int %564 + %23 = OpFDiv %v2float %14 %22 + OpStore %pos %23 + %30 = OpAccessChain %_ptr_Function_float %pos %uint_0 + %31 = OpLoad %float %30 + %33 = OpFMul %float %31 %float_16 + %34 = OpConvertFToS %int %33 + %36 = OpAccessChain %_ptr_Function_float %pos %uint_1 + %37 = OpLoad %float %36 + %38 = OpFMul %float %37 %float_16 + %39 = OpConvertFToS %int %38 + %40 = OpCompositeConstruct %v2int %34 %39 + OpStore %ipos %40 + OpStore %i %int_0 + OpBranch %43 + %43 = OpLabel + OpLoopMerge %45 %46 None + OpBranch %47 + %47 = OpLabel + %48 = OpLoad %int %i + %51 = OpSLessThan %bool %48 %int_256 + OpBranchConditional %51 %44 %45 + %44 = OpLabel + %56 = OpLoad %int %i + %58 = OpAccessChain %_ptr_Private_int %map %56 + OpStore %58 %int_0 + OpBranch %46 + %46 = OpLabel + %59 = OpLoad %int %i + %61 = OpIAdd %int %59 %int_1 + OpStore %i %61 + OpBranch %43 + %45 = OpLabel + OpStore %p %63 + OpStore %canwalk %true + OpStore %v %int_0 + OpBranch %68 + %68 = OpLabel + OpLoopMerge %70 %71 None + OpBranch %69 + %69 = OpLabel + %72 = OpLoad %int %v + %73 = OpIAdd %int %72 %int_1 + OpStore %v %73 + OpStore %directions %int_0 + %75 = OpAccessChain %_ptr_Function_int %p %uint_0 + %76 = OpLoad %int %75 + %77 = OpSGreaterThan %bool %76 %int_0 + OpSelectionMerge %79 None + OpBranchConditional %77 %78 %79 + %78 = OpLabel + %80 = OpAccessChain %_ptr_Function_int %p %uint_0 + %81 = OpLoad %int %80 + %83 = OpISub %int %81 %int_2 + %84 = OpAccessChain %_ptr_Function_int %p %uint_1 + %85 = OpLoad %int %84 + %87 = OpIMul %int %85 %int_16 + %88 = OpIAdd %int %83 %87 + %89 = OpAccessChain %_ptr_Private_int %map %88 + %90 = OpLoad %int %89 + %91 = OpIEqual %bool %90 %int_0 + OpBranch %79 + %79 = OpLabel + %92 = OpPhi %bool %77 %69 %91 %78 + OpSelectionMerge %94 None + OpBranchConditional %92 
%93 %94 + %93 = OpLabel + %95 = OpLoad %int %directions + %96 = OpIAdd %int %95 %int_1 + OpStore %directions %96 + OpBranch %94 + %94 = OpLabel + %97 = OpAccessChain %_ptr_Function_int %p %uint_1 + %98 = OpLoad %int %97 + %99 = OpSGreaterThan %bool %98 %int_0 + OpSelectionMerge %101 None + OpBranchConditional %99 %100 %101 + %100 = OpLabel + %102 = OpAccessChain %_ptr_Function_int %p %uint_0 + %103 = OpLoad %int %102 + %104 = OpAccessChain %_ptr_Function_int %p %uint_1 + %105 = OpLoad %int %104 + %106 = OpISub %int %105 %int_2 + %107 = OpIMul %int %106 %int_16 + %108 = OpIAdd %int %103 %107 + %109 = OpAccessChain %_ptr_Private_int %map %108 + %110 = OpLoad %int %109 + %111 = OpIEqual %bool %110 %int_0 + OpBranch %101 + %101 = OpLabel + %112 = OpPhi %bool %99 %94 %111 %100 + OpSelectionMerge %114 None + OpBranchConditional %112 %113 %114 + %113 = OpLabel + %115 = OpLoad %int %directions + %116 = OpIAdd %int %115 %int_1 + OpStore %directions %116 + OpBranch %114 + %114 = OpLabel + %117 = OpAccessChain %_ptr_Function_int %p %uint_0 + %118 = OpLoad %int %117 + %120 = OpSLessThan %bool %118 %int_14 + OpSelectionMerge %122 None + OpBranchConditional %120 %121 %122 + %121 = OpLabel + %123 = OpAccessChain %_ptr_Function_int %p %uint_0 + %124 = OpLoad %int %123 + %125 = OpIAdd %int %124 %int_2 + %126 = OpAccessChain %_ptr_Function_int %p %uint_1 + %127 = OpLoad %int %126 + %128 = OpIMul %int %127 %int_16 + %129 = OpIAdd %int %125 %128 + %130 = OpAccessChain %_ptr_Private_int %map %129 + %131 = OpLoad %int %130 + %132 = OpIEqual %bool %131 %int_0 + OpBranch %122 + %122 = OpLabel + %133 = OpPhi %bool %120 %114 %132 %121 + OpSelectionMerge %135 None + OpBranchConditional %133 %134 %135 + %134 = OpLabel + %136 = OpLoad %int %directions + %137 = OpIAdd %int %136 %int_1 + OpStore %directions %137 + OpBranch %135 + %135 = OpLabel + %594 = OpISub %int %int_256 %566 + %138 = OpAccessChain %_ptr_Function_int %p %uint_1 + %139 = OpLoad %int %138 + %140 = OpSLessThan %bool %139 %int_14 
+ OpSelectionMerge %142 None + OpBranchConditional %140 %141 %142 + %141 = OpLabel + %143 = OpAccessChain %_ptr_Function_int %p %uint_0 + %144 = OpLoad %int %143 + %145 = OpAccessChain %_ptr_Function_int %p %uint_1 + %146 = OpLoad %int %145 + %147 = OpIAdd %int %146 %int_2 + %148 = OpIMul %int %147 %int_16 + %149 = OpIAdd %int %144 %148 + %150 = OpAccessChain %_ptr_Private_int %map %149 + %151 = OpLoad %int %150 + %152 = OpIEqual %bool %151 %int_0 + OpBranch %142 + %142 = OpLabel + %153 = OpPhi %bool %140 %135 %152 %141 + OpSelectionMerge %155 None + OpBranchConditional %153 %154 %155 + %154 = OpLabel + %156 = OpLoad %int %directions + %157 = OpIAdd %int %156 %int_1 + OpStore %directions %157 + OpBranch %155 + %155 = OpLabel + %158 = OpLoad %int %directions + %159 = OpIEqual %bool %158 %int_0 + OpSelectionMerge %161 None + OpBranchConditional %159 %160 %207 + %160 = OpLabel + OpStore %canwalk %false + OpStore %i %int_0 + OpBranch %163 + %163 = OpLabel + OpLoopMerge %165 %166 None + OpBranch %167 + %167 = OpLabel + %168 = OpLoad %int %i + %170 = OpSLessThan %bool %168 %int_8 + OpBranchConditional %170 %164 %165 + %164 = OpLabel + OpStore %j %int_0 + %609 = OpISub %int %594 %168 + OpStore %558 %557 + OpBranchConditional %760 %166 %172 + %172 = OpLabel + OpLoopMerge %174 %175 Unroll + OpBranch %176 + %176 = OpLabel + %177 = OpLoad %int %j + %178 = OpSLessThan %bool %177 %int_8 + OpBranchConditional %178 %173 %174 + %173 = OpLabel + %179 = OpLoad %int %j + %180 = OpIMul %int %179 %int_2 + %181 = OpLoad %int %i + %182 = OpIMul %int %181 %int_2 + %183 = OpIMul %int %182 %int_16 + %184 = OpIAdd %int %180 %183 + %185 = OpAccessChain %_ptr_Private_int %map %184 + %186 = OpLoad %int %185 + %187 = OpIEqual %bool %186 %int_0 + OpSelectionMerge %189 None + OpBranchConditional %187 %188 %189 + %188 = OpLabel + %190 = OpLoad %int %j + %191 = OpIMul %int %190 %int_2 + %192 = OpAccessChain %_ptr_Function_int %p %uint_0 + OpStore %192 %191 + %193 = OpLoad %int %i + %194 = OpIMul 
%int %193 %int_2 + %195 = OpAccessChain %_ptr_Function_int %p %uint_1 + OpStore %195 %194 + OpStore %canwalk %true + OpBranch %189 + %189 = OpLabel + OpBranch %175 + %175 = OpLabel + %196 = OpLoad %int %j + %197 = OpIAdd %int %196 %int_1 + OpStore %j %197 + OpBranch %172 + %174 = OpLabel + OpBranch %166 + %166 = OpLabel + %198 = OpLoad %int %i + %199 = OpIAdd %int %198 %int_1 + OpStore %i %199 + OpBranch %163 + %165 = OpLabel + %200 = OpAccessChain %_ptr_Function_int %p %uint_0 + %201 = OpLoad %int %200 + %202 = OpAccessChain %_ptr_Function_int %p %uint_1 + %203 = OpLoad %int %202 + %204 = OpIMul %int %203 %int_16 + %205 = OpIAdd %int %201 %204 + %206 = OpAccessChain %_ptr_Private_int %map %205 + OpStore %206 %int_1 + OpBranch %161 + %207 = OpLabel + %209 = OpLoad %int %v + %210 = OpLoad %int %directions + %211 = OpSMod %int %209 %210 + OpStore %d %211 + %212 = OpLoad %int %directions + %213 = OpLoad %int %v + %214 = OpIAdd %int %213 %212 + OpStore %v %214 + %215 = OpLoad %int %d + %216 = OpSGreaterThanEqual %bool %215 %int_0 + OpSelectionMerge %218 None + OpBranchConditional %216 %217 %218 + %217 = OpLabel + %219 = OpAccessChain %_ptr_Function_int %p %uint_0 + %220 = OpLoad %int %219 + %221 = OpSGreaterThan %bool %220 %int_0 + OpBranch %218 + %218 = OpLabel + %222 = OpPhi %bool %216 %207 %221 %217 + OpSelectionMerge %224 None + OpBranchConditional %222 %223 %224 + %223 = OpLabel + %225 = OpAccessChain %_ptr_Function_int %p %uint_0 + %226 = OpLoad %int %225 + %227 = OpISub %int %226 %int_2 + %228 = OpAccessChain %_ptr_Function_int %p %uint_1 + %229 = OpLoad %int %228 + %230 = OpIMul %int %229 %int_16 + %231 = OpIAdd %int %227 %230 + %232 = OpAccessChain %_ptr_Private_int %map %231 + %233 = OpLoad %int %232 + %234 = OpIEqual %bool %233 %int_0 + OpBranch %224 + %224 = OpLabel + %235 = OpPhi %bool %222 %218 %234 %223 + OpSelectionMerge %237 None + OpBranchConditional %235 %236 %237 + %236 = OpLabel + %238 = OpLoad %int %d + %239 = OpISub %int %238 %int_1 + OpStore %d 
%239 + %240 = OpAccessChain %_ptr_Function_int %p %uint_0 + %241 = OpLoad %int %240 + %242 = OpAccessChain %_ptr_Function_int %p %uint_1 + %243 = OpLoad %int %242 + %244 = OpIMul %int %243 %int_16 + %245 = OpIAdd %int %241 %244 + %246 = OpAccessChain %_ptr_Private_int %map %245 + OpStore %246 %int_1 + %247 = OpAccessChain %_ptr_Function_int %p %uint_0 + %248 = OpLoad %int %247 + %249 = OpISub %int %248 %int_1 + %250 = OpAccessChain %_ptr_Function_int %p %uint_1 + %251 = OpLoad %int %250 + %252 = OpIMul %int %251 %int_16 + %253 = OpIAdd %int %249 %252 + %254 = OpAccessChain %_ptr_Private_int %map %253 + OpStore %254 %int_1 + %255 = OpAccessChain %_ptr_Function_int %p %uint_0 + %256 = OpLoad %int %255 + %257 = OpISub %int %256 %int_2 + %258 = OpAccessChain %_ptr_Function_int %p %uint_1 + %259 = OpLoad %int %258 + %260 = OpIMul %int %259 %int_16 + %261 = OpIAdd %int %257 %260 + %262 = OpAccessChain %_ptr_Private_int %map %261 + OpStore %262 %int_1 + %263 = OpAccessChain %_ptr_Function_int %p %uint_0 + %264 = OpLoad %int %263 + %265 = OpISub %int %264 %int_2 + %266 = OpAccessChain %_ptr_Function_int %p %uint_0 + OpStore %266 %265 + OpBranch %237 + %237 = OpLabel + %267 = OpLoad %int %d + %268 = OpSGreaterThanEqual %bool %267 %int_0 + OpSelectionMerge %270 None + OpBranchConditional %268 %269 %270 + %269 = OpLabel + %271 = OpAccessChain %_ptr_Function_int %p %uint_1 + %272 = OpLoad %int %271 + %273 = OpSGreaterThan %bool %272 %int_0 + OpBranch %270 + %270 = OpLabel + %274 = OpPhi %bool %268 %237 %273 %269 + OpSelectionMerge %276 None + OpBranchConditional %274 %275 %276 + %275 = OpLabel + %277 = OpAccessChain %_ptr_Function_int %p %uint_0 + %278 = OpLoad %int %277 + %279 = OpAccessChain %_ptr_Function_int %p %uint_1 + %280 = OpLoad %int %279 + %281 = OpISub %int %280 %int_2 + %282 = OpIMul %int %281 %int_16 + %283 = OpIAdd %int %278 %282 + %284 = OpAccessChain %_ptr_Private_int %map %283 + %285 = OpLoad %int %284 + %286 = OpIEqual %bool %285 %int_0 + OpBranch %276 + 
%276 = OpLabel + %287 = OpPhi %bool %274 %270 %286 %275 + OpSelectionMerge %289 None + OpBranchConditional %287 %288 %289 + %288 = OpLabel + %290 = OpLoad %int %d + %291 = OpISub %int %290 %int_1 + OpStore %d %291 + %292 = OpAccessChain %_ptr_Function_int %p %uint_0 + %293 = OpLoad %int %292 + %294 = OpAccessChain %_ptr_Function_int %p %uint_1 + %295 = OpLoad %int %294 + %296 = OpIMul %int %295 %int_16 + %297 = OpIAdd %int %293 %296 + %298 = OpAccessChain %_ptr_Private_int %map %297 + OpStore %298 %int_1 + %299 = OpAccessChain %_ptr_Function_int %p %uint_0 + %300 = OpLoad %int %299 + %301 = OpAccessChain %_ptr_Function_int %p %uint_1 + %302 = OpLoad %int %301 + %303 = OpISub %int %302 %int_1 + %304 = OpIMul %int %303 %int_16 + %305 = OpIAdd %int %300 %304 + %306 = OpAccessChain %_ptr_Private_int %map %305 + OpStore %306 %int_1 + %307 = OpAccessChain %_ptr_Function_int %p %uint_0 + %308 = OpLoad %int %307 + %309 = OpAccessChain %_ptr_Function_int %p %uint_1 + %310 = OpLoad %int %309 + %311 = OpISub %int %310 %int_2 + %312 = OpIMul %int %311 %int_16 + %313 = OpIAdd %int %308 %312 + %314 = OpAccessChain %_ptr_Private_int %map %313 + OpStore %314 %int_1 + %315 = OpAccessChain %_ptr_Function_int %p %uint_1 + %316 = OpLoad %int %315 + %317 = OpISub %int %316 %int_2 + %318 = OpAccessChain %_ptr_Function_int %p %uint_1 + OpStore %318 %317 + OpBranch %289 + %289 = OpLabel + %319 = OpLoad %int %d + %320 = OpSGreaterThanEqual %bool %319 %int_0 + OpSelectionMerge %322 None + OpBranchConditional %320 %321 %322 + %321 = OpLabel + %323 = OpAccessChain %_ptr_Function_int %p %uint_0 + %324 = OpLoad %int %323 + %325 = OpSLessThan %bool %324 %int_14 + OpBranch %322 + %322 = OpLabel + %326 = OpPhi %bool %320 %289 %325 %321 + OpSelectionMerge %328 None + OpBranchConditional %326 %327 %328 + %327 = OpLabel + %329 = OpAccessChain %_ptr_Function_int %p %uint_0 + %330 = OpLoad %int %329 + %331 = OpIAdd %int %330 %int_2 + %332 = OpAccessChain %_ptr_Function_int %p %uint_1 + %333 = OpLoad 
%int %332 + %334 = OpIMul %int %333 %int_16 + %335 = OpIAdd %int %331 %334 + %336 = OpAccessChain %_ptr_Private_int %map %335 + %337 = OpLoad %int %336 + %338 = OpIEqual %bool %337 %int_0 + OpBranch %328 + %328 = OpLabel + %339 = OpPhi %bool %326 %322 %338 %327 + OpSelectionMerge %341 None + OpBranchConditional %339 %340 %341 + %340 = OpLabel + %342 = OpLoad %int %d + %343 = OpISub %int %342 %int_1 + OpStore %d %343 + %344 = OpAccessChain %_ptr_Function_int %p %uint_0 + %345 = OpLoad %int %344 + %346 = OpAccessChain %_ptr_Function_int %p %uint_1 + %347 = OpLoad %int %346 + %348 = OpIMul %int %347 %int_16 + %349 = OpIAdd %int %345 %348 + %350 = OpAccessChain %_ptr_Private_int %map %349 + OpStore %350 %int_1 + %351 = OpAccessChain %_ptr_Function_int %p %uint_0 + %352 = OpLoad %int %351 + %353 = OpIAdd %int %352 %int_1 + %354 = OpAccessChain %_ptr_Function_int %p %uint_1 + %355 = OpLoad %int %354 + %356 = OpIMul %int %355 %int_16 + %357 = OpIAdd %int %353 %356 + %358 = OpAccessChain %_ptr_Private_int %map %357 + OpStore %358 %int_1 + %359 = OpAccessChain %_ptr_Function_int %p %uint_0 + %360 = OpLoad %int %359 + %361 = OpIAdd %int %360 %int_2 + %362 = OpAccessChain %_ptr_Function_int %p %uint_1 + %363 = OpLoad %int %362 + %364 = OpIMul %int %363 %int_16 + %365 = OpIAdd %int %361 %364 + %366 = OpAccessChain %_ptr_Private_int %map %365 + OpStore %366 %int_1 + %367 = OpAccessChain %_ptr_Function_int %p %uint_0 + %368 = OpLoad %int %367 + %369 = OpIAdd %int %368 %int_2 + %370 = OpAccessChain %_ptr_Function_int %p %uint_0 + OpStore %370 %369 + OpBranch %341 + %341 = OpLabel + %371 = OpLoad %int %d + %372 = OpSGreaterThanEqual %bool %371 %int_0 + OpSelectionMerge %374 None + OpBranchConditional %372 %373 %374 + %373 = OpLabel + %375 = OpAccessChain %_ptr_Function_int %p %uint_1 + %376 = OpLoad %int %375 + %377 = OpSLessThan %bool %376 %int_14 + OpBranch %374 + %374 = OpLabel + %378 = OpPhi %bool %372 %341 %377 %373 + OpSelectionMerge %380 None + OpBranchConditional %378 %379 
%380 + %379 = OpLabel + %381 = OpAccessChain %_ptr_Function_int %p %uint_0 + %382 = OpLoad %int %381 + %383 = OpAccessChain %_ptr_Function_int %p %uint_1 + %384 = OpLoad %int %383 + %385 = OpIAdd %int %384 %int_2 + %386 = OpIMul %int %385 %int_16 + %387 = OpIAdd %int %382 %386 + %388 = OpAccessChain %_ptr_Private_int %map %387 + %389 = OpLoad %int %388 + %390 = OpIEqual %bool %389 %int_0 + OpBranch %380 + %380 = OpLabel + %391 = OpPhi %bool %378 %374 %390 %379 + OpSelectionMerge %393 None + OpBranchConditional %391 %392 %393 + %392 = OpLabel + %394 = OpLoad %int %d + %395 = OpISub %int %394 %int_1 + OpStore %d %395 + %396 = OpAccessChain %_ptr_Function_int %p %uint_0 + %397 = OpLoad %int %396 + %398 = OpAccessChain %_ptr_Function_int %p %uint_1 + %399 = OpLoad %int %398 + %400 = OpIMul %int %399 %int_16 + %401 = OpIAdd %int %397 %400 + %402 = OpAccessChain %_ptr_Private_int %map %401 + OpStore %402 %int_1 + %403 = OpAccessChain %_ptr_Function_int %p %uint_0 + %404 = OpLoad %int %403 + %405 = OpAccessChain %_ptr_Function_int %p %uint_1 + %406 = OpLoad %int %405 + %407 = OpIAdd %int %406 %int_1 + %408 = OpIMul %int %407 %int_16 + %409 = OpIAdd %int %404 %408 + %410 = OpAccessChain %_ptr_Private_int %map %409 + OpStore %410 %int_1 + %411 = OpAccessChain %_ptr_Function_int %p %uint_0 + %412 = OpLoad %int %411 + %413 = OpAccessChain %_ptr_Function_int %p %uint_1 + %414 = OpLoad %int %413 + %415 = OpIAdd %int %414 %int_2 + %416 = OpIMul %int %415 %int_16 + %417 = OpIAdd %int %412 %416 + %418 = OpAccessChain %_ptr_Private_int %map %417 + OpStore %418 %int_1 + %419 = OpAccessChain %_ptr_Function_int %p %uint_1 + %420 = OpLoad %int %419 + %421 = OpIAdd %int %420 %int_2 + %422 = OpAccessChain %_ptr_Function_int %p %uint_1 + OpStore %422 %421 + OpBranch %393 + %393 = OpLabel + OpBranch %161 + %161 = OpLabel + %423 = OpAccessChain %_ptr_Function_int %ipos %uint_1 + %424 = OpLoad %int %423 + %425 = OpIMul %int %424 %int_16 + %426 = OpAccessChain %_ptr_Function_int %ipos %uint_0 
+ %427 = OpLoad %int %426 + %428 = OpIAdd %int %425 %427 + %429 = OpAccessChain %_ptr_Private_int %map %428 + %430 = OpLoad %int %429 + %431 = OpIEqual %bool %430 %int_1 + OpSelectionMerge %433 None + OpBranchConditional %431 %432 %433 + %432 = OpLabel + OpStore %_GLF_color %437 + OpReturn + %433 = OpLabel + OpBranch %71 + %71 = OpLabel + %439 = OpLoad %bool %canwalk + OpBranchConditional %439 %68 %70 + %70 = OpLabel + OpStore %_GLF_color %441 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/late-expression-invalidation.asm.frag b/shaders-no-opt/asm/frag/late-expression-invalidation.asm.frag new file mode 100644 index 00000000000..6f9192cd200 --- /dev/null +++ b/shaders-no-opt/asm/frag/late-expression-invalidation.asm.frag @@ -0,0 +1,109 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 68 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %gl_FragCoord "gl_FragCoord" + OpName %_GLF_color "_GLF_color" + OpName %m "m" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %7 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_10 = OpConstant %float 10 + %bool = OpTypeBool +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %19 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 +%mat4v4float = OpTypeMatrix %v4float 4 + %21 = OpConstantComposite %v4float %float_1 %float_1 %float_1 
%float_1 + %22 = OpConstantComposite %mat4v4float %21 %21 %21 %21 + %uint_4 = OpConstant %uint 4 +%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4 +%_ptr_Function__arr_mat4v4float_uint_4 = OpTypePointer Function %_arr_mat4v4float_uint_4 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 + %v2float = OpTypeVector %float 2 + %30 = OpConstantComposite %v2float %float_1 %float_1 + %int_1 = OpConstant %int 1 + %uint_3 = OpConstant %uint 3 +%_ptr_Function_float = OpTypePointer Function %float + %34 = OpConstantComposite %_arr_mat4v4float_uint_4 %22 %22 %22 %22 + %main = OpFunction %void None %7 + %35 = OpLabel + %m = OpVariable %_ptr_Function__arr_mat4v4float_uint_4 Function + OpBranch %36 + %36 = OpLabel + OpLoopMerge %37 %38 None + OpBranch %39 + %39 = OpLabel + %40 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %41 = OpLoad %float %40 + %42 = OpFOrdLessThan %bool %41 %float_10 + OpSelectionMerge %43 None + OpBranchConditional %42 %44 %43 + %44 = OpLabel + OpStore %_GLF_color %19 + OpBranch %37 + %43 = OpLabel + OpStore %m %34 + OpBranch %45 + %45 = OpLabel + %46 = OpPhi %int %int_0 %43 %47 %48 + %49 = OpSLessThan %bool %46 %int_4 + OpLoopMerge %50 %48 None + OpBranchConditional %49 %51 %50 + %51 = OpLabel + OpBranch %52 + %52 = OpLabel + %53 = OpPhi %int %int_0 %51 %54 %55 + %56 = OpSLessThan %bool %53 %int_4 + OpLoopMerge %57 %55 None + OpBranchConditional %56 %58 %57 + %58 = OpLabel + %59 = OpSelect %int %56 %int_1 %int_0 + %60 = OpAccessChain %_ptr_Function_float %m %59 %46 %uint_3 + %61 = OpLoad %float %60 + %62 = OpCompositeConstruct %v2float %61 %61 + %63 = OpFDiv %v2float %30 %62 + %64 = OpExtInst %float %1 Distance %30 %63 + %65 = OpFOrdLessThan %bool %64 %float_1 + OpSelectionMerge %66 None + OpBranchConditional %65 %67 %55 + %67 = OpLabel + OpStore %_GLF_color %21 + OpBranch %55 + %66 = OpLabel + OpBranch %55 + %55 = OpLabel + %54 = OpIAdd %int %53 %int_1 + OpBranch %52 + %57 = OpLabel + OpBranch %48 + %48 
= OpLabel + %47 = OpIAdd %int %46 %int_1 + OpBranch %45 + %50 = OpLabel + OpBranch %37 + %38 = OpLabel + OpBranch %36 + %37 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/ldexp-uint-exponent.asm.frag b/shaders-no-opt/asm/frag/ldexp-uint-exponent.asm.frag new file mode 100644 index 00000000000..9baebc20f40 --- /dev/null +++ b/shaders-no-opt/asm/frag/ldexp-uint-exponent.asm.frag @@ -0,0 +1,36 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 20 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %_GLF_color "_GLF_color" + OpDecorate %_GLF_color Location 0 + OpDecorate %18 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %11 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 + %uint_1 = OpConstant %uint 1 + %15 = OpConstantComposite %v4uint %uint_1 %uint_1 %uint_1 %uint_1 + %int = OpTypeInt 32 1 + %v4int = OpTypeVector %int 4 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpBitCount %v4uint %15 + %19 = OpExtInst %v4float %1 Ldexp %11 %18 + OpStore %_GLF_color %19 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/loop-merge-to-continue.asm.frag b/shaders-no-opt/asm/frag/loop-merge-to-continue.asm.invalid.frag similarity index 100% rename from shaders/asm/frag/loop-merge-to-continue.asm.frag rename to shaders-no-opt/asm/frag/loop-merge-to-continue.asm.invalid.frag diff --git a/shaders-no-opt/asm/frag/nonuniform-bracket-handling-2.vk.nocompat.asm.frag 
b/shaders-no-opt/asm/frag/nonuniform-bracket-handling-2.vk.nocompat.asm.frag new file mode 100644 index 00000000000..ea85ed0b964 --- /dev/null +++ b/shaders-no-opt/asm/frag/nonuniform-bracket-handling-2.vk.nocompat.asm.frag @@ -0,0 +1,96 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 53 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability SampledImageArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vUV %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uSamplers "uSamplers" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "indices" + OpName %_ "" + OpName %vUV "vUV" + OpName %uSampler "uSampler" + OpName %gl_FragCoord "gl_FragCoord" + OpDecorate %FragColor Location 0 + OpDecorate %uSamplers DescriptorSet 0 + OpDecorate %uSamplers Binding 0 + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO 0 NonWritable + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %26 NonUniform + OpDecorate %28 NonUniform + OpDecorate %29 NonUniform + OpDecorate %vUV Location 0 + OpDecorate %uSampler DescriptorSet 1 + OpDecorate %uSampler Binding 0 + OpDecorate %38 NonUniform + OpDecorate %gl_FragCoord BuiltIn FragCoord + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %10 = OpTypeImage %float 2D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 +%_runtimearr_11 = OpTypeRuntimeArray %11 +%_ptr_UniformConstant__runtimearr_11 = OpTypePointer 
UniformConstant %_runtimearr_11 + %uSamplers = OpVariable %_ptr_UniformConstant__runtimearr_11 UniformConstant + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input + %float_0 = OpConstant %float 0 + %uSampler = OpVariable %_ptr_UniformConstant_11 UniformConstant +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_1 = OpConstant %uint 1 +%_ptr_Input_float = OpTypePointer Input %float + %main = OpFunction %void None %3 + %5 = OpLabel + %24 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %int_10 + %26 = OpLoad %uint %24 + %28 = OpAccessChain %_ptr_UniformConstant_11 %uSamplers %26 + %29 = OpLoad %11 %28 + %33 = OpLoad %v2float %vUV + %35 = OpImageSampleExplicitLod %v4float %29 %33 Lod %float_0 + OpStore %FragColor %35 + %37 = OpLoad %11 %uSampler + %38 = OpCopyObject %11 %37 + %39 = OpLoad %v2float %vUV + %44 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_1 + %45 = OpLoad %float %44 + %46 = OpConvertFToS %int %45 + %47 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %46 + %48 = OpLoad %uint %47 + %49 = OpConvertUToF %float %48 + %50 = OpImageSampleExplicitLod %v4float %38 %39 Lod %49 + %51 = OpLoad %v4float %FragColor + %52 = OpFAdd %v4float %51 %50 + OpStore %FragColor %52 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/nonuniform-ssbo.nocompat.vk.asm.frag b/shaders-no-opt/asm/frag/nonuniform-ssbo.nocompat.vk.asm.frag new file mode 100644 index 00000000000..c5428a8bb9b --- /dev/null +++ 
b/shaders-no-opt/asm/frag/nonuniform-ssbo.nocompat.vk.asm.frag @@ -0,0 +1,99 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 59 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability StorageBufferArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vIndex %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpSourceExtension "GL_EXT_samplerless_texture_functions" + OpName %main "main" + OpName %i "i" + OpName %vIndex "vIndex" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "counter" + OpMemberName %SSBO 1 "v" + OpName %ssbos "ssbos" + OpName %FragColor "FragColor" + OpDecorate %vIndex Flat + OpDecorate %vIndex Location 0 + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %ssbos DescriptorSet 0 + OpDecorate %ssbos Binding 3 + OpDecorate %32 NonUniform + OpDecorate %39 NonUniform + OpDecorate %49 NonUniform + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Input_int = OpTypePointer Input %int + %vIndex = OpVariable %_ptr_Input_int Input + %uint = OpTypeInt 32 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %uint %_runtimearr_v4float +%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO +%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO + %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform + %int_60 = OpConstant %int 60 + %int_1 = OpConstant %int 1 + %int_70 = OpConstant %int 70 + %float_20 = OpConstant %float 20 + %30 = 
OpConstantComposite %v4float %float_20 %float_20 %float_20 %float_20 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_100 = OpConstant %int 100 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_100 = OpConstant %uint 100 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %uint_2 = OpConstant %uint 2 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %11 = OpLoad %int %vIndex + OpStore %i %11 + %20 = OpLoad %int %i + %22 = OpIAdd %int %20 %int_60 + %23 = OpCopyObject %int %22 + %25 = OpLoad %int %i + %27 = OpIAdd %int %25 %int_70 + %28 = OpCopyObject %int %27 + %32 = OpAccessChain %_ptr_Uniform_v4float %ssbos %23 %int_1 %28 + OpStore %32 %30 + %33 = OpLoad %int %i + %35 = OpIAdd %int %33 %int_100 + %36 = OpCopyObject %int %35 + %39 = OpAccessChain %_ptr_Uniform_uint %ssbos %36 %int_0 + %43 = OpAtomicIAdd %uint %39 %uint_1 %uint_0 %uint_100 + %46 = OpLoad %int %i + %47 = OpCopyObject %int %46 + %49 = OpAccessChain %_ptr_Uniform_SSBO %ssbos %47 + %50 = OpArrayLength %uint %49 1 + %51 = OpBitcast %int %50 + %52 = OpConvertSToF %float %51 + %55 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + %56 = OpLoad %float %55 + %57 = OpFAdd %float %56 %52 + %58 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + OpStore %58 %57 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/only-initializer-frag-depth.asm.frag b/shaders-no-opt/asm/frag/only-initializer-frag-depth.asm.frag new file mode 100644 index 00000000000..17aab1d8f77 --- /dev/null +++ b/shaders-no-opt/asm/frag/only-initializer-frag-depth.asm.frag @@ -0,0 +1,25 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 10 +; Schema: 0 + OpCapability 
Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragDepth + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main DepthReplacing + OpSource GLSL 450 + OpName %main "main" + OpName %gl_FragDepth "gl_FragDepth" + OpDecorate %gl_FragDepth BuiltIn FragDepth + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %float_0_5 = OpConstant %float 0.5 +%gl_FragDepth = OpVariable %_ptr_Output_float Output %float_0_5 + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/phi.zero-initialize.asm.frag b/shaders-no-opt/asm/frag/phi.zero-initialize.asm.frag new file mode 100644 index 00000000000..3696660d36d --- /dev/null +++ b/shaders-no-opt/asm/frag/phi.zero-initialize.asm.frag @@ -0,0 +1,69 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 40 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vColor %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %vColor "vColor" + OpName %uninit_function_int "uninit_function_int" + OpName %FragColor "FragColor" + OpName %uninit_int "uninit_int" + OpName %uninit_vector "uninit_vector" + OpName %uninit_matrix "uninit_matrix" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpName %uninit_foo "uninit_foo" + OpDecorate %vColor Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vColor = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_10 = OpConstant %float 10 + %bool = OpTypeBool + %int = OpTypeInt 32 1 
+%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_20 = OpConstant %int 20 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Private_int = OpTypePointer Private %int + %uninit_int = OpUndef %int + %v4int = OpTypeVector %int 4 +%_ptr_Private_v4int = OpTypePointer Private %v4int +%uninit_vector = OpUndef %v4int +%mat4v4float = OpTypeMatrix %v4float 4 +%_ptr_Private_mat4v4float = OpTypePointer Private %mat4v4float +%uninit_matrix = OpUndef %mat4v4float + %Foo = OpTypeStruct %int +%_ptr_Private_Foo = OpTypePointer Private %Foo + %uninit_foo = OpUndef %Foo + %main = OpFunction %void None %3 + %5 = OpLabel +%uninit_function_int = OpVariable %_ptr_Function_int Function + %13 = OpAccessChain %_ptr_Input_float %vColor %uint_0 + %14 = OpLoad %float %13 + %17 = OpFOrdGreaterThan %bool %14 %float_10 + OpSelectionMerge %19 None + OpBranchConditional %17 %18 %24 + %18 = OpLabel + OpBranch %19 + %24 = OpLabel + OpBranch %19 + %19 = OpLabel + %27 = OpPhi %int %int_10 %18 %int_20 %24 + %28 = OpLoad %v4float %vColor + OpStore %FragColor %28 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag new file mode 100644 index 00000000000..ebd8d6bab75 --- /dev/null +++ b/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag @@ -0,0 +1,89 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ 
"callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + OpReturn + OpFunctionEnd + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = 
OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + OpBeginInvocationInterlockEXT + %43 = OpFunctionCall %void %callee2_ + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag new file mode 100644 index 00000000000..69b8f911204 --- /dev/null +++ b/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag @@ -0,0 +1,121 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %ssbo2 
DescriptorSet 0 + OpDecorate %ssbo2 Binding 2 + + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint + %SSBO2 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %bool = OpTypeBool + %true = OpConstantTrue %bool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %callee3_res = OpFunctionCall %void %callee3_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %calle3_block = OpLabel + %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %frag_coord_x = OpLoad %float %frag_coord_x_ptr + %frag_coord_int = OpConvertFToS %int %frag_coord_x + %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int + OpStore %ssbo_ptr %uint_4 + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + 
OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + + OpSelectionMerge %merged_block None + OpBranchConditional %true %dummy_block %merged_block + %dummy_block = OpLabel + OpBeginInvocationInterlockEXT + OpEndInvocationInterlockEXT + OpBranch %merged_block + + %merged_block = OpLabel + OpReturn + + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag new file mode 100644 index 00000000000..7c0fe9a2b24 --- /dev/null +++ b/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord 
+ OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %call3res = OpFunctionCall %void %callee3_ + %call4res = OpFunctionCall %void %callee4_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %begin3 = OpLabel + OpBeginInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee4_ = OpFunction %void None %3 + %begin4 = OpLabel + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = 
OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporaries.asm.frag b/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporaries.asm.frag new file mode 100644 index 00000000000..bccea17b392 --- /dev/null +++ b/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporaries.asm.frag @@ -0,0 +1,97 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 52 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vColor %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %a "a" + OpName %vColor "vColor" + OpName %b "b" + OpName %i "i" + OpName %FragColor "FragColor" + OpDecorate %a RelaxedPrecision + OpDecorate %vColor RelaxedPrecision + OpDecorate %vColor Location 0 + OpDecorate %16 RelaxedPrecision + OpDecorate %20 RelaxedPrecision + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %37 RelaxedPrecision + OpDecorate %38 RelaxedPrecision + OpDecorate %39 RelaxedPrecision + OpDecorate %43 RelaxedPrecision + OpDecorate %44 RelaxedPrecision + OpDecorate %45 RelaxedPrecision + OpDecorate %49 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vColor = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 + %int = OpTypeInt 32 
1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 + %bool = OpTypeBool +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %a = OpVariable %_ptr_Function_float Function + %b = OpVariable %_ptr_Function_float Function + %i = OpVariable %_ptr_Function_int Function + %15 = OpAccessChain %_ptr_Input_float %vColor %uint_0 + %16 = OpLoad %float %15 + OpStore %a %16 + %19 = OpAccessChain %_ptr_Input_float %vColor %uint_1 + %20 = OpLoad %float %19 + OpStore %b %20 + OpStore %i %int_0 + OpBranch %25 + %25 = OpLabel + OpLoopMerge %27 %28 None + OpBranch %29 + %29 = OpLabel + %30 = OpLoad %int %i + %33 = OpSLessThan %bool %30 %int_4 + OpBranchConditional %33 %26 %27 + %26 = OpLabel + %37 = OpLoad %v4float %FragColor + %38 = OpCompositeConstruct %v4float %float_1 %float_1 %float_1 %float_1 + %39 = OpFAdd %v4float %37 %38 + OpStore %FragColor %39 + OpBranch %28 + %28 = OpLabel + %40 = OpLoad %int %i + %42 = OpIAdd %int %40 %int_1 + OpStore %i %42 + %43 = OpLoad %float %a + %44 = OpLoad %float %a + %45 = OpFMul %float %43 %44 + %force_tmp = OpFMul %float %45 %44 + %46 = OpLoad %float %b + %47 = OpFAdd %float %46 %force_tmp + OpStore %b %47 + OpBranch %25 + %27 = OpLabel + %48 = OpLoad %float %b + %49 = OpLoad %v4float %FragColor + %50 = OpCompositeConstruct %v4float %48 %48 %48 %48 + %51 = OpFAdd %v4float %49 %50 + OpStore %FragColor %51 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporary.asm.frag b/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporary.asm.frag new file mode 100644 index 00000000000..42d72dc410c --- /dev/null +++ b/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporary.asm.frag @@ -0,0 +1,47 @@ +; SPIR-V +; Version: 1.0 +; 
Generator: Khronos Glslang Reference Front End; 10 +; Bound: 27 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vColor %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %b "b" + OpName %vColor "vColor" + OpName %FragColor "FragColor" + OpDecorate %b RelaxedPrecision + OpDecorate %vColor RelaxedPrecision + OpDecorate %vColor Location 0 + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Input_float = OpTypePointer Input %float + %vColor = OpVariable %_ptr_Input_float Input + %bool = OpTypeBool + %false = OpConstantFalse %bool +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %6 + %6 = OpLabel + OpLoopMerge %8 %9 None + OpBranch %7 + %7 = OpLabel + %15 = OpLoad %float %vColor + %b = OpFMul %float %15 %15 + OpBranch %9 + %9 = OpLabel + OpBranchConditional %false %6 %8 + %8 = OpLabel + %bb = OpFMul %float %b %b + OpStore %FragColor %bb + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules.asm.frag b/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules.asm.frag new file mode 100644 index 00000000000..ad526e5560f --- /dev/null +++ b/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules.asm.frag @@ -0,0 +1,146 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 15 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor0 %FragColor1 %FragColor2 %FragColor3 %V4 + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 320 + OpName %main "main" + OpName %FragColor0 
"FragColor0" + OpName %FragColor1 "FragColor1" + OpName %FragColor2 "FragColor2" + OpName %FragColor3 "FragColor3" + OpName %V4 "V4" + OpName %V4_value0 "V4_value0" + OpName %V1_value0 "V1_value0" + OpName %V1_value1 "V1_value1" + OpName %V1_value2 "V1_value2" + OpName %float_0_weird "float_0_weird" + OpName %ubo "ubo" + OpName %ubo_mp0 "ubo_mp0" + OpName %ubo_hp0 "ubo_hp0" + OpName %block "UBO" + OpName %phi_mp "phi_mp" + OpName %phi_hp "phi_hp" + OpMemberName %block 0 "mediump_float" + OpMemberName %block 1 "highp_float" + OpDecorate %FragColor0 RelaxedPrecision + OpDecorate %FragColor0 Location 0 + OpDecorate %FragColor1 RelaxedPrecision + OpDecorate %FragColor1 Location 1 + OpDecorate %FragColor2 RelaxedPrecision + OpDecorate %FragColor2 Location 2 + OpDecorate %FragColor3 RelaxedPrecision + OpDecorate %FragColor3 Location 3 + OpDecorate %V4 RelaxedPrecision + OpDecorate %V4 Location 0 + OpDecorate %V4_add RelaxedPrecision + OpDecorate %V4_mul RelaxedPrecision + OpDecorate %V1_add RelaxedPrecision + OpDecorate %V1_mul RelaxedPrecision + OpDecorate %phi_mp RelaxedPrecision + OpDecorate %mp_to_mp RelaxedPrecision + OpDecorate %hp_to_mp RelaxedPrecision + OpDecorate %V1_add_composite RelaxedPrecision + OpDecorate %V1_mul_composite RelaxedPrecision + OpDecorate %V4_sin1 RelaxedPrecision + OpDecorate %float_0_weird RelaxedPrecision + OpDecorate %ubo Binding 0 + OpDecorate %ubo DescriptorSet 0 + OpDecorate %block Block + OpMemberDecorate %block 0 Offset 0 + OpMemberDecorate %block 1 Offset 4 + OpMemberDecorate %block 0 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %block = OpTypeStruct %float %float + %block_ptr = OpTypePointer Uniform %block + %ubo_float_ptr = OpTypePointer Uniform %float + %ubo = OpVariable %block_ptr Uniform + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %float_3 = OpConstant %float 3.0 + %v4float 
= OpTypeVector %float 4 + %float_3_splat = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor0 = OpVariable %_ptr_Output_v4float Output + %FragColor1 = OpVariable %_ptr_Output_v4float Output + %FragColor2 = OpVariable %_ptr_Output_v4float Output + %FragColor3 = OpVariable %_ptr_Output_v4float Output +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_float = OpTypePointer Input %float + %V4 = OpVariable %_ptr_Input_v4float Input + %main = OpFunction %void None %3 + %5 = OpLabel + + ; Inherits precision in GLSL + %V4_value0 = OpLoad %v4float %V4 + + ; Inherits precision in GLSL + %ptr_V4x = OpAccessChain %_ptr_Input_float %V4 %uint_0 + + ; Inherits precision in GLSL + %V1_value0 = OpLoad %float %ptr_V4x + %V1_value1 = OpCompositeExtract %float %V4_value0 2 + %V1_value2 = OpCopyObject %float %V1_value1 + + %mp_ptr = OpAccessChain %ubo_float_ptr %ubo %uint_0 + %hp_ptr = OpAccessChain %ubo_float_ptr %ubo %uint_1 + %ubo_mp0 = OpLoad %float %mp_ptr + %ubo_hp0 = OpLoad %float %hp_ptr + + ; Stays mediump + %V4_add = OpFAdd %v4float %V4_value0 %float_3_splat + ; Must promote to highp + %V4_sub = OpFSub %v4float %V4_value0 %float_3_splat + ; Relaxed, truncate inputs. + %V4_mul = OpFMul %v4float %V4_sub %float_3_splat + OpStore %FragColor0 %V4_add + OpStore %FragColor1 %V4_sub + OpStore %FragColor2 %V4_mul + + ; Same as V4 tests. + %V1_add = OpFAdd %float %V1_value0 %float_3 + %float_0_weird = OpFSub %float %float_3 %ubo_hp0 + %V1_sub = OpFSub %float %V1_value0 %float_0_weird + %V1_mul = OpFMul %float %V1_sub %ubo_hp0 + %V1_result = OpCompositeConstruct %v4float %V1_add %V1_sub %V1_mul %float_3 + OpStore %FragColor3 %V1_result + + ; Same as V4 tests, but composite forwarding. 
+ %V1_add_composite = OpFAdd %float %V1_value1 %ubo_mp0 + %V1_sub_composite = OpFSub %float %V1_value2 %ubo_mp0 + %V1_mul_composite = OpFMul %float %V1_sub_composite %ubo_hp0 + %V1_result_composite = OpCompositeConstruct %v4float %V1_add_composite %V1_sub_composite %V1_mul_composite %float_3 + OpStore %FragColor3 %V1_result_composite + + ; Must promote input to highp. + %V4_sin0 = OpExtInst %v4float %1 Sin %V4_value0 + OpStore %FragColor0 %V4_sin0 + ; Can keep mediump input. + %V4_sin1 = OpExtInst %v4float %1 Sin %V4_value0 + OpStore %FragColor1 %V4_sin1 + + OpBranch %next + %next = OpLabel + %phi_mp = OpPhi %float %V1_add %5 + %phi_hp = OpPhi %float %V1_sub %5 + + ; Consume PHIs in different precision contexts + %mp_to_mp = OpFAdd %float %phi_mp %phi_mp + %mp_to_hp = OpFAdd %float %phi_mp %phi_mp + %hp_to_mp = OpFAdd %float %phi_hp %phi_hp + %hp_to_hp = OpFAdd %float %phi_hp %phi_hp + %complete = OpCompositeConstruct %v4float %mp_to_mp %mp_to_hp %hp_to_mp %hp_to_hp + OpStore %FragColor2 %complete + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/reserved-function-identifier.asm.frag b/shaders-no-opt/asm/frag/reserved-function-identifier.asm.frag new file mode 100644 index 00000000000..a5a16f2873b --- /dev/null +++ b/shaders-no-opt/asm/frag/reserved-function-identifier.asm.frag @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %ACOS_f1_ "mat3" + OpName %a "a" + OpName %ACOS_i1_ "gl_Foo" + OpName %a_0 "a" + OpName %FragColor "FragColor" + OpName %param "param" + OpName %param_0 "param" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %8 
= OpTypeFunction %float %_ptr_Function_float + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %14 = OpTypeFunction %float %_ptr_Function_int + %float_1 = OpConstant %float 1 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %float_2 = OpConstant %float 2 + %int_4 = OpConstant %int 4 + %main = OpFunction %void None %3 + %5 = OpLabel + %param = OpVariable %_ptr_Function_float Function + %param_0 = OpVariable %_ptr_Function_int Function + OpStore %param %float_2 + %32 = OpFunctionCall %float %ACOS_f1_ %param + OpStore %param_0 %int_4 + %35 = OpFunctionCall %float %ACOS_i1_ %param_0 + %36 = OpFAdd %float %32 %35 + OpStore %FragColor %36 + OpReturn + OpFunctionEnd + %ACOS_f1_ = OpFunction %float None %8 + %a = OpFunctionParameter %_ptr_Function_float + %11 = OpLabel + %18 = OpLoad %float %a + %20 = OpFAdd %float %18 %float_1 + OpReturnValue %20 + OpFunctionEnd + %ACOS_i1_ = OpFunction %float None %14 + %a_0 = OpFunctionParameter %_ptr_Function_int + %17 = OpLabel + %23 = OpLoad %int %a_0 + %24 = OpConvertSToF %float %23 + %25 = OpFAdd %float %24 %float_1 + OpReturnValue %25 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/reserved-identifiers.asm.frag b/shaders-no-opt/asm/frag/reserved-identifiers.asm.frag new file mode 100644 index 00000000000..5015cef82af --- /dev/null +++ b/shaders-no-opt/asm/frag/reserved-identifiers.asm.frag @@ -0,0 +1,51 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 24 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %spvFoo %SPIRV_Cross_blah %_40 %_m40 %_underscore_foo_bar_meep + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %spvFoo "spvFoo" + OpName %SPIRV_Cross_blah "SPIRV_Cross_blah" + OpName %_40 "_40Bar" + OpName %_m40 "_m40" + OpName %_underscore_foo_bar_meep 
"__underscore_foo__bar_meep__" + OpDecorate %spvFoo Location 0 + OpDecorate %SPIRV_Cross_blah Location 1 + OpDecorate %_40 Location 2 + OpDecorate %_m40 Location 3 + OpDecorate %_underscore_foo_bar_meep Location 4 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %spvFoo = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%SPIRV_Cross_blah = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %14 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %_40 = OpVariable %_ptr_Output_v4float Output + %float_2 = OpConstant %float 2 + %17 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %_m40 = OpVariable %_ptr_Output_v4float Output + %float_3 = OpConstant %float 3 + %20 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 +%_underscore_foo_bar_meep = OpVariable %_ptr_Output_v4float Output + %float_4 = OpConstant %float 4 + %23 = OpConstantComposite %v4float %float_4 %float_4 %float_4 %float_4 + %main = OpFunction %void None %3 + %5 = OpLabel + OpStore %spvFoo %11 + OpStore %SPIRV_Cross_blah %14 + OpStore %_40 %17 + OpStore %_m40 %20 + OpStore %_underscore_foo_bar_meep %23 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/scalar-select.spv14.asm.frag b/shaders-no-opt/asm/frag/scalar-select.spv14.asm.frag new file mode 100644 index 00000000000..07450ee80b6 --- /dev/null +++ b/shaders-no-opt/asm/frag/scalar-select.spv14.asm.frag @@ -0,0 +1,62 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + 
%bool = OpTypeBool + %false = OpConstantFalse %bool + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %true = OpConstantTrue %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %s = OpTypeStruct %float + %arr = OpTypeArray %float %uint_2 +%_ptr_Function_s = OpTypePointer Function %s +%_ptr_Function_arr = OpTypePointer Function %arr + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %17 = OpConstantComposite %v4float %float_1 %float_1 %float_0 %float_1 + %18 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %19 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %20 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %s0 = OpConstantComposite %s %float_0 + %s1 = OpConstantComposite %s %float_1 + %v4bool = OpTypeVector %bool 4 + %b4 = OpConstantComposite %v4bool %false %true %false %true + %arr1 = OpConstantComposite %arr %float_0 %float_1 + %arr2 = OpConstantComposite %arr %float_1 %float_0 + %main = OpFunction %void None %3 + %5 = OpLabel + %ss = OpVariable %_ptr_Function_s Function + %arrvar = OpVariable %_ptr_Function_arr Function + ; Not trivial + %21 = OpSelect %v4float %false %17 %18 + OpStore %FragColor %21 + ; Trivial + %22 = OpSelect %v4float %false %19 %20 + OpStore %FragColor %22 + ; Vector not trivial + %23 = OpSelect %v4float %b4 %17 %18 + OpStore %FragColor %23 + ; Vector trivial + %24 = OpSelect %v4float %b4 %19 %20 + OpStore %FragColor %24 + ; Struct selection + %sout = OpSelect %s %false %s0 %s1 + OpStore %ss %sout + ; Array selection + %arrout = OpSelect %arr %true %arr1 %arr2 + OpStore %arrvar %arrout + + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/selection-merge-to-continue.asm.frag b/shaders-no-opt/asm/frag/selection-merge-to-continue.asm.invalid.frag similarity index 100% rename from 
shaders/asm/frag/selection-merge-to-continue.asm.frag rename to shaders-no-opt/asm/frag/selection-merge-to-continue.asm.invalid.frag diff --git a/shaders-no-opt/asm/frag/sparse-texture-feedback-uint-code.asm.desktop.frag b/shaders-no-opt/asm/frag/sparse-texture-feedback-uint-code.asm.desktop.frag new file mode 100644 index 00000000000..9b2eb72899e --- /dev/null +++ b/shaders-no-opt/asm/frag/sparse-texture-feedback-uint-code.asm.desktop.frag @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpCapability SparseResidency + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vUV %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_ARB_sparse_texture2" + OpSourceExtension "GL_ARB_sparse_texture_clamp" + OpName %main "main" + OpName %ret "ret" + OpName %uSamp "uSamp" + OpName %vUV "vUV" + OpName %texel "texel" + OpName %ResType "ResType" + OpName %FragColor "FragColor" + OpDecorate %uSamp DescriptorSet 0 + OpDecorate %uSamp Binding 0 + OpDecorate %vUV Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %float = OpTypeFloat 32 + %10 = OpTypeImage %float 2D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %uSamp = OpVariable %_ptr_UniformConstant_11 UniformConstant + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %uint = OpTypeInt 32 0 + %ResType = OpTypeStruct %uint %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + 
%ret = OpVariable %_ptr_Function_bool Function + %texel = OpVariable %_ptr_Function_v4float Function + %14 = OpLoad %11 %uSamp + %18 = OpLoad %v2float %vUV + %24 = OpImageSparseSampleImplicitLod %ResType %14 %18 + %25 = OpCompositeExtract %v4float %24 1 + OpStore %texel %25 + %26 = OpCompositeExtract %uint %24 0 + %27 = OpImageSparseTexelsResident %bool %26 + OpStore %ret %27 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/subgroup-arithmetic-cast.nocompat.vk.asm.frag b/shaders-no-opt/asm/frag/subgroup-arithmetic-cast.nocompat.vk.asm.frag new file mode 100644 index 00000000000..a47c6b785f4 --- /dev/null +++ b/shaders-no-opt/asm/frag/subgroup-arithmetic-cast.nocompat.vk.asm.frag @@ -0,0 +1,65 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 78 +; Schema: 0 + OpCapability Shader + OpCapability GroupNonUniform + OpCapability GroupNonUniformArithmetic + OpCapability GroupNonUniformClustered + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %index %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_KHR_shader_subgroup_arithmetic" + OpSourceExtension "GL_KHR_shader_subgroup_basic" + OpSourceExtension "GL_KHR_shader_subgroup_clustered" + OpName %main "main" + OpName %index "index" + OpName %FragColor "FragColor" + OpDecorate %index Flat + OpDecorate %index Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int + %index = OpVariable %_ptr_Input_int Input + %uint_3 = OpConstant %uint 3 + %uint_4 = OpConstant %uint 4 +%_ptr_Output_uint = OpTypePointer Output %uint + %FragColor = OpVariable %_ptr_Output_uint Output + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpLoad %int %index + %u = OpBitcast 
%uint %i + %res0 = OpGroupNonUniformSMin %uint %uint_3 Reduce %i + %res1 = OpGroupNonUniformSMax %uint %uint_3 Reduce %u + %res2 = OpGroupNonUniformUMin %uint %uint_3 Reduce %i + %res3 = OpGroupNonUniformUMax %uint %uint_3 Reduce %u + %res4 = OpGroupNonUniformSMax %uint %uint_3 InclusiveScan %i + %res5 = OpGroupNonUniformSMin %uint %uint_3 InclusiveScan %u + %res6 = OpGroupNonUniformUMax %uint %uint_3 ExclusiveScan %i + %res7 = OpGroupNonUniformUMin %uint %uint_3 ExclusiveScan %u + %res8 = OpGroupNonUniformSMin %uint %uint_3 ClusteredReduce %i %uint_4 + %res9 = OpGroupNonUniformSMax %uint %uint_3 ClusteredReduce %u %uint_4 + %res10 = OpGroupNonUniformUMin %uint %uint_3 ClusteredReduce %i %uint_4 + %res11 = OpGroupNonUniformUMax %uint %uint_3 ClusteredReduce %u %uint_4 + OpStore %FragColor %res0 + OpStore %FragColor %res1 + OpStore %FragColor %res2 + OpStore %FragColor %res3 + OpStore %FragColor %res4 + OpStore %FragColor %res5 + OpStore %FragColor %res6 + OpStore %FragColor %res7 + OpStore %FragColor %res8 + OpStore %FragColor %res9 + OpStore %FragColor %res10 + OpStore %FragColor %res11 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag b/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag new file mode 100644 index 00000000000..39f4d066db8 --- /dev/null +++ b/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag @@ -0,0 +1,52 @@ +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 31 +; Schema: 0 + OpCapability Shader + OpCapability GroupNonUniformBallot + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %INDEX %SV_Target + OpExecutionMode %main OriginUpperLeft + OpName %main "main" + OpName %INDEX "INDEX" + OpName %SV_Target "SV_Target" + OpDecorate %INDEX Flat + OpDecorate %INDEX Location 0 + OpDecorate %SV_Target Location 0 + %void = OpTypeVoid + %2 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %INDEX = OpVariable 
%_ptr_Input_uint Input + %v4uint = OpTypeVector %uint 4 +%_ptr_Output_v4uint = OpTypePointer Output %v4uint + %SV_Target = OpVariable %_ptr_Output_v4uint Output + %bool = OpTypeBool + %uint_100 = OpConstant %uint 100 + %uint_3 = OpConstant %uint 3 +%_ptr_Output_uint = OpTypePointer Output %uint + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %29 + %29 = OpLabel + %11 = OpLoad %uint %INDEX + %13 = OpULessThan %bool %11 %uint_100 + %15 = OpGroupNonUniformBallot %v4uint %uint_3 %13 + %17 = OpCompositeExtract %uint %15 0 + %18 = OpCompositeExtract %uint %15 1 + %19 = OpCompositeExtract %uint %15 2 + %20 = OpCompositeExtract %uint %15 3 + %22 = OpAccessChain %_ptr_Output_uint %SV_Target %uint_0 + OpStore %22 %17 + %24 = OpAccessChain %_ptr_Output_uint %SV_Target %uint_1 + OpStore %24 %18 + %26 = OpAccessChain %_ptr_Output_uint %SV_Target %uint_2 + OpStore %26 %19 + %28 = OpAccessChain %_ptr_Output_uint %SV_Target %uint_3 + OpStore %28 %20 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag similarity index 100% rename from shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag rename to shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag diff --git a/shaders/asm/frag/switch-merge-to-continue.asm.frag b/shaders-no-opt/asm/frag/switch-merge-to-continue.asm.invalid.frag similarity index 100% rename from shaders/asm/frag/switch-merge-to-continue.asm.frag rename to shaders-no-opt/asm/frag/switch-merge-to-continue.asm.invalid.frag diff --git a/shaders-no-opt/asm/frag/switch-non-default-fallthrough-no-phi.asm.frag b/shaders-no-opt/asm/frag/switch-non-default-fallthrough-no-phi.asm.frag new file mode 100644 index 00000000000..dd9a5a97b0f --- /dev/null +++ 
b/shaders-no-opt/asm/frag/switch-non-default-fallthrough-no-phi.asm.frag @@ -0,0 +1,105 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google rspirv; 0 +; Bound: 80 +; Schema: 0 + OpCapability Shader + OpCapability VulkanMemoryModel + OpExtension "SPV_KHR_vulkan_memory_model" + OpMemoryModel Logical Vulkan + OpEntryPoint Fragment %1 "main" %2 %3 + OpExecutionMode %1 OriginUpperLeft + OpMemberDecorate %_struct_14 0 Offset 0 + OpMemberDecorate %_struct_14 1 Offset 4 + OpMemberDecorate %_struct_15 0 Offset 0 + OpMemberDecorate %_struct_15 1 Offset 4 + OpDecorate %2 Location 0 + OpDecorate %3 Location 0 + OpDecorate %2 Flat + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %bool = OpTypeBool +%_ptr_Input_int = OpTypePointer Input %int +%_ptr_Output_int = OpTypePointer Output %int +%_ptr_Function_int = OpTypePointer Function %int + %void = OpTypeVoid + %_struct_14 = OpTypeStruct %uint %int + %_struct_15 = OpTypeStruct %int %int +%_ptr_Function__struct_15 = OpTypePointer Function %_struct_15 + %24 = OpTypeFunction %void + %2 = OpVariable %_ptr_Input_int Input + %3 = OpVariable %_ptr_Output_int Output + %uint_1 = OpConstant %uint 1 + %26 = OpUndef %_struct_14 + %uint_0 = OpConstant %uint 0 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 + %true = OpConstantTrue %bool + %31 = OpUndef %int + %false = OpConstantFalse %bool +%_ptr_Function_bool = OpTypePointer Function %bool + %1 = OpFunction %void None %24 + %32 = OpLabel + %76 = OpVariable %_ptr_Function_bool Function %false + %33 = OpVariable %_ptr_Function__struct_15 Function + %34 = OpVariable %_ptr_Function_int Function + %35 = OpVariable %_ptr_Function_int Function + OpSelectionMerge %72 None + OpSwitch %uint_0 %73 + %73 = OpLabel + %36 = OpLoad %int %2 + %37 = OpAccessChain %_ptr_Function_int %33 %uint_0 + OpStore %37 %int_0 + %38 = OpAccessChain %_ptr_Function_int %33 %uint_1 + OpStore %38 %int_10 + OpBranch %40 + %40 = OpLabel + %41 = OpPhi %_struct_14 %26 %73 %42 %43 + %44 = OpPhi %int %int_0 %73 %45 %43 + 
OpLoopMerge %48 %43 None + OpBranch %49 + %49 = OpLabel + %52 = OpLoad %int %37 + %53 = OpLoad %int %38 + %54 = OpSLessThan %bool %52 %53 + OpSelectionMerge %55 None + OpBranchConditional %54 %56 %57 + %57 = OpLabel + %65 = OpCompositeInsert %_struct_14 %uint_0 %41 0 + OpBranch %55 + %56 = OpLabel + %59 = OpLoad %int %37 + %60 = OpBitcast %int %uint_1 + %61 = OpIAdd %int %59 %60 + OpCopyMemory %34 %37 + %63 = OpLoad %int %34 + OpStore %35 %61 + OpCopyMemory %37 %35 + %64 = OpCompositeConstruct %_struct_14 %uint_1 %63 + OpBranch %55 + %55 = OpLabel + %42 = OpPhi %_struct_14 %64 %56 %65 %57 + %66 = OpCompositeExtract %uint %42 0 + %67 = OpBitcast %int %66 + OpSelectionMerge %71 None + OpSwitch %67 %69 0 %70 1 %71 + %71 = OpLabel + %45 = OpIAdd %int %44 %36 + OpBranch %43 + %70 = OpLabel + OpStore %3 %44 + OpStore %76 %true + OpBranch %48 + %69 = OpLabel + OpBranch %48 + %43 = OpLabel + OpBranch %40 + %48 = OpLabel + %79 = OpPhi %bool %false %69 %true %70 + OpSelectionMerge %77 None + OpBranchConditional %79 %72 %77 + %77 = OpLabel + OpBranch %72 + %72 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/switch-single-case-multiple-exit-cfg.asm.frag b/shaders-no-opt/asm/frag/switch-single-case-multiple-exit-cfg.asm.frag new file mode 100644 index 00000000000..d2bd15a9785 --- /dev/null +++ b/shaders-no-opt/asm/frag/switch-single-case-multiple-exit-cfg.asm.frag @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 54 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %gl_FragCoord "gl_FragCoord" + OpName %_GLF_color "_GLF_color" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %int_0 = 
OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %bool = OpTypeBool + %v2float = OpTypeVector %float 2 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %52 = OpUndef %v2float + %main = OpFunction %void None %3 + %5 = OpLabel + OpSelectionMerge %9 None + OpSwitch %int_0 %8 + %8 = OpLabel + %17 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %18 = OpLoad %float %17 + %22 = OpFOrdNotEqual %bool %18 %18 + OpSelectionMerge %24 None + OpBranchConditional %22 %23 %24 + %23 = OpLabel + OpBranch %9 + %24 = OpLabel + %33 = OpCompositeExtract %float %52 1 + %51 = OpCompositeInsert %v2float %33 %52 1 + OpBranch %9 + %9 = OpLabel + %53 = OpPhi %v2float %52 %23 %51 %24 + %42 = OpCompositeExtract %float %53 0 + %43 = OpCompositeExtract %float %53 1 + %48 = OpCompositeConstruct %v4float %42 %43 %float_1 %float_1 + OpStore %_GLF_color %48 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/terminate-impure-function-call.spv16.asm.frag b/shaders-no-opt/asm/frag/terminate-impure-function-call.spv16.asm.frag new file mode 100644 index 00000000000..d596e4efe72 --- /dev/null +++ b/shaders-no-opt/asm/frag/terminate-impure-function-call.spv16.asm.frag @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 34 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vA %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %foobar_i1_ "foobar(i1;" + OpName %a "a" + OpName %a_0 "a" + OpName %vA "vA" + OpName %param "param" + OpName %FragColor "FragColor" + 
OpDecorate %vA Flat + OpDecorate %vA Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %10 = OpTypeFunction %v4float %_ptr_Function_int + %int_0 = OpConstant %int 0 + %bool = OpTypeBool + %float_10 = OpConstant %float 10 + %22 = OpConstantComposite %v4float %float_10 %float_10 %float_10 %float_10 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_int = OpTypePointer Input %int + %vA = OpVariable %_ptr_Input_int Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %a_0 = OpVariable %_ptr_Function_v4float Function + %param = OpVariable %_ptr_Function_int Function + %30 = OpLoad %int %vA + OpStore %param %30 + %31 = OpFunctionCall %v4float %foobar_i1_ %param + OpStore %FragColor %22 + OpReturn + OpFunctionEnd + %foobar_i1_ = OpFunction %v4float None %10 + %a = OpFunctionParameter %_ptr_Function_int + %13 = OpLabel + %14 = OpLoad %int %a + %17 = OpSLessThan %bool %14 %int_0 + OpSelectionMerge %19 None + OpBranchConditional %17 %18 %19 + %18 = OpLabel + OpTerminateInvocation + %19 = OpLabel + OpReturnValue %22 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/texel-fetch-ms-uint-sample.asm.frag b/shaders-no-opt/asm/frag/texel-fetch-ms-uint-sample.asm.frag new file mode 100644 index 00000000000..6f198079ff7 --- /dev/null +++ b/shaders-no-opt/asm/frag/texel-fetch-ms-uint-sample.asm.frag @@ -0,0 +1,68 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 61 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor 
"FragColor" + OpName %uSamp "uSamp" + OpName %gl_FragCoord "gl_FragCoord" + OpDecorate %FragColor Location 0 + OpDecorate %uSamp DescriptorSet 0 + OpDecorate %uSamp Binding 0 + OpDecorate %gl_FragCoord BuiltIn FragCoord + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %10 = OpTypeImage %float 2D 0 0 1 1 Unknown + %11 = OpTypeSampledImage %10 +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %uSamp = OpVariable %_ptr_UniformConstant_11 UniformConstant +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %v2float = OpTypeVector %float 2 + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %v2int = OpTypeVector %int 2 + %uint_0 = OpConstant %uint 0 +%_ptr_Output_float = OpTypePointer Output %float + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %11 %uSamp + %18 = OpLoad %v4float %gl_FragCoord + %19 = OpVectorShuffle %v2float %18 %18 0 1 + %22 = OpConvertFToS %v2int %19 + %24 = OpImage %10 %14 + %25 = OpImageFetch %v4float %24 %22 Sample %uint_0 + %28 = OpCompositeExtract %float %25 0 + %30 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %30 %28 + %36 = OpImage %10 %14 + %37 = OpImageFetch %v4float %36 %22 Sample %uint_1 + %38 = OpCompositeExtract %float %37 0 + %40 = OpAccessChain %_ptr_Output_float %FragColor %uint_1 + OpStore %40 %38 + %46 = OpImage %10 %14 + %47 = OpImageFetch %v4float %46 %22 Sample %uint_2 + %48 = OpCompositeExtract %float %47 0 + %50 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + OpStore %50 %48 + %56 = OpImage %10 %14 + %57 = OpImageFetch %v4float %56 %22 Sample %uint_3 + %58 = OpCompositeExtract %float %57 0 + %60 = OpAccessChain %_ptr_Output_float %FragColor %uint_3 + OpStore 
%60 %58 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-no-opt/asm/frag/unordered-compare.asm.frag new file mode 100644 index 00000000000..15286e0897e --- /dev/null +++ b/shaders-no-opt/asm/frag/unordered-compare.asm.frag @@ -0,0 +1,179 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 132 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %A %B %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %test_vector_ "test_vector(" + OpName %test_scalar_ "test_scalar(" + OpName %le "le" + OpName %A "A" + OpName %B "B" + OpName %leq "leq" + OpName %ge "ge" + OpName %geq "geq" + OpName %eq "eq" + OpName %neq "neq" + OpName %le_0 "le" + OpName %leq_0 "leq" + OpName %ge_0 "ge" + OpName %geq_0 "geq" + OpName %eq_0 "eq" + OpName %neq_0 "neq" + OpName %FragColor "FragColor" + OpDecorate %A Location 0 + OpDecorate %B Location 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %8 = OpTypeFunction %v4float + %11 = OpTypeFunction %float + %bool = OpTypeBool + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool +%_ptr_Input_v4float = OpTypePointer Input %v4float + %A = OpVariable %_ptr_Input_v4float Input + %B = OpVariable %_ptr_Input_v4float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Function_bool = OpTypePointer Function %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + 
%main = OpFunction %void None %3 + %5 = OpLabel + %128 = OpFunctionCall %v4float %test_vector_ + %129 = OpFunctionCall %float %test_scalar_ + %130 = OpCompositeConstruct %v4float %129 %129 %129 %129 + %131 = OpFAdd %v4float %128 %130 + OpStore %FragColor %131 + OpReturn + OpFunctionEnd +%test_vector_ = OpFunction %v4float None %8 + %10 = OpLabel + %le = OpVariable %_ptr_Function_v4bool Function + %leq = OpVariable %_ptr_Function_v4bool Function + %ge = OpVariable %_ptr_Function_v4bool Function + %geq = OpVariable %_ptr_Function_v4bool Function + %eq = OpVariable %_ptr_Function_v4bool Function + %neq = OpVariable %_ptr_Function_v4bool Function + %20 = OpLoad %v4float %A + %22 = OpLoad %v4float %B + %23 = OpFUnordLessThan %v4bool %20 %22 + OpStore %le %23 + %25 = OpLoad %v4float %A + %26 = OpLoad %v4float %B + %27 = OpFUnordLessThanEqual %v4bool %25 %26 + OpStore %leq %27 + %29 = OpLoad %v4float %A + %30 = OpLoad %v4float %B + %31 = OpFUnordGreaterThan %v4bool %29 %30 + OpStore %ge %31 + %33 = OpLoad %v4float %A + %34 = OpLoad %v4float %B + %35 = OpFUnordGreaterThanEqual %v4bool %33 %34 + OpStore %geq %35 + %37 = OpLoad %v4float %A + %38 = OpLoad %v4float %B + %39 = OpFUnordEqual %v4bool %37 %38 + OpStore %eq %39 + %41 = OpLoad %v4float %A + %42 = OpLoad %v4float %B + %43 = OpFUnordNotEqual %v4bool %41 %42 + OpStore %neq %43 + %ordered = OpFOrdNotEqual %v4bool %41 %42 + OpStore %neq %ordered + %44 = OpLoad %v4bool %le + %49 = OpSelect %v4float %44 %48 %47 + %50 = OpLoad %v4bool %leq + %51 = OpSelect %v4float %50 %48 %47 + %52 = OpFAdd %v4float %49 %51 + %53 = OpLoad %v4bool %ge + %54 = OpSelect %v4float %53 %48 %47 + %55 = OpFAdd %v4float %52 %54 + %56 = OpLoad %v4bool %geq + %57 = OpSelect %v4float %56 %48 %47 + %58 = OpFAdd %v4float %55 %57 + %59 = OpLoad %v4bool %eq + %60 = OpSelect %v4float %59 %48 %47 + %61 = OpFAdd %v4float %58 %60 + %62 = OpLoad %v4bool %neq + %63 = OpSelect %v4float %62 %48 %47 + %64 = OpFAdd %v4float %61 %63 + OpReturnValue %64 + 
OpFunctionEnd +%test_scalar_ = OpFunction %float None %11 + %13 = OpLabel + %le_0 = OpVariable %_ptr_Function_bool Function + %leq_0 = OpVariable %_ptr_Function_bool Function + %ge_0 = OpVariable %_ptr_Function_bool Function + %geq_0 = OpVariable %_ptr_Function_bool Function + %eq_0 = OpVariable %_ptr_Function_bool Function + %neq_0 = OpVariable %_ptr_Function_bool Function + %72 = OpAccessChain %_ptr_Input_float %A %uint_0 + %73 = OpLoad %float %72 + %74 = OpAccessChain %_ptr_Input_float %B %uint_0 + %75 = OpLoad %float %74 + %76 = OpFUnordLessThan %bool %73 %75 + OpStore %le_0 %76 + %78 = OpAccessChain %_ptr_Input_float %A %uint_0 + %79 = OpLoad %float %78 + %80 = OpAccessChain %_ptr_Input_float %B %uint_0 + %81 = OpLoad %float %80 + %82 = OpFUnordLessThanEqual %bool %79 %81 + OpStore %leq_0 %82 + %84 = OpAccessChain %_ptr_Input_float %A %uint_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Input_float %B %uint_0 + %87 = OpLoad %float %86 + %88 = OpFUnordGreaterThan %bool %85 %87 + OpStore %ge_0 %88 + %90 = OpAccessChain %_ptr_Input_float %A %uint_0 + %91 = OpLoad %float %90 + %92 = OpAccessChain %_ptr_Input_float %B %uint_0 + %93 = OpLoad %float %92 + %94 = OpFUnordGreaterThanEqual %bool %91 %93 + OpStore %geq_0 %94 + %96 = OpAccessChain %_ptr_Input_float %A %uint_0 + %97 = OpLoad %float %96 + %98 = OpAccessChain %_ptr_Input_float %B %uint_0 + %99 = OpLoad %float %98 + %100 = OpFUnordEqual %bool %97 %99 + OpStore %eq_0 %100 + %102 = OpAccessChain %_ptr_Input_float %A %uint_0 + %103 = OpLoad %float %102 + %104 = OpAccessChain %_ptr_Input_float %B %uint_0 + %105 = OpLoad %float %104 + %106 = OpFUnordNotEqual %bool %103 %105 + OpStore %neq_0 %106 + %107 = OpLoad %bool %le_0 + %108 = OpSelect %float %107 %float_1 %float_0 + %109 = OpLoad %bool %leq_0 + %110 = OpSelect %float %109 %float_1 %float_0 + %111 = OpFAdd %float %108 %110 + %112 = OpLoad %bool %ge_0 + %113 = OpSelect %float %112 %float_1 %float_0 + %114 = OpFAdd %float %111 %113 + %115 = OpLoad %bool 
%geq_0 + %116 = OpSelect %float %115 %float_1 %float_0 + %117 = OpFAdd %float %114 %116 + %118 = OpLoad %bool %eq_0 + %119 = OpSelect %float %118 %float_1 %float_0 + %120 = OpFAdd %float %117 %119 + %121 = OpLoad %bool %neq_0 + %122 = OpSelect %float %121 %float_1 %float_0 + %123 = OpFAdd %float %120 %122 + OpReturnValue %123 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag new file mode 100644 index 00000000000..15286e0897e --- /dev/null +++ b/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag @@ -0,0 +1,179 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 132 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %A %B %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %test_vector_ "test_vector(" + OpName %test_scalar_ "test_scalar(" + OpName %le "le" + OpName %A "A" + OpName %B "B" + OpName %leq "leq" + OpName %ge "ge" + OpName %geq "geq" + OpName %eq "eq" + OpName %neq "neq" + OpName %le_0 "le" + OpName %leq_0 "leq" + OpName %ge_0 "ge" + OpName %geq_0 "geq" + OpName %eq_0 "eq" + OpName %neq_0 "neq" + OpName %FragColor "FragColor" + OpDecorate %A Location 0 + OpDecorate %B Location 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %8 = OpTypeFunction %v4float + %11 = OpTypeFunction %float + %bool = OpTypeBool + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool +%_ptr_Input_v4float = OpTypePointer Input %v4float + %A = OpVariable %_ptr_Input_v4float Input + %B = OpVariable %_ptr_Input_v4float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + 
%48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Function_bool = OpTypePointer Function %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %128 = OpFunctionCall %v4float %test_vector_ + %129 = OpFunctionCall %float %test_scalar_ + %130 = OpCompositeConstruct %v4float %129 %129 %129 %129 + %131 = OpFAdd %v4float %128 %130 + OpStore %FragColor %131 + OpReturn + OpFunctionEnd +%test_vector_ = OpFunction %v4float None %8 + %10 = OpLabel + %le = OpVariable %_ptr_Function_v4bool Function + %leq = OpVariable %_ptr_Function_v4bool Function + %ge = OpVariable %_ptr_Function_v4bool Function + %geq = OpVariable %_ptr_Function_v4bool Function + %eq = OpVariable %_ptr_Function_v4bool Function + %neq = OpVariable %_ptr_Function_v4bool Function + %20 = OpLoad %v4float %A + %22 = OpLoad %v4float %B + %23 = OpFUnordLessThan %v4bool %20 %22 + OpStore %le %23 + %25 = OpLoad %v4float %A + %26 = OpLoad %v4float %B + %27 = OpFUnordLessThanEqual %v4bool %25 %26 + OpStore %leq %27 + %29 = OpLoad %v4float %A + %30 = OpLoad %v4float %B + %31 = OpFUnordGreaterThan %v4bool %29 %30 + OpStore %ge %31 + %33 = OpLoad %v4float %A + %34 = OpLoad %v4float %B + %35 = OpFUnordGreaterThanEqual %v4bool %33 %34 + OpStore %geq %35 + %37 = OpLoad %v4float %A + %38 = OpLoad %v4float %B + %39 = OpFUnordEqual %v4bool %37 %38 + OpStore %eq %39 + %41 = OpLoad %v4float %A + %42 = OpLoad %v4float %B + %43 = OpFUnordNotEqual %v4bool %41 %42 + OpStore %neq %43 + %ordered = OpFOrdNotEqual %v4bool %41 %42 + OpStore %neq %ordered + %44 = OpLoad %v4bool %le + %49 = OpSelect %v4float %44 %48 %47 + %50 = OpLoad %v4bool %leq + %51 = OpSelect %v4float %50 %48 %47 + %52 = OpFAdd %v4float %49 %51 + %53 = OpLoad %v4bool %ge + %54 = OpSelect %v4float %53 %48 %47 + %55 = OpFAdd 
%v4float %52 %54 + %56 = OpLoad %v4bool %geq + %57 = OpSelect %v4float %56 %48 %47 + %58 = OpFAdd %v4float %55 %57 + %59 = OpLoad %v4bool %eq + %60 = OpSelect %v4float %59 %48 %47 + %61 = OpFAdd %v4float %58 %60 + %62 = OpLoad %v4bool %neq + %63 = OpSelect %v4float %62 %48 %47 + %64 = OpFAdd %v4float %61 %63 + OpReturnValue %64 + OpFunctionEnd +%test_scalar_ = OpFunction %float None %11 + %13 = OpLabel + %le_0 = OpVariable %_ptr_Function_bool Function + %leq_0 = OpVariable %_ptr_Function_bool Function + %ge_0 = OpVariable %_ptr_Function_bool Function + %geq_0 = OpVariable %_ptr_Function_bool Function + %eq_0 = OpVariable %_ptr_Function_bool Function + %neq_0 = OpVariable %_ptr_Function_bool Function + %72 = OpAccessChain %_ptr_Input_float %A %uint_0 + %73 = OpLoad %float %72 + %74 = OpAccessChain %_ptr_Input_float %B %uint_0 + %75 = OpLoad %float %74 + %76 = OpFUnordLessThan %bool %73 %75 + OpStore %le_0 %76 + %78 = OpAccessChain %_ptr_Input_float %A %uint_0 + %79 = OpLoad %float %78 + %80 = OpAccessChain %_ptr_Input_float %B %uint_0 + %81 = OpLoad %float %80 + %82 = OpFUnordLessThanEqual %bool %79 %81 + OpStore %leq_0 %82 + %84 = OpAccessChain %_ptr_Input_float %A %uint_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Input_float %B %uint_0 + %87 = OpLoad %float %86 + %88 = OpFUnordGreaterThan %bool %85 %87 + OpStore %ge_0 %88 + %90 = OpAccessChain %_ptr_Input_float %A %uint_0 + %91 = OpLoad %float %90 + %92 = OpAccessChain %_ptr_Input_float %B %uint_0 + %93 = OpLoad %float %92 + %94 = OpFUnordGreaterThanEqual %bool %91 %93 + OpStore %geq_0 %94 + %96 = OpAccessChain %_ptr_Input_float %A %uint_0 + %97 = OpLoad %float %96 + %98 = OpAccessChain %_ptr_Input_float %B %uint_0 + %99 = OpLoad %float %98 + %100 = OpFUnordEqual %bool %97 %99 + OpStore %eq_0 %100 + %102 = OpAccessChain %_ptr_Input_float %A %uint_0 + %103 = OpLoad %float %102 + %104 = OpAccessChain %_ptr_Input_float %B %uint_0 + %105 = OpLoad %float %104 + %106 = OpFUnordNotEqual %bool %103 %105 + 
OpStore %neq_0 %106 + %107 = OpLoad %bool %le_0 + %108 = OpSelect %float %107 %float_1 %float_0 + %109 = OpLoad %bool %leq_0 + %110 = OpSelect %float %109 %float_1 %float_0 + %111 = OpFAdd %float %108 %110 + %112 = OpLoad %bool %ge_0 + %113 = OpSelect %float %112 %float_1 %float_0 + %114 = OpFAdd %float %111 %113 + %115 = OpLoad %bool %geq_0 + %116 = OpSelect %float %115 %float_1 %float_0 + %117 = OpFAdd %float %114 %116 + %118 = OpLoad %bool %eq_0 + %119 = OpSelect %float %118 %float_1 %float_0 + %120 = OpFAdd %float %117 %119 + %121 = OpLoad %bool %neq_0 + %122 = OpSelect %float %121 %float_1 %float_0 + %123 = OpFAdd %float %120 %122 + OpReturnValue %123 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/vector-extract-dynamic-spec-constant.asm.frag b/shaders-no-opt/asm/frag/vector-extract-dynamic-spec-constant.asm.frag new file mode 100644 index 00000000000..dda5fc40340 --- /dev/null +++ b/shaders-no-opt/asm/frag/vector-extract-dynamic-spec-constant.asm.frag @@ -0,0 +1,49 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 27 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %vColor "vColor" + OpName %omap_r "omap_r" + OpName %omap_g "omap_g" + OpName %omap_b "omap_b" + OpName %omap_a "omap_a" + OpDecorate %FragColor Location 0 + OpDecorate %vColor Location 0 + OpDecorate %omap_r SpecId 0 + OpDecorate %omap_g SpecId 1 + OpDecorate %omap_b SpecId 2 + OpDecorate %omap_a SpecId 3 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vColor = OpVariable %_ptr_Input_v4float Input + 
%int = OpTypeInt 32 1 + %omap_r = OpSpecConstant %int 0 +%_ptr_Input_float = OpTypePointer Input %float + %omap_g = OpSpecConstant %int 1 + %omap_b = OpSpecConstant %int 2 + %omap_a = OpSpecConstant %int 3 + %main = OpFunction %void None %3 + %5 = OpLabel + %loaded = OpLoad %v4float %vColor + %r = OpVectorExtractDynamic %float %loaded %omap_r + %g = OpVectorExtractDynamic %float %loaded %omap_g + %b = OpVectorExtractDynamic %float %loaded %omap_b + %a = OpVectorExtractDynamic %float %loaded %omap_a + %rgba = OpCompositeConstruct %v4float %r %g %b %a + OpStore %FragColor %rgba + OpReturn + OpFunctionEnd diff --git a/shaders/asm/geom/store-uint-layer.invalid.asm.geom b/shaders-no-opt/asm/geom/store-uint-layer.invalid.asm.geom similarity index 100% rename from shaders/asm/geom/store-uint-layer.invalid.asm.geom rename to shaders-no-opt/asm/geom/store-uint-layer.invalid.asm.geom diff --git a/shaders-no-opt/asm/loop-header-self-continue-break.asm.comp b/shaders-no-opt/asm/loop-header-self-continue-break.asm.comp new file mode 100644 index 00000000000..a38b111235a --- /dev/null +++ b/shaders-no-opt/asm/loop-header-self-continue-break.asm.comp @@ -0,0 +1,109 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Clspv; 0 +; Bound: 83 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_KHR_storage_buffer_storage_class" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %19 "main" %gl_GlobalInvocationID + OpSource OpenCL_C 120 + OpDecorate %_runtimearr_float ArrayStride 4 + OpMemberDecorate %_struct_3 0 Offset 0 + OpDecorate %_struct_3 Block + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpDecorate %15 DescriptorSet 0 + OpDecorate %15 Binding 0 + OpDecorate %16 DescriptorSet 0 + OpDecorate %16 Binding 1 + OpDecorate %10 SpecId 0 + OpDecorate %11 SpecId 1 + OpDecorate %12 SpecId 2 + %float = OpTypeFloat 32 +%_runtimearr_float = OpTypeRuntimeArray %float + %_struct_3 = OpTypeStruct %_runtimearr_float 
+%_ptr_StorageBuffer__struct_3 = OpTypePointer StorageBuffer %_struct_3 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%_ptr_Private_v3uint = OpTypePointer Private %v3uint + %10 = OpSpecConstant %uint 1 + %11 = OpSpecConstant %uint 1 + %12 = OpSpecConstant %uint 1 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %10 %11 %12 + %void = OpTypeVoid + %18 = OpTypeFunction %void +%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_35 = OpConstant %uint 35 + %uint_1 = OpConstant %uint 1 + %float_3 = OpConstant %float 3 + %bool = OpTypeBool + %uint_34 = OpConstant %uint 34 + %uint_5 = OpConstant %uint 5 +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %14 = OpVariable %_ptr_Private_v3uint Private %gl_WorkGroupSize + %15 = OpVariable %_ptr_StorageBuffer__struct_3 StorageBuffer + %16 = OpVariable %_ptr_StorageBuffer__struct_3 StorageBuffer + %19 = OpFunction %void None %18 + %20 = OpLabel + %23 = OpAccessChain %_ptr_StorageBuffer_float %15 %uint_0 %uint_0 + %25 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %26 = OpLoad %uint %25 + %27 = OpLoad %float %23 + %29 = OpAccessChain %_ptr_StorageBuffer_float %15 %uint_0 %uint_35 + OpBranch %31 + %31 = OpLabel + %32 = OpPhi %float %27 %20 %67 %58 + %33 = OpPhi %uint %uint_0 %20 %63 %58 + %34 = OpLoad %float %29 + OpLoopMerge %69 %58 None + OpBranch %37 + %37 = OpLabel + %38 = OpPhi %float %46 %37 %32 %31 + %39 = OpPhi %float %38 %37 %34 %31 + %40 = OpPhi %uint %44 %37 %uint_0 %31 + %41 = OpAccessChain %_ptr_StorageBuffer_float %15 %uint_0 %40 + %42 = OpFAdd %float %39 %38 + %44 = OpIAdd %uint %40 %uint_1 + %45 = OpAccessChain %_ptr_StorageBuffer_float %15 %uint_0 %44 + %46 = OpLoad %float %45 + %47 = OpFAdd %float %42 %46 + %49 = OpFDiv %float %47 %float_3 + OpStore %41 %49 + %52 = OpULessThan %bool %40 %uint_34 + %53 = OpLogicalNot %bool 
%52 + OpLoopMerge %56 %37 None + OpBranchConditional %53 %56 %37 + %56 = OpLabel + OpBranch %58 + %58 = OpLabel + %59 = OpLoad %float %29 + %60 = OpFAdd %float %38 %59 + %61 = OpFAdd %float %32 %60 + %62 = OpFDiv %float %61 %float_3 + OpStore %29 %62 + %63 = OpIAdd %uint %33 %uint_1 + %65 = OpULessThan %bool %33 %uint_5 + %66 = OpLogicalNot %bool %65 + %67 = OpLoad %float %23 + OpBranchConditional %66 %69 %31 + %69 = OpLabel + %70 = OpPhi %float %75 %69 %67 %58 + %71 = OpPhi %uint %76 %69 %uint_1 %58 + %72 = OpAccessChain %_ptr_StorageBuffer_float %15 %uint_0 %71 + %73 = OpLoad %float %72 + %74 = OpFOrdLessThan %bool %70 %73 + %75 = OpSelect %float %74 %73 %70 + %76 = OpIAdd %uint %71 %uint_1 + %77 = OpULessThan %bool %71 %uint_35 + %78 = OpLogicalNot %bool %77 + OpLoopMerge %81 %69 None + OpBranchConditional %78 %81 %69 + %81 = OpLabel + %82 = OpAccessChain %_ptr_StorageBuffer_float %16 %uint_0 %26 + OpStore %82 %75 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh new file mode 100644 index 00000000000..7b38001d8d4 --- /dev/null +++ b/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh @@ -0,0 +1,150 @@ +; SPIR-V +; Version: 1.4 +; Generator: Unknown(30017); 21022 +; Bound: 89 +; Schema: 0 + OpCapability Shader + OpCapability Geometry + OpCapability ShaderViewportIndexLayerEXT + OpCapability MeshShadingEXT + OpExtension "SPV_EXT_mesh_shader" + OpExtension "SPV_EXT_shader_viewport_index_layer" + OpMemoryModel Logical GLSL450 + OpEntryPoint MeshEXT %main "main" %SV_Position %B %SV_CullPrimitive %SV_RenderTargetArrayIndex %SV_PrimitiveID %C %indices %32 %gl_LocalInvocationIndex %38 + OpExecutionMode %main OutputVertices 24 + OpExecutionMode %main OutputPrimitivesNV 8 + OpExecutionMode %main OutputTrianglesNV + OpExecutionMode %main LocalSize 2 3 4 + OpName %main "main" + 
OpName %SV_Position "SV_Position" + OpName %B "B" + OpName %SV_CullPrimitive "SV_CullPrimitive" + OpName %SV_RenderTargetArrayIndex "SV_RenderTargetArrayIndex" + OpName %SV_PrimitiveID "SV_PrimitiveID" + OpName %C "C" + OpName %indices "indices" + OpName %_ "" + OpDecorate %SV_Position BuiltIn Position + OpDecorate %B Location 1 + OpDecorate %SV_CullPrimitive BuiltIn CullPrimitiveEXT + OpDecorate %SV_CullPrimitive PerPrimitiveNV + OpDecorate %SV_RenderTargetArrayIndex BuiltIn Layer + OpDecorate %SV_RenderTargetArrayIndex PerPrimitiveNV + OpDecorate %SV_PrimitiveID BuiltIn PrimitiveId + OpDecorate %SV_PrimitiveID PerPrimitiveNV + OpDecorate %C Location 3 + OpDecorate %C PerPrimitiveNV + OpDecorate %indices BuiltIn PrimitiveTriangleIndicesEXT + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + %void = OpTypeVoid + %2 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_24 = OpConstant %uint 24 +%_arr_v4float_uint_24 = OpTypeArray %v4float %uint_24 +%_ptr_Output__arr_v4float_uint_24 = OpTypePointer Output %_arr_v4float_uint_24 +%SV_Position = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %B = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %bool = OpTypeBool + %uint_8 = OpConstant %uint 8 +%_arr_bool_uint_8 = OpTypeArray %bool %uint_8 +%_ptr_Output__arr_bool_uint_8 = OpTypePointer Output %_arr_bool_uint_8 +%SV_CullPrimitive = OpVariable %_ptr_Output__arr_bool_uint_8 Output +%_arr_uint_uint_8 = OpTypeArray %uint %uint_8 +%_ptr_Output__arr_uint_uint_8 = OpTypePointer Output %_arr_uint_uint_8 +%SV_RenderTargetArrayIndex = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%SV_PrimitiveID = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8 +%_ptr_Output__arr_v4float_uint_8 = OpTypePointer Output %_arr_v4float_uint_8 + %C = OpVariable %_ptr_Output__arr_v4float_uint_8 Output + %v3uint = OpTypeVector %uint 3 +%_arr_v3uint_uint_8 = 
OpTypeArray %v3uint %uint_8 +%_ptr_Output__arr_v3uint_uint_8 = OpTypePointer Output %_arr_v3uint_uint_8 + %indices = OpVariable %_ptr_Output__arr_v3uint_uint_8 Output + %uint_64 = OpConstant %uint 64 +%_arr_float_uint_64 = OpTypeArray %float %uint_64 +%_ptr_Workgroup__arr_float_uint_64 = OpTypePointer Workgroup %_arr_float_uint_64 + %32 = OpVariable %_ptr_Workgroup__arr_float_uint_64 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %_ = OpTypeStruct %float +%_ptr_TaskPayloadWorkgroupEXT__ = OpTypePointer TaskPayloadWorkgroupEXT %_ + %38 = OpVariable %_ptr_TaskPayloadWorkgroupEXT__ TaskPayloadWorkgroupEXT +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %uint_2 = OpConstant %uint 2 + %uint_264 = OpConstant %uint 264 +%_ptr_Output_float = OpTypePointer Output %float + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float +%_ptr_Output_v3uint = OpTypePointer Output %v3uint +%_ptr_Output_bool = OpTypePointer Output %bool +%_ptr_Output_uint = OpTypePointer Output %uint + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %85 + %85 = OpLabel + %35 = OpLoad %uint %gl_LocalInvocationIndex + %39 = OpConvertUToF %float %35 + %41 = OpAccessChain %_ptr_Workgroup_float %32 %35 + OpStore %41 %39 + OpControlBarrier %uint_2 %uint_2 %uint_264 + OpSetMeshOutputsEXT %uint_24 %uint_8 + %44 = OpLoad %float %41 + %46 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_0 + OpStore %46 %44 + %48 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_1 + OpStore %48 %44 + %50 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_2 + OpStore %50 %44 + %51 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_3 + OpStore %51 %44 + %53 = OpBitwiseXor %uint %35 %uint_1 + %54 = OpAccessChain %_ptr_Workgroup_float %32 %53 + %55 = OpLoad %float %54 + %57 = 
OpInBoundsAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %38 %uint_0 + %58 = OpLoad %float %57 + %59 = OpFAdd %float %58 %55 + %60 = OpAccessChain %_ptr_Output_float %B %35 %uint_0 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_Output_float %B %35 %uint_1 + OpStore %61 %59 + %62 = OpAccessChain %_ptr_Output_float %B %35 %uint_2 + OpStore %62 %59 + %63 = OpAccessChain %_ptr_Output_float %B %35 %uint_3 + OpStore %63 %59 + %64 = OpULessThan %bool %35 %uint_8 + OpSelectionMerge %87 None + OpBranchConditional %64 %86 %87 + %86 = OpLabel + %65 = OpIMul %uint %35 %uint_3 + %66 = OpIAdd %uint %65 %uint_1 + %67 = OpIAdd %uint %65 %uint_2 + %68 = OpCompositeConstruct %v3uint %65 %66 %67 + %70 = OpAccessChain %_ptr_Output_v3uint %indices %35 + OpStore %70 %68 + %71 = OpBitwiseAnd %uint %35 %uint_1 + %72 = OpINotEqual %bool %71 %uint_0 + %74 = OpAccessChain %_ptr_Output_bool %SV_CullPrimitive %35 + OpStore %74 %72 + %76 = OpAccessChain %_ptr_Output_uint %SV_PrimitiveID %35 + OpStore %76 %35 + %77 = OpAccessChain %_ptr_Output_uint %SV_RenderTargetArrayIndex %35 + OpStore %77 %35 + %78 = OpBitwiseXor %uint %35 %uint_2 + %79 = OpAccessChain %_ptr_Workgroup_float %32 %78 + %80 = OpLoad %float %79 + %81 = OpAccessChain %_ptr_Output_float %C %35 %uint_0 + OpStore %81 %80 + %82 = OpAccessChain %_ptr_Output_float %C %35 %uint_1 + OpStore %82 %80 + %83 = OpAccessChain %_ptr_Output_float %C %35 %uint_2 + OpStore %83 %80 + %84 = OpAccessChain %_ptr_Output_float %C %35 %uint_3 + OpStore %84 %80 + OpBranch %87 + %87 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/rgen/acceleration-nonuniform.spv14.vk.nocompat.asm.rgen b/shaders-no-opt/asm/rgen/acceleration-nonuniform.spv14.vk.nocompat.asm.rgen new file mode 100644 index 00000000000..2178c8af7ac --- /dev/null +++ b/shaders-no-opt/asm/rgen/acceleration-nonuniform.spv14.vk.nocompat.asm.rgen @@ -0,0 +1,112 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 73 +; Schema: 0 + 
OpCapability RayTracingKHR + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpExtension "SPV_EXT_descriptor_indexing" + OpExtension "SPV_KHR_ray_tracing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint RayGenerationNV %main "main" %gl_LaunchIDEXT %gl_LaunchSizeEXT %as %payload %image + OpSource GLSL 460 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpSourceExtension "GL_EXT_ray_tracing" + OpName %main "main" + OpName %col "col" + OpName %origin "origin" + OpName %gl_LaunchIDEXT "gl_LaunchIDEXT" + OpName %gl_LaunchSizeEXT "gl_LaunchSizeEXT" + OpName %direction "direction" + OpName %as "as" + OpName %payload "payload" + OpName %image "image" + OpDecorate %gl_LaunchIDEXT BuiltIn LaunchIdNV + OpDecorate %gl_LaunchSizeEXT BuiltIn LaunchSizeNV + OpDecorate %as DescriptorSet 0 + OpDecorate %as Binding 1 + OpDecorate %51 NonUniform + OpDecorate %payload Location 0 + OpDecorate %image DescriptorSet 0 + OpDecorate %image Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %12 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %v3float = OpTypeVector %float 3 +%_ptr_Function_v3float = OpTypePointer Function %v3float + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_LaunchIDEXT = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LaunchSizeEXT = OpVariable %_ptr_Input_v3uint Input + %uint_1 = OpConstant %uint 1 + %float_n1 = OpConstant %float -1 + %41 = OpConstantComposite %v3float %float_0 %float_0 %float_n1 + %42 = OpTypeAccelerationStructureKHR +%_runtimearr_42 = OpTypeRuntimeArray %42 +%_ptr_UniformConstant__runtimearr_42 = OpTypePointer UniformConstant %_runtimearr_42 + 
%as = OpVariable %_ptr_UniformConstant__runtimearr_42 UniformConstant +%_ptr_UniformConstant_42 = OpTypePointer UniformConstant %42 + %uint_255 = OpConstant %uint 255 + %float_1000 = OpConstant %float 1000 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_RayPayloadNV_float = OpTypePointer RayPayloadNV %float + %payload = OpVariable %_ptr_RayPayloadNV_float RayPayloadNV +%_ptr_Function_float = OpTypePointer Function %float + %63 = OpTypeImage %float 2D 0 0 0 2 Rgba8 +%_ptr_UniformConstant_63 = OpTypePointer UniformConstant %63 + %image = OpVariable %_ptr_UniformConstant_63 UniformConstant + %v2uint = OpTypeVector %uint 2 + %v2int = OpTypeVector %int 2 + %main = OpFunction %void None %3 + %5 = OpLabel + %col = OpVariable %_ptr_Function_v4float Function + %origin = OpVariable %_ptr_Function_v3float Function + %direction = OpVariable %_ptr_Function_v3float Function + OpStore %col %12 + %22 = OpAccessChain %_ptr_Input_uint %gl_LaunchIDEXT %uint_0 + %23 = OpLoad %uint %22 + %24 = OpConvertUToF %float %23 + %26 = OpAccessChain %_ptr_Input_uint %gl_LaunchSizeEXT %uint_0 + %27 = OpLoad %uint %26 + %28 = OpConvertUToF %float %27 + %29 = OpFDiv %float %24 %28 + %31 = OpAccessChain %_ptr_Input_uint %gl_LaunchIDEXT %uint_1 + %32 = OpLoad %uint %31 + %33 = OpConvertUToF %float %32 + %34 = OpAccessChain %_ptr_Input_uint %gl_LaunchSizeEXT %uint_1 + %35 = OpLoad %uint %34 + %36 = OpConvertUToF %float %35 + %37 = OpFDiv %float %33 %36 + %38 = OpCompositeConstruct %v3float %29 %37 %float_1 + OpStore %origin %38 + OpStore %direction %41 + %46 = OpAccessChain %_ptr_Input_uint %gl_LaunchIDEXT %uint_0 + %47 = OpLoad %uint %46 + %48 = OpCopyObject %uint %47 + %50 = OpAccessChain %_ptr_UniformConstant_42 %as %48 + %51 = OpLoad %42 %50 + %53 = OpLoad %v3float %origin + %54 = OpLoad %v3float %direction + OpTraceRayKHR %51 %uint_0 %uint_255 %uint_0 %uint_1 %uint_0 %53 %float_0 %54 %float_1000 %payload + %60 = OpLoad %float %payload + %62 = OpAccessChain %_ptr_Function_float %col 
%uint_1 + OpStore %62 %60 + %66 = OpLoad %63 %image + %68 = OpLoad %v3uint %gl_LaunchIDEXT + %69 = OpVectorShuffle %v2uint %68 %68 0 1 + %71 = OpBitcast %v2int %69 + %72 = OpLoad %v4float %col + OpImageWrite %66 %71 %72 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/task/task-shader.vk.nocompat.spv14.asm.task b/shaders-no-opt/asm/task/task-shader.vk.nocompat.spv14.asm.task new file mode 100644 index 00000000000..cbef97ed1fb --- /dev/null +++ b/shaders-no-opt/asm/task/task-shader.vk.nocompat.spv14.asm.task @@ -0,0 +1,132 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 93 +; Schema: 0 + OpCapability MeshShadingEXT + OpExtension "SPV_EXT_mesh_shader" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TaskEXT %main "main" %vs %gl_LocalInvocationIndex %p + OpExecutionMode %main LocalSize 4 3 2 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_mesh_shader" + OpName %main "main" + OpName %vs "vs" + OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex" + OpName %Payload "Payload" + OpMemberName %Payload 0 "v" + OpName %p "p" + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_24 = OpConstant %uint 24 +%_arr_float_uint_24 = OpTypeArray %float %uint_24 +%_ptr_Workgroup__arr_float_uint_24 = OpTypePointer Workgroup %_arr_float_uint_24 + %vs = OpVariable %_ptr_Workgroup__arr_float_uint_24 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %float_10 = OpConstant %float 10 +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %uint_2 = OpConstant %uint 2 + %uint_264 = OpConstant %uint 264 + %uint_12 = OpConstant %uint 12 + %bool = OpTypeBool + %uint_6 = OpConstant %uint 6 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float 
%uint_3 + %Payload = OpTypeStruct %_arr_float_uint_3 +%_ptr_TaskPayloadWorkgroupEXT_Payload = OpTypePointer TaskPayloadWorkgroupEXT %Payload + %p = OpVariable %_ptr_TaskPayloadWorkgroupEXT_Payload TaskPayloadWorkgroupEXT + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float + %int_4 = OpConstant %int 4 + %int_6 = OpConstant %int 6 + %int_8 = OpConstant %int 8 + %v3uint = OpTypeVector %uint 3 + %uint_4 = OpConstant %uint 4 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_3 %uint_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %uint %gl_LocalInvocationIndex + %17 = OpAccessChain %_ptr_Workgroup_float %vs %14 + OpStore %17 %float_10 + OpControlBarrier %uint_2 %uint_2 %uint_264 + %20 = OpLoad %uint %gl_LocalInvocationIndex + %23 = OpULessThan %bool %20 %uint_12 + OpSelectionMerge %25 None + OpBranchConditional %23 %24 %25 + %24 = OpLabel + %26 = OpLoad %uint %gl_LocalInvocationIndex + %27 = OpLoad %uint %gl_LocalInvocationIndex + %28 = OpIAdd %uint %27 %uint_12 + %29 = OpAccessChain %_ptr_Workgroup_float %vs %28 + %30 = OpLoad %float %29 + %31 = OpAccessChain %_ptr_Workgroup_float %vs %26 + %32 = OpLoad %float %31 + %33 = OpFAdd %float %32 %30 + %34 = OpAccessChain %_ptr_Workgroup_float %vs %26 + OpStore %34 %33 + OpBranch %25 + %25 = OpLabel + OpControlBarrier %uint_2 %uint_2 %uint_264 + %35 = OpLoad %uint %gl_LocalInvocationIndex + %37 = OpULessThan %bool %35 %uint_6 + OpSelectionMerge %39 None + OpBranchConditional %37 %38 %39 + %38 = OpLabel + %40 = OpLoad %uint %gl_LocalInvocationIndex + %41 = OpLoad %uint %gl_LocalInvocationIndex + %42 = OpIAdd %uint %41 %uint_6 + %43 = OpAccessChain %_ptr_Workgroup_float %vs %42 + %44 = OpLoad %float %43 + %45 = OpAccessChain %_ptr_Workgroup_float %vs %40 + %46 = OpLoad %float %45 + %47 = OpFAdd %float %46 %44 + %48 = OpAccessChain %_ptr_Workgroup_float %vs %40 + OpStore %48 %47 + OpBranch %39 + %39 = OpLabel + 
OpControlBarrier %uint_2 %uint_2 %uint_264 + %49 = OpLoad %uint %gl_LocalInvocationIndex + %51 = OpULessThan %bool %49 %uint_3 + OpSelectionMerge %53 None + OpBranchConditional %51 %52 %53 + %52 = OpLabel + %54 = OpLoad %uint %gl_LocalInvocationIndex + %55 = OpLoad %uint %gl_LocalInvocationIndex + %56 = OpIAdd %uint %55 %uint_3 + %57 = OpAccessChain %_ptr_Workgroup_float %vs %56 + %58 = OpLoad %float %57 + %59 = OpAccessChain %_ptr_Workgroup_float %vs %54 + %60 = OpLoad %float %59 + %61 = OpFAdd %float %60 %58 + %62 = OpAccessChain %_ptr_Workgroup_float %vs %54 + OpStore %62 %61 + OpBranch %53 + %53 = OpLabel + OpControlBarrier %uint_2 %uint_2 %uint_264 + %69 = OpLoad %uint %gl_LocalInvocationIndex + %70 = OpLoad %uint %gl_LocalInvocationIndex + %71 = OpAccessChain %_ptr_Workgroup_float %vs %70 + %72 = OpLoad %float %71 + %74 = OpAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %p %int_0 %69 + OpStore %74 %72 + %76 = OpAccessChain %_ptr_Workgroup_float %vs %int_4 + %77 = OpLoad %float %76 + %78 = OpConvertFToS %int %77 + %79 = OpBitcast %uint %78 + %81 = OpAccessChain %_ptr_Workgroup_float %vs %int_6 + %82 = OpLoad %float %81 + %83 = OpConvertFToS %int %82 + %84 = OpBitcast %uint %83 + %86 = OpAccessChain %_ptr_Workgroup_float %vs %int_8 + %87 = OpLoad %float %86 + %88 = OpConvertFToS %int %87 + %89 = OpBitcast %uint %88 + OpEmitMeshTasksEXT %79 %84 %89 %p + OpFunctionEnd diff --git a/shaders-no-opt/asm/temporary.zero-initialize.asm.frag b/shaders-no-opt/asm/temporary.zero-initialize.asm.frag new file mode 100644 index 00000000000..eccff08b331 --- /dev/null +++ b/shaders-no-opt/asm/temporary.zero-initialize.asm.frag @@ -0,0 +1,93 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 65 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vA %vB + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + 
OpName %FragColor "FragColor" + OpName %vA "vA" + OpName %vB "vB" + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %vA RelaxedPrecision + OpDecorate %vA Flat + OpDecorate %vA Location 0 + OpDecorate %25 RelaxedPrecision + OpDecorate %30 RelaxedPrecision + OpDecorate %vB RelaxedPrecision + OpDecorate %vB Flat + OpDecorate %vB Location 1 + OpDecorate %38 RelaxedPrecision + OpDecorate %40 RelaxedPrecision + OpDecorate %49 RelaxedPrecision + OpDecorate %51 RelaxedPrecision + OpDecorate %53 RelaxedPrecision + OpDecorate %56 RelaxedPrecision + OpDecorate %64 RelaxedPrecision + OpDecorate %58 RelaxedPrecision + OpDecorate %57 RelaxedPrecision + OpDecorate %60 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_int = OpTypePointer Input %int + %vA = OpVariable %_ptr_Input_int Input + %bool = OpTypeBool + %int_20 = OpConstant %int 20 + %int_50 = OpConstant %int 50 + %vB = OpVariable %_ptr_Input_int Input + %int_40 = OpConstant %int 40 + %int_60 = OpConstant %int 60 + %int_10 = OpConstant %int 10 + %float_1 = OpConstant %float 1 + %63 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + OpStore %FragColor %11 + OpBranch %17 + %17 = OpLabel + %60 = OpPhi %int %int_0 %5 %58 %20 + %57 = OpPhi %int %int_0 %5 %56 %20 + %25 = OpLoad %int %vA + %27 = OpSLessThan %bool %57 %25 + OpLoopMerge %19 %20 None + OpBranchConditional %27 %18 %19 + %18 = OpLabel + %30 = OpIAdd %int %25 %57 + %32 = OpIEqual %bool %30 %int_20 + OpSelectionMerge %34 None + OpBranchConditional %32 %33 %36 + %33 = OpLabel + OpBranch %34 + %36 = OpLabel + %38 = 
OpLoad %int %vB + %40 = OpIAdd %int %38 %57 + %42 = OpIEqual %bool %40 %int_40 + %64 = OpSelect %int %42 %int_60 %60 + OpBranch %34 + %34 = OpLabel + %58 = OpPhi %int %int_50 %33 %64 %36 + %49 = OpIAdd %int %58 %int_10 + %51 = OpLoad %v4float %FragColor + %53 = OpFAdd %v4float %51 %63 + OpStore %FragColor %53 + OpBranch %20 + %20 = OpLabel + %56 = OpIAdd %int %57 %49 + OpBranch %17 + %19 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/tesc/array-of-block-output-initializer.asm.tesc b/shaders-no-opt/asm/tesc/array-of-block-output-initializer.asm.tesc new file mode 100644 index 00000000000..0ec5fa90a96 --- /dev/null +++ b/shaders-no-opt/asm/tesc/array-of-block-output-initializer.asm.tesc @@ -0,0 +1,101 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 42 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %_ %patches %v2 %v3 %verts + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %vert "vert" + OpMemberName %vert 0 "v0" + OpMemberName %vert 1 "v1" + OpName %_ "" + OpName %vert_patch "vert_patch" + OpMemberName %vert_patch 0 "v2" + OpMemberName %vert_patch 1 "v3" + OpName %patches "patches" + OpName %v2 "v2" + OpName %v3 "v3" + OpName %vert2 "vert2" + OpMemberName %vert2 0 "v4" + OpMemberName %vert2 1 "v5" + OpName %verts "verts" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance 
+ OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %vert 0 Patch + OpMemberDecorate %vert 1 Patch + OpDecorate %vert Block + OpDecorate %_ Location 0 + OpMemberDecorate %vert_patch 0 Patch + OpMemberDecorate %vert_patch 1 Patch + OpDecorate %vert_patch Block + OpDecorate %patches Location 2 + OpDecorate %v2 Patch + OpDecorate %v2 Location 6 + OpDecorate %v3 Location 7 + OpDecorate %vert2 Block + OpDecorate %verts Location 8 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out_zero = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %gl_out_zero + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %vert = OpTypeStruct %float %float +%_ptr_Output_vert = OpTypePointer Output %vert + %__zero = OpConstantNull %vert + %_ = OpVariable %_ptr_Output_vert Output %__zero + %vert_patch = OpTypeStruct %float %float + %uint_2 = OpConstant %uint 2 +%_arr_vert_patch_uint_2 = OpTypeArray %vert_patch %uint_2 +%_ptr_Output__arr_vert_patch_uint_2 = OpTypePointer Output %_arr_vert_patch_uint_2 + %patches_zero = OpConstantNull %_arr_vert_patch_uint_2 + %patches = OpVariable %_ptr_Output__arr_vert_patch_uint_2 Output %patches_zero +%_ptr_Output_float = OpTypePointer Output %float + %v2_zero = 
OpConstantNull %float + %v2 = OpVariable %_ptr_Output_float Output %v2_zero +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 + %v3_zero = OpConstantNull %_arr_float_uint_4 + %v3 = OpVariable %_ptr_Output__arr_float_uint_4 Output %v3_zero + %vert2 = OpTypeStruct %float %float +%_arr_vert2_uint_4 = OpTypeArray %vert2 %uint_4 +%_ptr_Output__arr_vert2_uint_4 = OpTypePointer Output %_arr_vert2_uint_4 + %verts_zero = OpConstantNull %_arr_vert2_uint_4 + %verts = OpVariable %_ptr_Output__arr_vert2_uint_4 Output %verts_zero + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/tesc/copy-memory-control-point.asm.tesc b/shaders-no-opt/asm/tesc/copy-memory-control-point.asm.tesc new file mode 100644 index 00000000000..7c0a638f985 --- /dev/null +++ b/shaders-no-opt/asm/tesc/copy-memory-control-point.asm.tesc @@ -0,0 +1,199 @@ +; SPIR-V +; Version: 1.0 +; Generator: Wine VKD3D Shader Compiler; 2 +; Bound: 126 +; Schema: 0 + OpCapability Tessellation + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %1 "main" %4 %30 %80 %101 %103 %108 %110 %115 %117 + OpExecutionMode %1 OutputVertices 3 + OpExecutionMode %1 Triangles + OpExecutionMode %1 SpacingEqual + OpExecutionMode %1 VertexOrderCw + OpName %1 "main" + OpName %11 "opc" + OpName %14 "cb1_struct" + OpName %16 "cb0_0" + OpName %22 "vicp" + OpName %23 "fork0" + OpName %26 "vForkInstanceId" + OpName %34 "r0" + OpName %32 "fork0_epilogue" + OpName %75 "fork1" + OpName %81 "fork1_epilogue" + OpName %101 "v0" + OpName %103 "v1" + OpName %108 "vicp0" + OpName %110 "vocp0" + OpName %115 "vicp1" + OpName %117 "vocp1" + OpDecorate %4 BuiltIn InvocationId + OpDecorate %13 ArrayStride 16 + OpDecorate %14 Block + OpMemberDecorate %14 0 Offset 0 + OpDecorate %16 DescriptorSet 0 + 
OpDecorate %16 Binding 0 + OpDecorate %30 BuiltIn TessLevelOuter + OpDecorate %30 Patch + OpDecorate %30 Patch + OpDecorate %30 Patch + OpDecorate %30 Patch + OpDecorate %80 BuiltIn TessLevelInner + OpDecorate %80 Patch + OpDecorate %80 Patch + OpDecorate %101 Location 0 + OpDecorate %103 Location 1 + OpDecorate %108 Location 2 + OpDecorate %110 Location 3 + OpDecorate %115 Location 4 + OpDecorate %117 Location 5 + %2 = OpTypeInt 32 1 + %3 = OpTypePointer Input %2 + %4 = OpVariable %3 Input + %5 = OpTypeFloat 32 + %6 = OpTypeVector %5 4 + %7 = OpTypeInt 32 0 + %8 = OpConstant %7 4 + %9 = OpTypeArray %6 %8 + %10 = OpTypePointer Private %9 + %11 = OpVariable %10 Private + %12 = OpConstant %7 1 + %13 = OpTypeArray %6 %12 + %14 = OpTypeStruct %13 + %15 = OpTypePointer Uniform %14 + %16 = OpVariable %15 Uniform + %17 = OpConstant %7 3 + %18 = OpTypeArray %6 %17 + %19 = OpConstant %7 2 + %20 = OpTypeArray %18 %19 + %21 = OpTypePointer Private %20 + %22 = OpVariable %21 Private + %24 = OpTypeVoid + %25 = OpTypeFunction %24 %7 + %28 = OpTypeArray %5 %8 + %29 = OpTypePointer Output %28 + %30 = OpVariable %29 Output + %31 = OpConstant %7 0 + %33 = OpTypePointer Function %6 + %36 = OpTypePointer Function %5 + %38 = OpTypePointer Uniform %6 + %40 = OpTypePointer Uniform %5 + %46 = OpTypePointer Private %6 + %48 = OpTypePointer Private %5 + %52 = OpVariable %46 Private + %55 = OpVariable %46 Private + %58 = OpVariable %46 Private + %60 = OpTypeFunction %24 %46 %46 %46 + %69 = OpTypePointer Output %5 + %76 = OpTypeFunction %24 + %78 = OpTypeArray %5 %19 + %79 = OpTypePointer Output %78 + %80 = OpVariable %79 Output + %89 = OpVariable %46 Private + %91 = OpTypeFunction %24 %46 + %98 = OpTypePointer Private %18 + %100 = OpTypePointer Input %18 + %101 = OpVariable %100 Input + %103 = OpVariable %100 Input + %105 = OpTypeVector %5 3 + %106 = OpTypeArray %105 %17 + %107 = OpTypePointer Input %106 + %108 = OpVariable %107 Input + %109 = OpTypePointer Output %106 + %110 = OpVariable 
%109 Output + %111 = OpTypePointer Output %105 + %112 = OpTypePointer Input %105 + %115 = OpVariable %100 Input + %116 = OpTypePointer Output %18 + %117 = OpVariable %116 Output + %118 = OpTypePointer Output %6 + %119 = OpTypePointer Input %6 + %23 = OpFunction %24 None %25 + %26 = OpFunctionParameter %7 + %27 = OpLabel + %34 = OpVariable %33 Function + %35 = OpBitcast %5 %26 + %37 = OpInBoundsAccessChain %36 %34 %31 + OpStore %37 %35 + %39 = OpAccessChain %38 %16 %31 %31 + %41 = OpInBoundsAccessChain %40 %39 %31 + %42 = OpLoad %5 %41 + %43 = OpInBoundsAccessChain %36 %34 %31 + %44 = OpLoad %5 %43 + %45 = OpBitcast %2 %44 + %47 = OpAccessChain %46 %11 %45 + %49 = OpInBoundsAccessChain %48 %47 %31 + OpStore %49 %42 + %50 = OpAccessChain %46 %11 %31 + %51 = OpLoad %6 %50 + OpStore %52 %51 + %53 = OpAccessChain %46 %11 %12 + %54 = OpLoad %6 %53 + OpStore %55 %54 + %56 = OpAccessChain %46 %11 %19 + %57 = OpLoad %6 %56 + OpStore %58 %57 + %59 = OpFunctionCall %24 %32 %52 %55 %58 + OpReturn + OpFunctionEnd + %32 = OpFunction %24 None %60 + %61 = OpFunctionParameter %46 + %62 = OpFunctionParameter %46 + %63 = OpFunctionParameter %46 + %64 = OpLabel + %65 = OpLoad %6 %61 + %66 = OpLoad %6 %62 + %67 = OpLoad %6 %63 + %68 = OpCompositeExtract %5 %65 0 + %70 = OpAccessChain %69 %30 %31 + OpStore %70 %68 + %71 = OpCompositeExtract %5 %66 0 + %72 = OpAccessChain %69 %30 %12 + OpStore %72 %71 + %73 = OpCompositeExtract %5 %67 0 + %74 = OpAccessChain %69 %30 %19 + OpStore %74 %73 + OpReturn + OpFunctionEnd + %75 = OpFunction %24 None %76 + %77 = OpLabel + %82 = OpAccessChain %38 %16 %31 %31 + %83 = OpInBoundsAccessChain %40 %82 %31 + %84 = OpLoad %5 %83 + %85 = OpAccessChain %46 %11 %17 + %86 = OpInBoundsAccessChain %48 %85 %31 + OpStore %86 %84 + %87 = OpAccessChain %46 %11 %17 + %88 = OpLoad %6 %87 + OpStore %89 %88 + %90 = OpFunctionCall %24 %81 %89 + OpReturn + OpFunctionEnd + %81 = OpFunction %24 None %91 + %92 = OpFunctionParameter %46 + %93 = OpLabel + %94 = OpLoad %6 %92 
+ %95 = OpCompositeExtract %5 %94 0 + %96 = OpAccessChain %69 %80 %31 + OpStore %96 %95 + OpReturn + OpFunctionEnd + %1 = OpFunction %24 None %76 + %97 = OpLabel + %99 = OpInBoundsAccessChain %98 %22 %31 + OpCopyMemory %99 %101 + %102 = OpInBoundsAccessChain %98 %22 %12 + OpCopyMemory %102 %103 + %104 = OpLoad %2 %4 + %113 = OpAccessChain %111 %110 %104 + %114 = OpAccessChain %112 %108 %104 + OpCopyMemory %113 %114 + %120 = OpAccessChain %118 %117 %104 + %121 = OpAccessChain %119 %115 %104 + OpCopyMemory %120 %121 + %122 = OpFunctionCall %24 %23 %31 + %123 = OpFunctionCall %24 %23 %12 + %124 = OpFunctionCall %24 %23 %19 + %125 = OpFunctionCall %24 %75 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/shaders-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc new file mode 100644 index 00000000000..0fd4dce256e --- /dev/null +++ b/shaders-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc @@ -0,0 +1,248 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 2 +; Bound: 162 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %hs_main "main" %p_pos %p_1 %i_1 %_entryPointOutput_pos %_entryPointOutput %_patchConstantOutput_EdgeTess %_patchConstantOutput_InsideTess + OpExecutionMode %hs_main OutputVertices 3 + OpExecutionMode %hs_main Triangles + OpExecutionMode %hs_main SpacingFractionalOdd + OpExecutionMode %hs_main VertexOrderCw + OpSource HLSL 500 + OpName %hs_main "hs_main" + OpName %VertexOutput "VertexOutput" + OpMemberName %VertexOutput 0 "pos" + OpMemberName %VertexOutput 1 "uv" + OpName %HSOut "HSOut" + OpMemberName %HSOut 0 "pos" + OpMemberName %HSOut 1 "uv" + OpName %_hs_main_struct_VertexOutput_vf4_vf21_3__u1_ "@hs_main(struct-VertexOutput-vf4-vf21[3];u1;" + OpName %p "p" + OpName %i "i" + OpName %HSConstantOut "HSConstantOut" + 
OpMemberName %HSConstantOut 0 "EdgeTess" + OpMemberName %HSConstantOut 1 "InsideTess" + OpName %PatchHS_struct_VertexOutput_vf4_vf21_3__ "PatchHS(struct-VertexOutput-vf4-vf21[3];" + OpName %patch "patch" + OpName %output "output" + OpName %p_0 "p" + OpName %p_pos "p.pos" + OpName %VertexOutput_0 "VertexOutput" + OpMemberName %VertexOutput_0 0 "uv" + OpName %p_1 "p" + OpName %i_0 "i" + OpName %i_1 "i" + OpName %flattenTemp "flattenTemp" + OpName %param "param" + OpName %param_0 "param" + OpName %_entryPointOutput_pos "@entryPointOutput.pos" + OpName %HSOut_0 "HSOut" + OpMemberName %HSOut_0 0 "uv" + OpName %_entryPointOutput "@entryPointOutput" + OpName %_patchConstantResult "@patchConstantResult" + OpName %param_1 "param" + OpName %_patchConstantOutput_EdgeTess "@patchConstantOutput.EdgeTess" + OpName %_patchConstantOutput_InsideTess "@patchConstantOutput.InsideTess" + OpName %output_0 "output" + OpDecorate %p_pos BuiltIn Position + OpDecorate %p_1 Location 0 + OpDecorate %i_1 BuiltIn InvocationId + OpDecorate %_entryPointOutput_pos BuiltIn Position + OpDecorate %_entryPointOutput Location 0 + OpDecorate %_patchConstantOutput_EdgeTess Patch + OpDecorate %_patchConstantOutput_EdgeTess BuiltIn TessLevelOuter + OpDecorate %_patchConstantOutput_InsideTess Patch + OpDecorate %_patchConstantOutput_InsideTess BuiltIn TessLevelInner + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v2float = OpTypeVector %float 2 +%VertexOutput = OpTypeStruct %v4float %v2float + %uint = OpTypeInt 32 0 + %uint_3 = OpConstant %uint 3 +%_arr_VertexOutput_uint_3 = OpTypeArray %VertexOutput %uint_3 +%_ptr_Function__arr_VertexOutput_uint_3 = OpTypePointer Function %_arr_VertexOutput_uint_3 +%_ptr_Function_uint = OpTypePointer Function %uint + %HSOut = OpTypeStruct %v4float %v2float + %16 = OpTypeFunction %HSOut %_ptr_Function__arr_VertexOutput_uint_3 %_ptr_Function_uint +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%HSConstantOut 
= OpTypeStruct %_arr_float_uint_3 %float + %23 = OpTypeFunction %HSConstantOut %_ptr_Function__arr_VertexOutput_uint_3 +%_ptr_Function_HSOut = OpTypePointer Function %HSOut + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %int_1 = OpConstant %int 1 +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 + %p_pos = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%_ptr_Input_v4float = OpTypePointer Input %v4float +%VertexOutput_0 = OpTypeStruct %v2float +%_arr_VertexOutput_0_uint_3 = OpTypeArray %VertexOutput_0 %uint_3 +%_ptr_Input__arr_VertexOutput_0_uint_3 = OpTypePointer Input %_arr_VertexOutput_0_uint_3 + %p_1 = OpVariable %_ptr_Input__arr_VertexOutput_0_uint_3 Input +%_ptr_Input_v2float = OpTypePointer Input %v2float + %int_2 = OpConstant %int 2 +%_ptr_Input_uint = OpTypePointer Input %uint + %i_1 = OpVariable %_ptr_Input_uint Input +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_entryPointOutput_pos = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%_ptr_Output_v4float = OpTypePointer Output %v4float + %HSOut_0 = OpTypeStruct %v2float +%_arr_HSOut_0_uint_3 = OpTypeArray %HSOut_0 %uint_3 +%_ptr_Output__arr_HSOut_0_uint_3 = OpTypePointer Output %_arr_HSOut_0_uint_3 +%_entryPointOutput = OpVariable %_ptr_Output__arr_HSOut_0_uint_3 Output +%_ptr_Output_v2float = OpTypePointer Output %v2float + %uint_2 = OpConstant %uint 2 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool +%_ptr_Function_HSConstantOut = OpTypePointer Function %HSConstantOut + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_patchConstantOutput_EdgeTess = OpVariable %_ptr_Output__arr_float_uint_4 Output +%_ptr_Function_float = 
OpTypePointer Function %float +%_ptr_Output_float = OpTypePointer Output %float +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_patchConstantOutput_InsideTess = OpVariable %_ptr_Output__arr_float_uint_2 Output + %float_1 = OpConstant %float 1 + %hs_main = OpFunction %void None %3 + %5 = OpLabel + %p_0 = OpVariable %_ptr_Function__arr_VertexOutput_uint_3 Function + %i_0 = OpVariable %_ptr_Function_uint Function +%flattenTemp = OpVariable %_ptr_Function_HSOut Function + %param = OpVariable %_ptr_Function__arr_VertexOutput_uint_3 Function + %param_0 = OpVariable %_ptr_Function_uint Function +%_patchConstantResult = OpVariable %_ptr_Function_HSConstantOut Function + %param_1 = OpVariable %_ptr_Function__arr_VertexOutput_uint_3 Function + %50 = OpAccessChain %_ptr_Input_v4float %p_pos %int_0 + %51 = OpLoad %v4float %50 + %52 = OpAccessChain %_ptr_Function_v4float %p_0 %int_0 %int_0 + OpStore %52 %51 + %58 = OpAccessChain %_ptr_Input_v2float %p_1 %int_0 %int_0 + %59 = OpLoad %v2float %58 + %60 = OpAccessChain %_ptr_Function_v2float %p_0 %int_0 %int_1 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_Input_v4float %p_pos %int_1 + %62 = OpLoad %v4float %61 + %63 = OpAccessChain %_ptr_Function_v4float %p_0 %int_1 %int_0 + OpStore %63 %62 + %64 = OpAccessChain %_ptr_Input_v2float %p_1 %int_1 %int_0 + %65 = OpLoad %v2float %64 + %66 = OpAccessChain %_ptr_Function_v2float %p_0 %int_1 %int_1 + OpStore %66 %65 + %68 = OpAccessChain %_ptr_Input_v4float %p_pos %int_2 + %69 = OpLoad %v4float %68 + %70 = OpAccessChain %_ptr_Function_v4float %p_0 %int_2 %int_0 + OpStore %70 %69 + %71 = OpAccessChain %_ptr_Input_v2float %p_1 %int_2 %int_0 + %72 = OpLoad %v2float %71 + %73 = OpAccessChain %_ptr_Function_v2float %p_0 %int_2 %int_1 + OpStore %73 %72 + %77 = OpLoad %uint %i_1 + OpStore %i_0 %77 + %80 = OpLoad %_arr_VertexOutput_uint_3 %p_0 + OpStore %param %80 + %82 = OpLoad %uint %i_0 + OpStore %param_0 %82 + %83 = 
OpFunctionCall %HSOut %_hs_main_struct_VertexOutput_vf4_vf21_3__u1_ %param %param_0 + OpStore %flattenTemp %83 + %86 = OpAccessChain %_ptr_Function_v4float %flattenTemp %int_0 + %87 = OpLoad %v4float %86 + %94 = OpLoad %uint %i_1 + %89 = OpAccessChain %_ptr_Output_v4float %_entryPointOutput_pos %94 + OpStore %89 %87 + %95 = OpAccessChain %_ptr_Function_v2float %flattenTemp %int_1 + %96 = OpLoad %v2float %95 + %98 = OpAccessChain %_ptr_Output_v2float %_entryPointOutput %94 %int_0 + OpStore %98 %96 + OpControlBarrier %uint_2 %uint_1 %uint_0 + %102 = OpLoad %uint %i_1 + %104 = OpIEqual %bool %102 %int_0 + OpSelectionMerge %106 None + OpBranchConditional %104 %105 %106 + %105 = OpLabel + %110 = OpLoad %_arr_VertexOutput_uint_3 %p_0 + OpStore %param_1 %110 + %111 = OpFunctionCall %HSConstantOut %PatchHS_struct_VertexOutput_vf4_vf21_3__ %param_1 + OpStore %_patchConstantResult %111 + %117 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_0 %int_0 + %118 = OpLoad %float %117 + %120 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_EdgeTess %int_0 + OpStore %120 %118 + %121 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_0 %int_1 + %122 = OpLoad %float %121 + %123 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_EdgeTess %int_1 + OpStore %123 %122 + %124 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_0 %int_2 + %125 = OpLoad %float %124 + %126 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_EdgeTess %int_2 + OpStore %126 %125 + %130 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_1 + %131 = OpLoad %float %130 + %132 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_InsideTess %int_0 + OpStore %132 %131 + OpBranch %106 + %106 = OpLabel + OpReturn + OpFunctionEnd +%_hs_main_struct_VertexOutput_vf4_vf21_3__u1_ = OpFunction %HSOut None %16 + %p = OpFunctionParameter %_ptr_Function__arr_VertexOutput_uint_3 + %i = OpFunctionParameter %_ptr_Function_uint + %20 = OpLabel + %output = 
OpVariable %_ptr_Function_HSOut Function + %31 = OpLoad %uint %i + %33 = OpAccessChain %_ptr_Function_v4float %p %31 %int_0 + %34 = OpLoad %v4float %33 + %35 = OpAccessChain %_ptr_Function_v4float %output %int_0 + OpStore %35 %34 + %37 = OpLoad %uint %i + %39 = OpAccessChain %_ptr_Function_v2float %p %37 %int_1 + %40 = OpLoad %v2float %39 + %41 = OpAccessChain %_ptr_Function_v2float %output %int_1 + OpStore %41 %40 + %42 = OpLoad %HSOut %output + OpReturnValue %42 + OpFunctionEnd +%PatchHS_struct_VertexOutput_vf4_vf21_3__ = OpFunction %HSConstantOut None %23 + %patch = OpFunctionParameter %_ptr_Function__arr_VertexOutput_uint_3 + %26 = OpLabel + %output_0 = OpVariable %_ptr_Function_HSConstantOut Function + %135 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1 + %136 = OpLoad %v2float %135 + %137 = OpCompositeConstruct %v2float %float_1 %float_1 + %138 = OpFAdd %v2float %137 %136 + %139 = OpCompositeExtract %float %138 0 + %140 = OpAccessChain %_ptr_Function_float %output_0 %int_0 %int_0 + OpStore %140 %139 + %141 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1 + %142 = OpLoad %v2float %141 + %143 = OpCompositeConstruct %v2float %float_1 %float_1 + %144 = OpFAdd %v2float %143 %142 + %145 = OpCompositeExtract %float %144 0 + %146 = OpAccessChain %_ptr_Function_float %output_0 %int_0 %int_1 + OpStore %146 %145 + %147 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1 + %148 = OpLoad %v2float %147 + %149 = OpCompositeConstruct %v2float %float_1 %float_1 + %150 = OpFAdd %v2float %149 %148 + %151 = OpCompositeExtract %float %150 0 + %152 = OpAccessChain %_ptr_Function_float %output_0 %int_0 %int_2 + OpStore %152 %151 + %153 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1 + %154 = OpLoad %v2float %153 + %155 = OpCompositeConstruct %v2float %float_1 %float_1 + %156 = OpFAdd %v2float %155 %154 + %157 = OpCompositeExtract %float %156 0 + %158 = OpAccessChain %_ptr_Function_float %output_0 %int_1 + OpStore %158 %157 + %159 = OpLoad 
%HSConstantOut %output_0 + OpReturnValue %159 + OpFunctionEnd diff --git a/shaders-no-opt/asm/tesc/tess-level-initializer.asm.tesc b/shaders-no-opt/asm/tesc/tess-level-initializer.asm.tesc new file mode 100644 index 00000000000..95fd147e7be --- /dev/null +++ b/shaders-no-opt/asm/tesc/tess-level-initializer.asm.tesc @@ -0,0 +1,87 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 47 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %gl_TessLevelInner %gl_TessLevelOuter + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %gl_TessLevelInner "gl_TessLevelInner" + OpName %gl_TessLevelOuter "gl_TessLevelOuter" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex 
%uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 + %inner_zero = OpConstantNull %_arr_float_uint_2 +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output %inner_zero +%_ptr_Output_float = OpTypePointer Output %float + %int_1 = OpConstant %int 1 + %float_2 = OpConstant %float 2 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 + %outer_zero = OpConstantNull %_arr_float_uint_4 +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output %outer_zero + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %int_2 = OpConstant %int 2 + %float_5 = OpConstant %float 5 + %int_3 = OpConstant %int 3 + %float_6 = OpConstant %float 6 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_0 + OpStore %30 %float_1 + %33 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_1 + OpStore %33 %float_2 + %38 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_0 + OpStore %38 %float_3 + %40 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 + OpStore %40 %float_4 + %43 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_2 + OpStore %43 %float_5 + %46 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter 
%int_3 + OpStore %46 %float_6 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/vert/block-struct-initializer.asm.vert b/shaders-no-opt/asm/vert/block-struct-initializer.asm.vert new file mode 100644 index 00000000000..a431e6a7174 --- /dev/null +++ b/shaders-no-opt/asm/vert/block-struct-initializer.asm.vert @@ -0,0 +1,37 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 13 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %foo + OpSource GLSL 450 + OpName %main "main" + OpName %Vert "Vert" + OpMemberName %Vert 0 "a" + OpMemberName %Vert 1 "b" + OpName %_ "" + OpName %Foo "Foo" + OpMemberName %Foo 0 "c" + OpMemberName %Foo 1 "d" + OpName %foo "foo" + OpDecorate %Vert Block + OpDecorate %_ Location 0 + OpDecorate %foo Location 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %Vert = OpTypeStruct %float %float +%_ptr_Output_Vert = OpTypePointer Output %Vert + %zero_vert = OpConstantNull %Vert + %_ = OpVariable %_ptr_Output_Vert Output %zero_vert + %Foo = OpTypeStruct %float %float +%_ptr_Output_Foo = OpTypePointer Output %Foo +%zero_foo = OpConstantNull %Foo + %foo = OpVariable %_ptr_Output_Foo Output %zero_foo + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/vert/builtin-output-initializer.asm.vert b/shaders-no-opt/asm/vert/builtin-output-initializer.asm.vert new file mode 100644 index 00000000000..aaa68662e5d --- /dev/null +++ b/shaders-no-opt/asm/vert/builtin-output-initializer.asm.vert @@ -0,0 +1,44 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 20 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName 
%gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %zero = OpConstantNull %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output %zero + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %17 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %19 %17 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/vert/complex-link-by-name.asm.vert b/shaders-no-opt/asm/vert/complex-link-by-name.asm.vert new file mode 100644 index 00000000000..94a883c1ed1 --- /dev/null +++ b/shaders-no-opt/asm/vert/complex-link-by-name.asm.vert @@ -0,0 +1,119 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 59 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %output_location_0 %output_location_2 %output_location_3 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Struct_vec4" + OpMemberName %Foo 0 "m0" + OpName %c "c" + OpName %Foo_0 "Struct_vec4" + OpMemberName %Foo_0 0 "m0" + OpName 
%Bar "Struct_vec4" + OpMemberName %Bar 0 "m0" + OpName %UBO "UBO" + OpMemberName %UBO 0 "m0" + OpMemberName %UBO 1 "m1" + OpName %ubo_binding_0 "ubo_binding_0" + OpName %Bar_0 "Struct_vec4" + OpMemberName %Bar_0 0 "m0" + OpName %b "b" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpName %VertexOut "VertexOut" + OpMemberName %VertexOut 0 "m0" + OpMemberName %VertexOut 1 "m1" + OpName %output_location_0 "output_location_0" + OpName %output_location_2 "output_location_2" + OpName %output_location_3 "output_location_3" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %Bar 0 Offset 0 + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 1 Offset 16 + OpDecorate %UBO Block + OpDecorate %ubo_binding_0 DescriptorSet 0 + OpDecorate %ubo_binding_0 Binding 0 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %VertexOut Block + OpDecorate %output_location_0 Location 0 + OpDecorate %output_location_2 Location 2 + OpDecorate %output_location_3 Location 3 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %Foo = OpTypeStruct %v4float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %v4float + %Bar = OpTypeStruct %v4float + %UBO = OpTypeStruct %Foo_0 %Bar +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO +%ubo_binding_0 = OpVariable %_ptr_Uniform_UBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %Bar_0 = OpTypeStruct %v4float +%_ptr_Function_Bar_0 = 
OpTypePointer Function %Bar_0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_Bar = OpTypePointer Uniform %Bar + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output +%_ptr_Output_v4float = OpTypePointer Output %v4float + %VertexOut = OpTypeStruct %Foo %Bar_0 +%_ptr_Output_VertexOut = OpTypePointer Output %VertexOut +%output_location_0 = OpVariable %_ptr_Output_VertexOut Output +%_ptr_Output_Foo = OpTypePointer Output %Foo +%_ptr_Output_Bar_0 = OpTypePointer Output %Bar_0 +%output_location_2 = OpVariable %_ptr_Output_Foo Output +%output_location_3 = OpVariable %_ptr_Output_Bar_0 Output + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_Foo Function + %b = OpVariable %_ptr_Function_Bar_0 Function + %19 = OpAccessChain %_ptr_Uniform_Foo_0 %ubo_binding_0 %int_0 + %20 = OpLoad %Foo_0 %19 + %21 = OpCompositeExtract %v4float %20 0 + %23 = OpAccessChain %_ptr_Function_v4float %c %int_0 + OpStore %23 %21 + %29 = OpAccessChain %_ptr_Uniform_Bar %ubo_binding_0 %int_1 + %30 = OpLoad %Bar %29 + %31 = OpCompositeExtract %v4float %30 0 + %32 = OpAccessChain %_ptr_Function_v4float %b %int_0 + OpStore %32 %31 + %39 = OpAccessChain %_ptr_Function_v4float %c %int_0 + %40 = OpLoad %v4float %39 + %41 = OpAccessChain %_ptr_Function_v4float %b %int_0 + %42 = OpLoad %v4float %41 + %43 = OpFAdd %v4float %40 %42 + %45 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %45 %43 + %49 = OpLoad %Foo %c + %51 = OpAccessChain %_ptr_Output_Foo %output_location_0 %int_0 + OpStore %51 %49 + %52 = OpLoad %Bar_0 %b + %54 = OpAccessChain %_ptr_Output_Bar_0 %output_location_0 %int_1 + OpStore %54 %52 + %56 = OpLoad %Foo %c + OpStore %output_location_2 %56 + %58 = OpLoad %Bar_0 %b + OpStore %output_location_3 %58 + OpReturn + 
OpFunctionEnd diff --git a/shaders-no-opt/asm/vert/complex-link-by-name.force-flattened-io.legacy.asm.vert b/shaders-no-opt/asm/vert/complex-link-by-name.force-flattened-io.legacy.asm.vert new file mode 100644 index 00000000000..94a883c1ed1 --- /dev/null +++ b/shaders-no-opt/asm/vert/complex-link-by-name.force-flattened-io.legacy.asm.vert @@ -0,0 +1,119 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 59 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %output_location_0 %output_location_2 %output_location_3 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Struct_vec4" + OpMemberName %Foo 0 "m0" + OpName %c "c" + OpName %Foo_0 "Struct_vec4" + OpMemberName %Foo_0 0 "m0" + OpName %Bar "Struct_vec4" + OpMemberName %Bar 0 "m0" + OpName %UBO "UBO" + OpMemberName %UBO 0 "m0" + OpMemberName %UBO 1 "m1" + OpName %ubo_binding_0 "ubo_binding_0" + OpName %Bar_0 "Struct_vec4" + OpMemberName %Bar_0 0 "m0" + OpName %b "b" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpName %VertexOut "VertexOut" + OpMemberName %VertexOut 0 "m0" + OpMemberName %VertexOut 1 "m1" + OpName %output_location_0 "output_location_0" + OpName %output_location_2 "output_location_2" + OpName %output_location_3 "output_location_3" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %Bar 0 Offset 0 + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 1 Offset 16 + OpDecorate %UBO Block + OpDecorate %ubo_binding_0 DescriptorSet 0 + OpDecorate %ubo_binding_0 Binding 0 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 
BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %VertexOut Block + OpDecorate %output_location_0 Location 0 + OpDecorate %output_location_2 Location 2 + OpDecorate %output_location_3 Location 3 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %Foo = OpTypeStruct %v4float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %v4float + %Bar = OpTypeStruct %v4float + %UBO = OpTypeStruct %Foo_0 %Bar +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO +%ubo_binding_0 = OpVariable %_ptr_Uniform_UBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %Bar_0 = OpTypeStruct %v4float +%_ptr_Function_Bar_0 = OpTypePointer Function %Bar_0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_Bar = OpTypePointer Uniform %Bar + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output +%_ptr_Output_v4float = OpTypePointer Output %v4float + %VertexOut = OpTypeStruct %Foo %Bar_0 +%_ptr_Output_VertexOut = OpTypePointer Output %VertexOut +%output_location_0 = OpVariable %_ptr_Output_VertexOut Output +%_ptr_Output_Foo = OpTypePointer Output %Foo +%_ptr_Output_Bar_0 = OpTypePointer Output %Bar_0 +%output_location_2 = OpVariable %_ptr_Output_Foo Output +%output_location_3 = OpVariable %_ptr_Output_Bar_0 Output + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_Foo Function + %b = OpVariable %_ptr_Function_Bar_0 Function + %19 = OpAccessChain %_ptr_Uniform_Foo_0 %ubo_binding_0 %int_0 + %20 = OpLoad %Foo_0 %19 + %21 = OpCompositeExtract %v4float %20 0 + %23 = OpAccessChain %_ptr_Function_v4float %c %int_0 + 
OpStore %23 %21 + %29 = OpAccessChain %_ptr_Uniform_Bar %ubo_binding_0 %int_1 + %30 = OpLoad %Bar %29 + %31 = OpCompositeExtract %v4float %30 0 + %32 = OpAccessChain %_ptr_Function_v4float %b %int_0 + OpStore %32 %31 + %39 = OpAccessChain %_ptr_Function_v4float %c %int_0 + %40 = OpLoad %v4float %39 + %41 = OpAccessChain %_ptr_Function_v4float %b %int_0 + %42 = OpLoad %v4float %41 + %43 = OpFAdd %v4float %40 %42 + %45 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %45 %43 + %49 = OpLoad %Foo %c + %51 = OpAccessChain %_ptr_Output_Foo %output_location_0 %int_0 + OpStore %51 %49 + %52 = OpLoad %Bar_0 %b + %54 = OpAccessChain %_ptr_Output_Bar_0 %output_location_0 %int_1 + OpStore %54 %52 + %56 = OpLoad %Foo %c + OpStore %output_location_2 %56 + %58 = OpLoad %Bar_0 %b + OpStore %output_location_3 %58 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/vert/constant-composite-extract.asm.vert b/shaders-no-opt/asm/vert/constant-composite-extract.asm.vert new file mode 100644 index 00000000000..d5a1b41146c --- /dev/null +++ b/shaders-no-opt/asm/vert/constant-composite-extract.asm.vert @@ -0,0 +1,66 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 22 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector 
%float 4 + %m4float = OpTypeMatrix %v4float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %float_5 = OpConstant %float 5 + %float_6 = OpConstant %float 6 + %float_7 = OpConstant %float 7 + %float_8 = OpConstant %float 8 + %vec0 = OpConstantComposite %v4float %float_1 %float_2 %float_3 %float_4 + %vec1 = OpConstantComposite %v4float %float_5 %float_6 %float_7 %float_8 + %cmat = OpConstantComposite %m4float %vec0 %vec1 %vec0 %vec1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %21 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + %e0 = OpCompositeExtract %float %vec0 0 + %e1 = OpCompositeExtract %float %vec0 1 + %e2 = OpCompositeExtract %float %vec0 2 + %e3 = OpCompositeExtract %float %vec0 3 + %m13 = OpCompositeExtract %float %cmat 1 3 + %m21 = OpCompositeExtract %float %cmat 2 1 + %e_front = OpCompositeConstruct %v4float %e0 %e1 %e2 %e3 + %e_back = OpCompositeConstruct %v4float %e3 %e2 %m13 %m21 + %m0 = OpCompositeExtract %v4float %cmat 2 + %m1 = OpCompositeExtract %v4float %cmat 3 + %sum0 = OpFAdd %v4float %m0 %m1 + %sum1 = OpFAdd %v4float %e_front %e_back + %sum = OpFAdd %v4float %sum0 %sum1 + OpStore %21 %sum + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/vert/debug-printf.asm.vk.nocompat.vert b/shaders-no-opt/asm/vert/debug-printf.asm.vk.nocompat.vert new file mode 100644 index 00000000000..38c3de909cf --- /dev/null +++ b/shaders-no-opt/asm/vert/debug-printf.asm.vk.nocompat.vert @@ -0,0 +1,29 @@ + OpCapability Shader + OpExtension 
"SPV_KHR_non_semantic_info" + %1 = OpExtInstImport "NonSemantic.DebugPrintf" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %vert "main" %gl_Position + %4 = OpString "Foo %f %f" + OpSource HLSL 600 + OpName %vert "vert" + OpDecorate %gl_Position BuiltIn Position + %float = OpTypeFloat 32 + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_0 = OpConstant %float 0 + %v4float = OpTypeVector %float 4 + %9 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %12 = OpTypeFunction %void + %13 = OpTypeFunction %v4float +%gl_Position = OpVariable %_ptr_Output_v4float Output +%_ptr_Function_v4float = OpTypePointer Function %v4float + %vert = OpFunction %void None %12 + %15 = OpLabel + %16 = OpVariable %_ptr_Function_v4float Function + %17 = OpExtInst %void %1 1 %4 %float_1 %float_2 + OpStore %16 %9 + OpStore %gl_Position %9 + OpReturn + OpFunctionEnd diff --git a/shaders/comp/bitcast-16bit-1.invalid.comp b/shaders-no-opt/comp/bitcast-16bit-1.invalid.comp similarity index 100% rename from shaders/comp/bitcast-16bit-1.invalid.comp rename to shaders-no-opt/comp/bitcast-16bit-1.invalid.comp diff --git a/shaders/comp/bitcast-16bit-2.invalid.comp b/shaders-no-opt/comp/bitcast-16bit-2.invalid.comp similarity index 100% rename from shaders/comp/bitcast-16bit-2.invalid.comp rename to shaders-no-opt/comp/bitcast-16bit-2.invalid.comp diff --git a/shaders-no-opt/comp/glsl.std450.comp b/shaders-no-opt/comp/glsl.std450.comp new file mode 100644 index 00000000000..a17a82b82af --- /dev/null +++ b/shaders-no-opt/comp/glsl.std450.comp @@ -0,0 +1,129 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float res; + int ires; + uint ures; + + vec4 f32; + ivec4 s32; + uvec4 u32; + + mat2 m2; + mat3 m3; + mat4 m4; +}; + +void main() +{ + float tmp; + vec2 v2; + vec3 v3; + vec4 v4; + int itmp; + + res = round(f32.x); + res = 
roundEven(f32.x); + res = trunc(f32.x); + res = abs(f32.x); + ires = abs(s32.x); + res = sign(f32.x); + ires = sign(s32.x); + res = floor(f32.x); + res = ceil(f32.x); + res = fract(f32.x); + res = radians(f32.x); + res = degrees(f32.x); + res = sin(f32.x); + res = cos(f32.x); + res = tan(f32.x); + res = asin(f32.x); + res = acos(f32.x); + res = atan(f32.x); + res = sinh(f32.x); + res = cosh(f32.x); + res = tanh(f32.x); + res = asinh(f32.x); + res = acosh(f32.x); + res = atanh(f32.x); + res = atan(f32.x, f32.y); + res = pow(f32.x, f32.y); + res = exp(f32.x); + res = log(f32.x); + res = exp2(f32.x); + res = log2(f32.x); + res = sqrt(f32.x); + res = inversesqrt(f32.x); + + res = length(f32.x); + res = distance(f32.x, f32.y); + res = normalize(f32.x); + res = faceforward(f32.x, f32.y, f32.z); + res = reflect(f32.x, f32.y); + res = refract(f32.x, f32.y, f32.z); + + res = length(f32.xy); + res = distance(f32.xy, f32.zw); + v2 = normalize(f32.xy); + v2 = faceforward(f32.xy, f32.yz, f32.zw); + v2 = reflect(f32.xy, f32.zw); + v2 = refract(f32.xy, f32.yz, f32.w); + + v3 = cross(f32.xyz, f32.yzw); + + res = determinant(m2); + res = determinant(m3); + res = determinant(m4); + m2 = inverse(m2); + m3 = inverse(m3); + m4 = inverse(m4); + + res = modf(f32.x, tmp); + // ModfStruct + + res = min(f32.x, f32.y); + ures = min(u32.x, u32.y); + ires = min(s32.x, s32.y); + res = max(f32.x, f32.y); + ures = max(u32.x, u32.y); + ires = max(s32.x, s32.y); + + res = clamp(f32.x, f32.y, f32.z); + ures = clamp(u32.x, u32.y, u32.z); + ires = clamp(s32.x, s32.y, s32.z); + + res = mix(f32.x, f32.y, f32.z); + res = step(f32.x, f32.y); + res = smoothstep(f32.x, f32.y, f32.z); + res = fma(f32.x, f32.y, f32.z); + + res = frexp(f32.x, itmp); + // FrexpStruct + res = ldexp(f32.x, itmp); + + ures = packSnorm4x8(f32); + ures = packUnorm4x8(f32); + ures = packSnorm2x16(f32.xy); + ures = packUnorm2x16(f32.xy); + ures = packHalf2x16(f32.xy); + // packDouble2x32 + + v2 = unpackSnorm2x16(u32.x); + v2 = 
unpackUnorm2x16(u32.x); + v2 = unpackHalf2x16(u32.x); + v4 = unpackSnorm4x8(u32.x); + v4 = unpackUnorm4x8(u32.x); + // unpackDouble2x32 + + s32 = findLSB(s32); + s32 = findLSB(u32); + s32 = findMSB(s32); + s32 = findMSB(u32); + + // interpolateAtSample + // interpolateAtOffset + + // NMin, NMax, NClamp +} diff --git a/shaders-no-opt/comp/illegal-struct-name.asm.comp b/shaders-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 00000000000..f7a8787d3d8 --- /dev/null +++ b/shaders-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,62 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 31 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Foo" + OpMemberName %Foo 0 "abs" + OpName %f "f" + OpName %Foo_0 "Foo" + OpMemberName %Foo_0 0 "abs" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "foo" + OpMemberName %SSBO 1 "foo2" + OpName %_ "" + OpName %linear "abs" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %Foo = OpTypeStruct %float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %float + %SSBO = OpTypeStruct %Foo_0 %Foo_0 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %main = OpFunction %void None %3 + %5 = OpLabel + 
%f = OpVariable %_ptr_Function_Foo Function + %linear = OpVariable %_ptr_Function_int Function + %17 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0 + %18 = OpLoad %Foo_0 %17 + %19 = OpCompositeExtract %float %18 0 + %21 = OpAccessChain %_ptr_Function_float %f %int_0 + OpStore %21 %19 + OpStore %linear %int_10 + %26 = OpLoad %Foo %f + %27 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_1 + %28 = OpCompositeExtract %float %26 0 + %30 = OpAccessChain %_ptr_Uniform_float %27 %int_0 + OpStore %30 %28 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/comp/image-load-formatted.comp b/shaders-no-opt/comp/image-load-formatted.comp new file mode 100644 index 00000000000..7fd587d99ad --- /dev/null +++ b/shaders-no-opt/comp/image-load-formatted.comp @@ -0,0 +1,11 @@ +#version 450 +#extension GL_EXT_shader_image_load_formatted : require +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0) uniform image2D img; + +void main() +{ + vec4 v = imageLoad(img, ivec2(gl_GlobalInvocationID.xy)); + imageStore(img, ivec2(gl_GlobalInvocationID.xy), v + 1.0); +} diff --git a/shaders/comp/inout-struct.invalid.comp b/shaders-no-opt/comp/inout-struct.invalid.comp similarity index 100% rename from shaders/comp/inout-struct.invalid.comp rename to shaders-no-opt/comp/inout-struct.invalid.comp diff --git a/shaders-no-opt/comp/int16min-literal.comp b/shaders-no-opt/comp/int16min-literal.comp new file mode 100644 index 00000000000..c1b345266d8 --- /dev/null +++ b/shaders-no-opt/comp/int16min-literal.comp @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float16_t a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float16_t b; +}; + +void main() +{ + int16_t v = float16BitsToInt16(b); + v ^= 0x8000s; + a = int16BitsToFloat16(v); +} diff --git 
a/shaders-no-opt/comp/int64min-literal.comp b/shaders-no-opt/comp/int64min-literal.comp new file mode 100644 index 00000000000..ac20389033d --- /dev/null +++ b/shaders-no-opt/comp/int64min-literal.comp @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_gpu_shader_int64 : require + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float b; +}; + +void main() +{ + double b2 = b; + int64_t v = doubleBitsToInt64(b2); + v ^= 0x8000000000000000L; + double a2 = int64BitsToDouble(v); + a = float(a2); +} diff --git a/shaders-no-opt/comp/intmin-literal.comp b/shaders-no-opt/comp/intmin-literal.comp new file mode 100644 index 00000000000..ee35cedabb9 --- /dev/null +++ b/shaders-no-opt/comp/intmin-literal.comp @@ -0,0 +1,18 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float b; +}; + +void main() +{ + a = intBitsToFloat(floatBitsToInt(b) ^ 0x80000000); +} diff --git a/shaders-no-opt/comp/loop-break-merge-after-inner-continue.comp b/shaders-no-opt/comp/loop-break-merge-after-inner-continue.comp new file mode 100644 index 00000000000..e916ab2408c --- /dev/null +++ b/shaders-no-opt/comp/loop-break-merge-after-inner-continue.comp @@ -0,0 +1,21 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout (binding = 0) buffer STO +{ + uint data[]; +} ssbo; + +void main() +{ + while(true) + { + ssbo.data[0] += 1; + if (bool(ssbo.data[2])) + { + ssbo.data[5] += 1; + continue; + } + break; + } +} diff --git a/shaders-no-opt/comp/loop-resolve-debug-semantics.g.comp b/shaders-no-opt/comp/loop-resolve-debug-semantics.g.comp new file mode 100644 index 00000000000..72998477111 --- /dev/null +++ b/shaders-no-opt/comp/loop-resolve-debug-semantics.g.comp @@ -0,0 +1,16 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer SSBO +{ 
+ int v[]; +}; + +void main() +{ + for (int i = 0; i < 4; i++) + { + v[i] += 10; + } +} diff --git a/shaders-no-opt/comp/loop-resolve-debug-semantics.gV.comp b/shaders-no-opt/comp/loop-resolve-debug-semantics.gV.comp new file mode 100644 index 00000000000..72998477111 --- /dev/null +++ b/shaders-no-opt/comp/loop-resolve-debug-semantics.gV.comp @@ -0,0 +1,16 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer SSBO +{ + int v[]; +}; + +void main() +{ + for (int i = 0; i < 4; i++) + { + v[i] += 10; + } +} diff --git a/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp b/shaders-no-opt/comp/shader_ballot_nonuniform_invocations.invalid.comp similarity index 100% rename from shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp rename to shaders-no-opt/comp/shader_ballot_nonuniform_invocations.invalid.comp diff --git a/shaders-no-opt/comp/specialization-constant-evaluation.comp b/shaders-no-opt/comp/specialization-constant-evaluation.comp new file mode 100644 index 00000000000..d45d021ac55 --- /dev/null +++ b/shaders-no-opt/comp/specialization-constant-evaluation.comp @@ -0,0 +1,123 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(constant_id = 0) const bool TRUE = true; +layout(constant_id = 1) const bool FALSE = false; +layout(constant_id = 2) const int SONE = 1; +layout(constant_id = 3) const int STWO = 2; +layout(constant_id = 4) const int SNEG_TWO = -2; +layout(constant_id = 5) const uint UONE = 1; +layout(constant_id = 6) const uint UTWO = 2; +layout(constant_id = 7) const int SNEG_THREE = -3; + +const uint IADD = SONE + STWO + UONE + UTWO; // 6 +const uint ISUB = UTWO - SONE; // 1 +const uint IMUL = UTWO * UTWO; // 4 +const uint UDIV = UTWO / UTWO; // 1 +const int SDIV = STWO / SNEG_TWO; // -1 +//const int SREM = STWO % SNEG_THREE; // 1 +const int SREM = 1; +const int SMOD = STWO % SNEG_THREE; // -1 +const uint UMOD = IADD % IMUL; // 2 + +const uint LSHL = IADD << ISUB; // 12 +const uint RSHL = IADD 
>> ISUB; // 3 +const int RSHA = (-int(IADD)) >> (-SDIV); // -3 + +const bool IEQ = IADD == ISUB; // false +const bool INEQ = IADD != ISUB; // true +const bool ULT = IADD < ISUB; // false +const bool ULE = IADD <= ISUB; // false +const bool UGT = IADD > ISUB; // true +const bool UGE = IADD >= ISUB; // true + +const bool SLT = SMOD < SREM; // true +const bool SLE = SMOD <= SREM; // true +const bool SGT = SMOD > SREM; // false +const bool SGE = SMOD >= SREM; // false + +const bool LOR = IEQ || SLT; // true +const bool LAND = IEQ && SLT; // false +const bool LNOT = !LOR; // false + +const uint AND = IADD & IADD; // 6 +const uint OR = IADD | ISUB; // 7 +const uint XOR = IADD ^ IADD; // 0 +const uint NOT = ~XOR; // UINT_MAX + +const bool LEQ = LAND == LNOT; // true +const bool LNEQ = LAND != LNOT; // false + +const uint SEL = IEQ ? IADD : ISUB; // 1 + +#define DUMMY_SSBO(name, bind, size) layout(std430, set = 0, binding = bind) buffer SSBO_##name { float val[size]; float dummy; } name + +// Normalize all sizes to 1 element so that the default offsets in glslang matches up with what we should be computing. +// If we do it right, we should get no layout(offset = N) expressions. +DUMMY_SSBO(IAdd, 0, IADD - 5); +DUMMY_SSBO(ISub, 1, ISUB); +DUMMY_SSBO(IMul, 2, IMUL - 3); +DUMMY_SSBO(UDiv, 3, UDIV); +DUMMY_SSBO(SDiv, 4, SDIV + 2); +DUMMY_SSBO(SRem, 5, SREM); +DUMMY_SSBO(SMod, 6, SMOD + 2); +DUMMY_SSBO(UMod, 7, UMOD - 1); +DUMMY_SSBO(LShl, 8, LSHL - 11); +DUMMY_SSBO(RShl, 9, RSHL - 2); +DUMMY_SSBO(RSha, 10, RSHA + 4); +DUMMY_SSBO(IEq, 11, IEQ ? 2 : 1); +DUMMY_SSBO(INeq, 12, INEQ ? 1 : 2); +DUMMY_SSBO(Ult, 13, ULT ? 2 : 1); +DUMMY_SSBO(Ule, 14, ULE ? 2 : 1); +DUMMY_SSBO(Ugt, 15, UGT ? 1 : 2); +DUMMY_SSBO(Uge, 16, UGE ? 1 : 2); +DUMMY_SSBO(Slt, 17, SLT ? 1 : 2); +DUMMY_SSBO(Sle, 18, SLE ? 1 : 2); +DUMMY_SSBO(Sgt, 19, SGT ? 2 : 1); +DUMMY_SSBO(Sge, 20, SGE ? 2 : 1); +DUMMY_SSBO(Lor, 21, LOR ? 1 : 2); +DUMMY_SSBO(Land, 22, LAND ? 2 : 1); +DUMMY_SSBO(Lnot, 23, LNOT ? 
2 : 1); +DUMMY_SSBO(And, 24, AND - 5); +DUMMY_SSBO(Or, 24, OR - 6); +DUMMY_SSBO(Xor, 24, XOR + 1); +DUMMY_SSBO(Not, 25, NOT - 0xfffffffeu); +DUMMY_SSBO(Leq, 26, LEQ ? 1 : 2); +DUMMY_SSBO(Lneq, 27, LNEQ ? 2 : 1); +DUMMY_SSBO(Sel, 28, SEL); + +void main() +{ + IAdd.val[0] = 0.0; + ISub.val[0] = 0.0; + IMul.val[0] = 0.0; + UDiv.val[0] = 0.0; + SDiv.val[0] = 0.0; + SRem.val[0] = 0.0; + SMod.val[0] = 0.0; + UMod.val[0] = 0.0; + LShl.val[0] = 0.0; + RShl.val[0] = 0.0; + RSha.val[0] = 0.0; + IEq.val[0] = 0.0; + INeq.val[0] = 0.0; + Ult.val[0] = 0.0; + Ule.val[0] = 0.0; + Ugt.val[0] = 0.0; + Uge.val[0] = 0.0; + Slt.val[0] = 0.0; + Sle.val[0] = 0.0; + Sgt.val[0] = 0.0; + Sge.val[0] = 0.0; + Lor.val[0] = 0.0; + Land.val[0] = 0.0; + Lnot.val[0] = 0.0; + And.val[0] = 0.0; + Or.val[0] = 0.0; + Xor.val[0] = 0.0; + Not.val[0] = 0.0; + Leq.val[0] = 0.0; + Lneq.val[0] = 0.0; + Sel.val[0] = 0.0; +} diff --git a/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/shaders-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp similarity index 100% rename from shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp rename to shaders-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp diff --git a/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp b/shaders-no-opt/comp/subgroups.nocompat.invalid.vk.comp similarity index 92% rename from shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp rename to shaders-no-opt/comp/subgroups.nocompat.invalid.vk.comp index 68fc74f910d..a73a231259a 100644 --- a/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp +++ b/shaders-no-opt/comp/subgroups.nocompat.invalid.vk.comp @@ -72,6 +72,9 @@ void main() uvec4 anded = subgroupAnd(ballot_value); uvec4 ored = subgroupOr(ballot_value); uvec4 xored = subgroupXor(ballot_value); + bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42))); + bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42))); + bvec4 xored_b = subgroupXor(equal(ballot_value, uvec4(42))); 
added = subgroupInclusiveAdd(added); iadded = subgroupInclusiveAdd(iadded); @@ -117,6 +120,10 @@ void main() ored = subgroupClusteredOr(ored, 4u); xored = subgroupClusteredXor(xored, 4u); + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); + // quad vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); diff --git a/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp b/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp new file mode 100644 index 00000000000..833f43079b1 --- /dev/null +++ b/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp @@ -0,0 +1,49 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require + +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + + // ballot + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + FragColor = float(gl_SubgroupLeMask); + FragColor = float(gl_SubgroupLtMask); + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = 
subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + // vote + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal_bool = subgroupAllEqual(true); + bool has_equal_T = subgroupAllEqual(uvec3(5u)); +} \ No newline at end of file diff --git a/shaders-no-opt/comp/trivial-select-cast-vector.comp b/shaders-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 00000000000..c3e0922a166 --- /dev/null +++ b/shaders-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,14 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + vec3 a; + vec3 b; +}; + +void main() +{ + bvec3 c = lessThan(b, vec3(1.0)); + a = mix(vec3(1, 0, 0), vec3(0, 0, 1), c); +} diff --git a/shaders-no-opt/comp/trivial-select-matrix.spv14.comp b/shaders-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 00000000000..5ffcc3f3a49 --- /dev/null +++ b/shaders-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + mat3 a; + float b; +}; + +void main() +{ + // Scalar to Matrix + bool c = b < 1.0; + a = c ? mat3(vec3(1), vec3(1), vec3(1)) : mat3(vec3(0), vec3(0), vec3(0)); + a = c ? 
mat3(1) : mat3(0); +} diff --git a/shaders/frag/16bit-constants.frag b/shaders-no-opt/frag/16bit-constants.invalid.frag similarity index 100% rename from shaders/frag/16bit-constants.frag rename to shaders-no-opt/frag/16bit-constants.invalid.frag diff --git a/shaders/desktop-only/frag/fp16.invalid.desktop.frag b/shaders-no-opt/frag/fp16.invalid.desktop.frag similarity index 100% rename from shaders/desktop-only/frag/fp16.invalid.desktop.frag rename to shaders-no-opt/frag/fp16.invalid.desktop.frag diff --git a/shaders-no-opt/frag/frag-fully-covered.frag b/shaders-no-opt/frag/frag-fully-covered.frag new file mode 100644 index 00000000000..95cc4fc9757 --- /dev/null +++ b/shaders-no-opt/frag/frag-fully-covered.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_NV_conservative_raster_underestimation : require + +layout(location = 0) out vec4 FragColor; + +void main() +{ + if (!gl_FragFullyCoveredNV) + discard; + FragColor = vec4(1.0); +} diff --git a/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag b/shaders-no-opt/frag/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag similarity index 100% rename from shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag rename to shaders-no-opt/frag/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag diff --git a/shaders/amd/fs.invalid.frag b/shaders-no-opt/frag/fs.invalid.frag similarity index 100% rename from shaders/amd/fs.invalid.frag rename to shaders-no-opt/frag/fs.invalid.frag diff --git a/shaders-no-opt/frag/image-gather.frag b/shaders-no-opt/frag/image-gather.frag new file mode 100644 index 00000000000..b492cfbe903 --- /dev/null +++ b/shaders-no-opt/frag/image-gather.frag @@ -0,0 +1,14 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 0) uniform sampler2D uSamp; +layout(set = 0, binding = 1) uniform sampler2DShadow uSampShadow; +layout(location = 0) in vec3 vUV; + +void main() +{ + FragColor = textureGather(uSamp, vUV.xy, 0); + FragColor += 
textureGather(uSamp, vUV.xy, 1); + FragColor += textureGather(uSampShadow, vUV.xy, vUV.z); +} diff --git a/shaders-no-opt/frag/modf-non-function-purity-analysis.frag b/shaders-no-opt/frag/modf-non-function-purity-analysis.frag new file mode 100644 index 00000000000..c1f1a1266f1 --- /dev/null +++ b/shaders-no-opt/frag/modf-non-function-purity-analysis.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 v; +layout(location = 0) out vec4 vo0; +layout(location = 1) out vec4 vo1; + +vec4 modf_inner() +{ + return modf(v, vo1); +} + +void main() +{ + vo0 = modf_inner(); +} diff --git a/shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag b/shaders-no-opt/frag/multi-dimensional.desktop.invalid.flatten_dim.frag similarity index 100% rename from shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag rename to shaders-no-opt/frag/multi-dimensional.desktop.invalid.flatten_dim.frag diff --git a/shaders-no-opt/frag/nonuniform-constructor.vk.nocompat.frag b/shaders-no-opt/frag/nonuniform-constructor.vk.nocompat.frag new file mode 100644 index 00000000000..452aa953a42 --- /dev/null +++ b/shaders-no-opt/frag/nonuniform-constructor.vk.nocompat.frag @@ -0,0 +1,14 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; +layout(location = 1) flat in int vIndex; + +layout(set = 0, binding = 0) uniform texture2D uTex[]; +layout(set = 1, binding = 0) uniform sampler Immut; + +void main() +{ + FragColor = texture(nonuniformEXT(sampler2D(uTex[vIndex], Immut)), vUV); +} diff --git a/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag b/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag new file mode 100644 index 00000000000..59079fe58b4 --- /dev/null +++ b/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(set = 
0, binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +}; + +layout(set = 0, binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +}; + +void callee2() +{ + values1[int(gl_FragCoord.x)] += 1; +} + +void callee() +{ + values0[int(gl_FragCoord.x)] += 1; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} diff --git a/shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag b/shaders-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag similarity index 100% rename from shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag rename to shaders-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag diff --git a/shaders-no-opt/frag/sparse-texture-clamp.desktop.frag b/shaders-no-opt/frag/sparse-texture-clamp.desktop.frag new file mode 100644 index 00000000000..880e67e5de2 --- /dev/null +++ b/shaders-no-opt/frag/sparse-texture-clamp.desktop.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_sparse_texture2 : require +#extension GL_ARB_sparse_texture_clamp : require + +layout(set = 0, binding = 0) uniform sampler2D uSamp; +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; + +void main() +{ + vec4 texel; + int code; + + code = sparseTextureClampARB(uSamp, vUV, 1.0, texel, 2.0); + texel = textureClampARB(uSamp, vUV, 1.0, 2.0); + code = sparseTextureOffsetClampARB(uSamp, vUV, ivec2(1, 2), 1.0, texel, 2.0); + texel = textureOffsetClampARB(uSamp, vUV, ivec2(1, 2), 1.0, 2.0); + code = sparseTextureGradClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), 1.0, texel); + texel = textureGradClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), 1.0); + code = sparseTextureGradOffsetClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), ivec2(-1, -2), 1.0, texel); + texel = textureGradOffsetClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), ivec2(-1, -2), 1.0); +} + diff --git a/shaders-no-opt/frag/sparse-texture-feedback.desktop.frag 
b/shaders-no-opt/frag/sparse-texture-feedback.desktop.frag new file mode 100644 index 00000000000..67cc5b42a91 --- /dev/null +++ b/shaders-no-opt/frag/sparse-texture-feedback.desktop.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_sparse_texture2 : require +#extension GL_ARB_sparse_texture_clamp : require + +layout(set = 0, binding = 0) uniform sampler2D uSamp; +layout(set = 0, binding = 1) uniform sampler2DMS uSampMS; +layout(set = 0, binding = 2, rgba8) uniform image2D uImage; +layout(set = 0, binding = 3, rgba8) uniform image2DMS uImageMS; +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; + +void main() +{ + vec4 texel; + bool ret; + + ret = sparseTexelsResidentARB(sparseTextureARB(uSamp, vUV, texel)); + ret = sparseTexelsResidentARB(sparseTextureARB(uSamp, vUV, texel, 1.1)); + ret = sparseTexelsResidentARB(sparseTextureLodARB(uSamp, vUV, 1.0, texel)); + ret = sparseTexelsResidentARB(sparseTextureOffsetARB(uSamp, vUV, ivec2(1, 1), texel)); + ret = sparseTexelsResidentARB(sparseTextureOffsetARB(uSamp, vUV, ivec2(2, 2), texel, 0.5)); + ret = sparseTexelsResidentARB(sparseTexelFetchARB(uSamp, ivec2(vUV), 1, texel)); + ret = sparseTexelsResidentARB(sparseTexelFetchARB(uSampMS, ivec2(vUV), 2, texel)); + ret = sparseTexelsResidentARB(sparseTexelFetchOffsetARB(uSamp, ivec2(vUV), 1, ivec2(2, 3), texel)); + ret = sparseTexelsResidentARB(sparseTextureLodOffsetARB(uSamp, vUV, 1.5, ivec2(2, 3), texel)); + ret = sparseTexelsResidentARB(sparseTextureGradARB(uSamp, vUV, vec2(1.0), vec2(3.0), texel)); + ret = sparseTexelsResidentARB(sparseTextureGradOffsetARB(uSamp, vUV, vec2(1.0), vec2(3.0), ivec2(-2, -3), texel)); + ret = sparseTexelsResidentARB(sparseTextureClampARB(uSamp, vUV, 4.0, texel)); + ret = sparseTexelsResidentARB(sparseImageLoadARB(uImage, ivec2(vUV), texel)); + ret = sparseTexelsResidentARB(sparseImageLoadARB(uImageMS, ivec2(vUV), 1, texel)); +} diff --git a/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.frag 
b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.frag new file mode 100644 index 00000000000..621457a14ad --- /dev/null +++ b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.frag @@ -0,0 +1,12 @@ +#version 310 es +precision mediump float; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1; +layout(location = 0) out vec3 FragColor; +layout(location = 1) out vec4 FragColor2; + +void main() +{ + FragColor.rgb = subpassLoad(uSubpass0).rgb + subpassLoad(uSubpass1).rgb; +} diff --git a/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.framebuffer-fetch-noncoherent.frag b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.framebuffer-fetch-noncoherent.frag new file mode 100644 index 00000000000..621457a14ad --- /dev/null +++ b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.framebuffer-fetch-noncoherent.frag @@ -0,0 +1,12 @@ +#version 310 es +precision mediump float; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1; +layout(location = 0) out vec3 FragColor; +layout(location = 1) out vec4 FragColor2; + +void main() +{ + FragColor.rgb = subpassLoad(uSubpass0).rgb + subpassLoad(uSubpass1).rgb; +} diff --git a/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.frag b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.frag new file mode 100644 index 00000000000..621457a14ad --- /dev/null +++ b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.frag @@ -0,0 +1,12 @@ +#version 310 es +precision mediump float; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump 
subpassInput uSubpass1; +layout(location = 0) out vec3 FragColor; +layout(location = 1) out vec4 FragColor2; + +void main() +{ + FragColor.rgb = subpassLoad(uSubpass0).rgb + subpassLoad(uSubpass1).rgb; +} diff --git a/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.framebuffer-fetch-noncoherent.frag b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.framebuffer-fetch-noncoherent.frag new file mode 100644 index 00000000000..621457a14ad --- /dev/null +++ b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.framebuffer-fetch-noncoherent.frag @@ -0,0 +1,12 @@ +#version 310 es +precision mediump float; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1; +layout(location = 0) out vec3 FragColor; +layout(location = 1) out vec4 FragColor2; + +void main() +{ + FragColor.rgb = subpassLoad(uSubpass0).rgb + subpassLoad(uSubpass1).rgb; +} diff --git a/shaders-no-opt/frag/texture-gather-offsets.frag b/shaders-no-opt/frag/texture-gather-offsets.frag new file mode 100644 index 00000000000..52d79097464 --- /dev/null +++ b/shaders-no-opt/frag/texture-gather-offsets.frag @@ -0,0 +1,14 @@ +#version 460 core +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec2 inUv; + +layout(location = 0) out vec4 outColor; + +layout(set=0, binding=0) uniform sampler2D Image0; + +void main(void) +{ + const ivec2 offs[4] = {ivec2(0,0), ivec2(1,0), ivec2(1,1), ivec2(0,1)}; + outColor = textureGatherOffsets(Image0, inUv, offs); +} diff --git a/shaders-no-opt/frag/texture-gather-uint-component.asm.frag b/shaders-no-opt/frag/texture-gather-uint-component.asm.frag new file mode 100644 index 00000000000..b4d9509ab49 --- /dev/null +++ b/shaders-no-opt/frag/texture-gather-uint-component.asm.frag @@ -0,0 +1,42 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference 
Front End; 10 +; Bound: 22 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vUV + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uSamp "uSamp" + OpName %vUV "vUV" + OpDecorate %FragColor Location 0 + OpDecorate %uSamp DescriptorSet 0 + OpDecorate %uSamp Binding 0 + OpDecorate %vUV Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %10 = OpTypeImage %float 2D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %uSamp = OpVariable %_ptr_UniformConstant_11 UniformConstant + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input + %int = OpTypeInt 32 0 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %11 %uSamp + %18 = OpLoad %v2float %vUV + %21 = OpImageGather %v4float %14 %18 %int_1 + OpStore %FragColor %21 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/frag/texture1d-emulation.es.frag b/shaders-no-opt/frag/texture1d-emulation.es.frag new file mode 100644 index 00000000000..1ad99932b60 --- /dev/null +++ b/shaders-no-opt/frag/texture1d-emulation.es.frag @@ -0,0 +1,32 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler1D uSamp; +layout(set = 0, binding = 1) uniform sampler1DShadow uSampShadow; +layout(set = 0, binding = 2) uniform sampler1DArray uSampArray; +layout(set = 0, binding = 3) uniform sampler1DArrayShadow uSampArrayShadow; +layout(set = 0, binding = 4, r32f) uniform image1D uImage; +layout(location = 0) in vec4 vUV; +layout(location = 0) out vec4 FragColor; + +void main() +{ + // 1D + FragColor = 
texture(uSamp, vUV.x); + FragColor += textureProj(uSamp, vUV.xy); + FragColor += texelFetch(uSamp, int(vUV.x), 0); + + // 1D Shadow + FragColor += texture(uSampShadow, vUV.xyz); + FragColor += textureProj(uSampShadow, vUV); + + // 1D Array + FragColor = texture(uSampArray, vUV.xy); + FragColor += texelFetch(uSampArray, ivec2(vUV.xy), 0); + + // 1D Array Shadow + FragColor += texture(uSampArrayShadow, vUV.xyz); + + // 1D images + FragColor += imageLoad(uImage, int(vUV.x)); + imageStore(uImage, int(vUV.x), FragColor); +} diff --git a/shaders-no-opt/frag/texture1d-emulation.legacy.frag b/shaders-no-opt/frag/texture1d-emulation.legacy.frag new file mode 100644 index 00000000000..9ebd81e3338 --- /dev/null +++ b/shaders-no-opt/frag/texture1d-emulation.legacy.frag @@ -0,0 +1,17 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler1D uSamp; +layout(set = 0, binding = 1) uniform sampler1DShadow uSampShadow; +layout(location = 0) in vec4 vUV; +layout(location = 0) out vec4 FragColor; + +void main() +{ + // 1D + FragColor = texture(uSamp, vUV.x); + FragColor += textureProj(uSamp, vUV.xy); + + // 1D Shadow + FragColor += texture(uSampShadow, vUV.xyz); + FragColor += textureProj(uSampShadow, vUV); +} diff --git a/shaders-no-opt/frag/variables.zero-initialize.frag b/shaders-no-opt/frag/variables.zero-initialize.frag new file mode 100644 index 00000000000..41da8001f47 --- /dev/null +++ b/shaders-no-opt/frag/variables.zero-initialize.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; + +int uninit_int; +ivec4 uninit_vector; +mat4 uninit_matrix; + +struct Foo { int a; }; +Foo uninit_foo; + +void main() +{ + int uninit_function_int; + if (vColor.x > 10.0) + uninit_function_int = 10; + else + uninit_function_int = 20; + FragColor = vColor; +} diff --git a/shaders-no-opt/legacy/frag/switch-single-case-multiple-exit-cfg.legacy.asm.frag 
b/shaders-no-opt/legacy/frag/switch-single-case-multiple-exit-cfg.legacy.asm.frag new file mode 100644 index 00000000000..d2bd15a9785 --- /dev/null +++ b/shaders-no-opt/legacy/frag/switch-single-case-multiple-exit-cfg.legacy.asm.frag @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 54 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %gl_FragCoord "gl_FragCoord" + OpName %_GLF_color "_GLF_color" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %bool = OpTypeBool + %v2float = OpTypeVector %float 2 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %52 = OpUndef %v2float + %main = OpFunction %void None %3 + %5 = OpLabel + OpSelectionMerge %9 None + OpSwitch %int_0 %8 + %8 = OpLabel + %17 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %18 = OpLoad %float %17 + %22 = OpFOrdNotEqual %bool %18 %18 + OpSelectionMerge %24 None + OpBranchConditional %22 %23 %24 + %23 = OpLabel + OpBranch %9 + %24 = OpLabel + %33 = OpCompositeExtract %float %52 1 + %51 = OpCompositeInsert %v2float %33 %52 1 + OpBranch %9 + %9 = OpLabel + %53 = OpPhi %v2float %52 %23 %51 %24 + %42 = OpCompositeExtract %float %53 0 + %43 = OpCompositeExtract %float %53 1 + %48 = OpCompositeConstruct %v4float %42 %43 %float_1 %float_1 + OpStore 
%_GLF_color %48 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/task/task-shader-basic-2.vk.spv14.nocompat.task b/shaders-no-opt/task/task-shader-basic-2.vk.spv14.nocompat.task new file mode 100644 index 00000000000..3fcb7147114 --- /dev/null +++ b/shaders-no-opt/task/task-shader-basic-2.vk.spv14.nocompat.task @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; +taskPayloadSharedEXT Payload p; +shared float vs[24]; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12) + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12]; + barrier(); + if (gl_LocalInvocationIndex < 6) + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6]; + barrier(); + if (gl_LocalInvocationIndex < 3) + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3]; + barrier(); + + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + if (vs[5] > 20.0) + { + EmitMeshTasksEXT(int(vs[4]), int(vs[6]), int(vs[8])); + } + else + { + EmitMeshTasksEXT(int(vs[6]), 10, 50u); + } +} diff --git a/shaders-no-opt/task/task-shader-basic.vk.spv14.nocompat.task b/shaders-no-opt/task/task-shader-basic.vk.spv14.nocompat.task new file mode 100644 index 00000000000..6e97160309a --- /dev/null +++ b/shaders-no-opt/task/task-shader-basic.vk.spv14.nocompat.task @@ -0,0 +1,28 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; +taskPayloadSharedEXT Payload p; +shared float vs[24]; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12) + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12]; + barrier(); + if (gl_LocalInvocationIndex < 6) + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6]; + barrier(); + if 
(gl_LocalInvocationIndex < 3) + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3]; + barrier(); + + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + EmitMeshTasksEXT(int(vs[4]), int(vs[6]), int(vs[8])); +} diff --git a/shaders-no-opt/vert/io-blocks.force-flattened-io.vert b/shaders-no-opt/vert/io-blocks.force-flattened-io.vert new file mode 100644 index 00000000000..e308a9f2891 --- /dev/null +++ b/shaders-no-opt/vert/io-blocks.force-flattened-io.vert @@ -0,0 +1,25 @@ +#version 450 + +struct Foo +{ + vec4 bar[2]; + vec4 baz[2]; +}; + +layout(location = 0) out Vertex +{ + Foo foo; + Foo foo2; +}; + +layout(location = 8) out Foo foo3; + +void main() +{ + foo.bar[0] = vec4(1.0); + foo.baz[1] = vec4(2.0); + foo2.bar[0] = vec4(3.0); + foo2.baz[1] = vec4(4.0); + foo3.bar[0] = vec4(5.0); + foo3.baz[1] = vec4(6.0); +} diff --git a/shaders-no-opt/vulkan/frag/shading-rate.vk.nocompat.frag b/shaders-no-opt/vulkan/frag/shading-rate.vk.nocompat.frag new file mode 100644 index 00000000000..8aee6d35909 --- /dev/null +++ b/shaders-no-opt/vulkan/frag/shading-rate.vk.nocompat.frag @@ -0,0 +1,9 @@ +#version 450 +#extension GL_EXT_fragment_shading_rate : require + +layout(location = 0) out uint FragColor; + +void main() +{ + FragColor = gl_ShadingRateEXT; +} diff --git a/shaders-no-opt/vulkan/frag/ubo-offset-out-of-order.vk.nocompat.frag b/shaders-no-opt/vulkan/frag/ubo-offset-out-of-order.vk.nocompat.frag new file mode 100644 index 00000000000..6d3987a886d --- /dev/null +++ b/shaders-no-opt/vulkan/frag/ubo-offset-out-of-order.vk.nocompat.frag @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +layout(std140, binding = 0) uniform UBO +{ + layout(offset = 16) mat4 m; + layout(offset = 0) vec4 v; +}; + +layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = m * vColor + v; +} diff --git a/shaders-no-opt/vulkan/frag/volatile-helper-invocation.vk.nocompat.spv16.frag 
b/shaders-no-opt/vulkan/frag/volatile-helper-invocation.vk.nocompat.spv16.frag new file mode 100644 index 00000000000..9a8d9d20b25 --- /dev/null +++ b/shaders-no-opt/vulkan/frag/volatile-helper-invocation.vk.nocompat.spv16.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = float(gl_HelperInvocation); + demote; + FragColor = float(gl_HelperInvocation); +} diff --git a/shaders-no-opt/vulkan/vert/primitive-shading-rate.vk.nocompat.vert b/shaders-no-opt/vulkan/vert/primitive-shading-rate.vk.nocompat.vert new file mode 100644 index 00000000000..95ac8d64453 --- /dev/null +++ b/shaders-no-opt/vulkan/vert/primitive-shading-rate.vk.nocompat.vert @@ -0,0 +1,8 @@ +#version 450 +#extension GL_EXT_fragment_shading_rate : require + +void main() +{ + gl_PrimitiveShadingRateEXT = 3; + gl_Position = vec4(1.0); +} diff --git a/shaders-reflection/asm/comp/pointer-to-array-of-physical-pointer.asm.comp b/shaders-reflection/asm/comp/pointer-to-array-of-physical-pointer.asm.comp new file mode 100644 index 00000000000..caca050ad3f --- /dev/null +++ b/shaders-reflection/asm/comp/pointer-to-array-of-physical-pointer.asm.comp @@ -0,0 +1,51 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 17 +; Schema: 0 + OpCapability Shader + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_EXT_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 460 + OpSourceExtension "GL_EXT_buffer_reference" + OpSourceExtension "GL_EXT_buffer_reference2" + OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_int64" + OpName %main "main" + OpName %Params "Params" + OpMemberName %Params 0 "x" + OpMemberName %Params 1 "y" + OpName %IntBuf "IntBuf" + OpMemberName %IntBuf 0 "v" + OpName %_ "" + 
OpDecorate %_arr_7_uint_3 ArrayStride 16 + OpMemberDecorate %Params 0 Offset 0 + OpMemberDecorate %Params 1 Offset 16 + OpDecorate %Params Block + OpMemberDecorate %IntBuf 0 Offset 0 + OpDecorate %IntBuf Block + OpDecorate %_arr__ptr_PhysicalStorageBuffer_IntBuf_uint_3 ArrayStride 16 + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_IntBuf PhysicalStorageBuffer + %uint = OpTypeInt 32 0 + %uint_3 = OpConstant %uint 3 +%_arr_7_uint_3 = OpTypeArray %_ptr_PhysicalStorageBuffer_IntBuf %uint_3 +%ptr_array_ptr = OpTypePointer PhysicalStorageBuffer %_arr_7_uint_3 + %Params = OpTypeStruct %float %ptr_array_ptr + %int = OpTypeInt 32 1 + %IntBuf = OpTypeStruct %int +%_ptr_PhysicalStorageBuffer_IntBuf = OpTypePointer PhysicalStorageBuffer %IntBuf +%_arr__ptr_PhysicalStorageBuffer_IntBuf_uint_3 = OpTypeArray %_ptr_PhysicalStorageBuffer_IntBuf %uint_3 +%_ptr_Uniform_Params = OpTypePointer Uniform %Params + %_ = OpVariable %_ptr_Uniform_Params Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-reflection/comp/array-of-physical-pointer.comp b/shaders-reflection/comp/array-of-physical-pointer.comp new file mode 100644 index 00000000000..992f6f90891 --- /dev/null +++ b/shaders-reflection/comp/array-of-physical-pointer.comp @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable +#extension GL_EXT_buffer_reference2 : enable +layout(buffer_reference, std430, buffer_reference_align = 4) buffer IntBuf +{ + int v; +}; +layout(std140, binding = 0) uniform Params +{ + float x; + IntBuf y[3]; +}; +void main() +{ +} diff --git a/shaders-reflection/comp/function-pointer.invalid.asm.comp b/shaders-reflection/comp/function-pointer.invalid.asm.comp new file mode 100644 index 00000000000..440f3311ef3 --- /dev/null +++ 
b/shaders-reflection/comp/function-pointer.invalid.asm.comp @@ -0,0 +1,19 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 7 +; Schema: 0 +OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %2 "main" +OpExecutionMode %2 LocalSize 1 1 1 +OpSource GLSL 450 +OpName %2 "main" +%3 = OpTypeVoid +%4 = OpTypeFunction %3 +%5 = OpTypePointer Private %4 +%2 = OpFunction %3 None %4 +%6 = OpLabel +OpReturn +OpFunctionEnd diff --git a/shaders-reflection/comp/out-of-order-block-offsets.comp b/shaders-reflection/comp/out-of-order-block-offsets.comp new file mode 100644 index 00000000000..da5c86eef46 --- /dev/null +++ b/shaders-reflection/comp/out-of-order-block-offsets.comp @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 0) buffer SSBO +{ + layout(offset = 8) uint foo; + layout(offset = 4) uint bar; +}; + +void main() +{ + bar = foo; +} diff --git a/shaders-reflection/comp/physical-pointer.comp b/shaders-reflection/comp/physical-pointer.comp new file mode 100644 index 00000000000..ecd1e287d23 --- /dev/null +++ b/shaders-reflection/comp/physical-pointer.comp @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable +#extension GL_EXT_buffer_reference2 : enable +layout(buffer_reference, std430, buffer_reference_align = 4) buffer IntBuf +{ + int v; +}; +layout(std140, binding = 0) uniform Params +{ + float x; + IntBuf y; +}; +void main() +{ +} diff --git a/shaders-reflection/comp/workgroup-size-spec-constant.comp b/shaders-reflection/comp/workgroup-size-spec-constant.comp new file mode 100644 index 00000000000..376a3516220 --- /dev/null +++ b/shaders-reflection/comp/workgroup-size-spec-constant.comp @@ -0,0 +1,13 @@ +#version 450 + +layout(local_size_x_id = 10, local_size_y_id = 40, local_size_z_id = 60) in; + +layout(std430, set = 0, binding = 0) buffer SSBO +{ + vec4 v; +}; + +void main() +{ + v = vec4(10.0); +} diff --git 
a/shaders-reflection/vert/array-size-reflection.vert b/shaders-reflection/vert/array-size-reflection.vert new file mode 100644 index 00000000000..24a4a43831f --- /dev/null +++ b/shaders-reflection/vert/array-size-reflection.vert @@ -0,0 +1,13 @@ +#version 450 +layout(constant_id = 0) const int ARR_SIZE = 1; + +layout(binding = 0, set = 1, std140) uniform u_ +{ + vec4 u_0[ARR_SIZE]; +}; + +void main() +{ + gl_Position = u_0[0]; +} + diff --git a/shaders-reflection/vert/stride-reflection.vert b/shaders-reflection/vert/stride-reflection.vert new file mode 100644 index 00000000000..6e7d96df44d --- /dev/null +++ b/shaders-reflection/vert/stride-reflection.vert @@ -0,0 +1,14 @@ +#version 450 + +layout(binding = 0, set = 0, std140) uniform U +{ + vec4 v[4]; + mat4 c[4]; + layout(row_major) mat4 r[4]; +}; + +void main() +{ + gl_Position = v[0]; +} + diff --git a/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag b/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag new file mode 100644 index 00000000000..fae211f278d --- /dev/null +++ b/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag @@ -0,0 +1,1087 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 572 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %Main "main" %in_var_TEXCOORD0 %in_var_TEXCOORD7 %in_var_TEXCOORD8 %gl_FragCoord %gl_FrontFacing %out_var_SV_Target0 + OpExecutionMode %Main OriginUpperLeft + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + 
OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 
"View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + 
OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + 
OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName %type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName 
%type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_MobileDirectionalLight "type.MobileDirectionalLight" + OpMemberName %type_MobileDirectionalLight 0 "MobileDirectionalLight_DirectionalLightColor" + OpMemberName %type_MobileDirectionalLight 1 "MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition" + OpMemberName %type_MobileDirectionalLight 2 "MobileDirectionalLight_DirectionalLightShadowSize" + OpMemberName 
%type_MobileDirectionalLight 3 "MobileDirectionalLight_DirectionalLightDistanceFadeMAD" + OpMemberName %type_MobileDirectionalLight 4 "MobileDirectionalLight_DirectionalLightShadowDistances" + OpMemberName %type_MobileDirectionalLight 5 "MobileDirectionalLight_DirectionalLightScreenToShadow" + OpName %MobileDirectionalLight "MobileDirectionalLight" + OpName %type_2d_image "type.2d.image" + OpName %MobileDirectionalLight_DirectionalLightShadowTexture "MobileDirectionalLight_DirectionalLightShadowTexture" + OpName %type_sampler "type.sampler" + OpName %MobileDirectionalLight_DirectionalLightShadowSampler "MobileDirectionalLight_DirectionalLightShadowSampler" + OpName %Material_Texture2D_0 "Material_Texture2D_0" + OpName %Material_Texture2D_0Sampler "Material_Texture2D_0Sampler" + OpName %Material_Texture2D_1 "Material_Texture2D_1" + OpName %Material_Texture2D_1Sampler "Material_Texture2D_1Sampler" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "NumDynamicPointLights" + OpMemberName %type__Globals 1 "LightPositionAndInvRadius" + OpMemberName %type__Globals 2 "LightColorAndFalloffExponent" + OpMemberName %type__Globals 3 "MobileReflectionParams" + OpName %_Globals "$Globals" + OpName %type_cube_image "type.cube.image" + OpName %ReflectionCubemap "ReflectionCubemap" + OpName %ReflectionCubemapSampler "ReflectionCubemapSampler" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %in_var_TEXCOORD7 "in.var.TEXCOORD7" + OpName %in_var_TEXCOORD8 "in.var.TEXCOORD8" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %Main "Main" + OpName %type_sampled_image "type.sampled.image" + OpName %type_sampled_image_0 "type.sampled.image" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorateString %in_var_TEXCOORD8 UserSemantic "TEXCOORD8" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_Position" + OpDecorate 
%gl_FrontFacing BuiltIn FrontFacing + OpDecorateString %gl_FrontFacing UserSemantic "SV_IsFrontFace" + OpDecorate %gl_FrontFacing Flat + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %in_var_TEXCOORD7 Location 1 + OpDecorate %in_var_TEXCOORD8 Location 2 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %MobileDirectionalLight DescriptorSet 0 + OpDecorate %MobileDirectionalLight Binding 1 + OpDecorate %MobileDirectionalLight_DirectionalLightShadowTexture DescriptorSet 0 + OpDecorate %MobileDirectionalLight_DirectionalLightShadowTexture Binding 0 + OpDecorate %MobileDirectionalLight_DirectionalLightShadowSampler DescriptorSet 0 + OpDecorate %MobileDirectionalLight_DirectionalLightShadowSampler Binding 0 + OpDecorate %Material_Texture2D_0 DescriptorSet 0 + OpDecorate %Material_Texture2D_0 Binding 1 + OpDecorate %Material_Texture2D_0Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_0Sampler Binding 1 + OpDecorate %Material_Texture2D_1 DescriptorSet 0 + OpDecorate %Material_Texture2D_1 Binding 2 + OpDecorate %Material_Texture2D_1Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_1Sampler Binding 2 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 2 + OpDecorate %ReflectionCubemap DescriptorSet 0 + OpDecorate %ReflectionCubemap Binding 3 + OpDecorate %ReflectionCubemapSampler DescriptorSet 0 + OpDecorate %ReflectionCubemapSampler Binding 3 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + 
OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + 
OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + 
OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate 
%type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + 
OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpDecorate %_arr_mat4v4float_uint_4 ArrayStride 64 + OpMemberDecorate %type_MobileDirectionalLight 0 Offset 0 + OpMemberDecorate %type_MobileDirectionalLight 1 Offset 16 + OpMemberDecorate %type_MobileDirectionalLight 2 
Offset 32 + OpMemberDecorate %type_MobileDirectionalLight 3 Offset 48 + OpMemberDecorate %type_MobileDirectionalLight 4 Offset 64 + OpMemberDecorate %type_MobileDirectionalLight 5 Offset 80 + OpMemberDecorate %type_MobileDirectionalLight 5 MatrixStride 16 + OpMemberDecorate %type_MobileDirectionalLight 5 ColMajor + OpDecorate %type_MobileDirectionalLight Block + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 80 + OpMemberDecorate %type__Globals 3 Offset 144 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_4 = OpConstant %int 4 + %float_0 = OpConstant %float 0 + %int_3 = OpConstant %int 3 + %47 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %float_1 = OpConstant %float 1 + %int_0 = OpConstant %int 0 + %bool = OpTypeBool + %int_5 = OpConstant %int 5 + %52 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%float_0_999989986 = OpConstant %float 0.999989986 +%float_65000 = OpConstant %float 65000 + %55 = OpConstantComposite %v3float %float_65000 %float_65000 %float_65000 +%float_0_318309873 = OpConstant %float 0.318309873 + %57 = OpConstantComposite %v3float %float_0_318309873 %float_0_318309873 %float_0_318309873 +%float_65500 = OpConstant %float 65500 + %float_0_5 = OpConstant %float 0.5 + %60 = OpConstantComposite %v2float %float_0_5 %float_0_5 + %float_2 = OpConstant %float 2 + %float_n2 = OpConstant %float -2 + %63 = OpConstantComposite %v2float %float_2 %float_n2 + %64 = OpConstantComposite %v3float %float_1 %float_1 %float_1 +%float_0_119999997 = OpConstant %float 0.119999997 + %float_n1 = 
OpConstant %float -1 +%float_n0_0274999999 = OpConstant %float -0.0274999999 + %68 = OpConstantComposite %v2float %float_n1 %float_n0_0274999999 +%float_0_0425000004 = OpConstant %float 0.0425000004 + %70 = OpConstantComposite %v2float %float_1 %float_0_0425000004 +%float_n9_27999973 = OpConstant %float -9.27999973 + %72 = OpConstantComposite %v2float %float_1 %float_1 + %float_0_25 = OpConstant %float 0.25 + %float_16 = OpConstant %float 16 + %int_31 = OpConstant %int 31 + %int_56 = OpConstant %int 56 + %int_57 = OpConstant %int 57 + %int_64 = OpConstant %int 64 + %int_65 = OpConstant %int 65 + %int_66 = OpConstant %int 66 + %int_67 = OpConstant %int 67 + %int_88 = OpConstant %int 88 + %int_135 = OpConstant %int 135 + %int_139 = OpConstant %int 139 +%mat3v3float = OpTypeMatrix %v3float 3 + %86 = OpConstantComposite %v2float %float_2 %float_2 +%float_0_300000012 = OpConstant %float 0.300000012 + %88 = OpConstantComposite %v3float %float_0_300000012 %float_0_300000012 %float_1 + %float_20 = OpConstant %float 20 + %90 = OpConstantComposite %v2float %float_20 %float_20 +%float_0_400000006 = OpConstant %float 0.400000006 + %float_24 = OpConstant %float 24 +%float_0_294999987 = OpConstant %float 0.294999987 +%float_0_660000026 = OpConstant %float 0.660000026 +%float_0_699999988 = OpConstant %float 0.699999988 +%float_65504 = OpConstant %float 65504 +%float_1_20000005 = OpConstant %float 1.20000005 + %98 = OpConstantComposite %v3float %float_2 %float_2 %float_2 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float 
%mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4 +%type_MobileDirectionalLight = OpTypeStruct %v4float %v4float %v4float %v4float %v4float %_arr_mat4v4float_uint_4 +%_ptr_Uniform_type_MobileDirectionalLight = OpTypePointer Uniform %type_MobileDirectionalLight +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%type__Globals = OpTypeStruct %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %v4float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_cube_image = OpTypeImage %float Cube 2 0 0 1 Unknown 
+%_ptr_UniformConstant_type_cube_image = OpTypePointer UniformConstant %type_cube_image +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_bool = OpTypePointer Input %bool +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %110 = OpTypeFunction %void +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_int = OpTypePointer Uniform %int +%type_sampled_image = OpTypeSampledImage %type_cube_image +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%type_sampled_image_0 = OpTypeSampledImage %type_2d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform +%MobileDirectionalLight = OpVariable %_ptr_Uniform_type_MobileDirectionalLight Uniform +%MobileDirectionalLight_DirectionalLightShadowTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%MobileDirectionalLight_DirectionalLightShadowSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%Material_Texture2D_0 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Material_Texture2D_0Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%Material_Texture2D_1 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Material_Texture2D_1Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%ReflectionCubemap = OpVariable %_ptr_UniformConstant_type_cube_image UniformConstant +%ReflectionCubemapSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_v2float Input +%in_var_TEXCOORD7 = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD8 = OpVariable %_ptr_Input_v4float Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float 
Input +%gl_FrontFacing = OpVariable %_ptr_Input_bool Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %117 = OpConstantComposite %v3float %float_1 %float_0 %float_0 + %118 = OpConstantComposite %v3float %float_0 %float_1 %float_0 + %119 = OpConstantComposite %v3float %float_0 %float_0 %float_1 + %120 = OpConstantComposite %mat3v3float %117 %118 %119 + %float_10 = OpConstant %float 10 + %122 = OpConstantComposite %v2float %float_10 %float_10 + %float_5 = OpConstant %float 5 + %124 = OpConstantComposite %v2float %float_5 %float_5 +%float_0_00066666666 = OpConstant %float 0.00066666666 + %float_n0_5 = OpConstant %float -0.5 + %127 = OpConstantComposite %v2float %float_n0_5 %float_n0_5 + %128 = OpConstantComposite %v2float %float_0_5 %float_n0_5 + %float_1_5 = OpConstant %float 1.5 + %130 = OpConstantComposite %v2float %float_1_5 %float_n0_5 + %131 = OpConstantComposite %v2float %float_n0_5 %float_0_5 + %132 = OpConstantComposite %v2float %float_1_5 %float_0_5 + %133 = OpConstantComposite %v2float %float_n0_5 %float_1_5 + %134 = OpConstantComposite %v2float %float_0_5 %float_1_5 + %135 = OpConstantComposite %v2float %float_1_5 %float_1_5 + %136 = OpUndef %v3float + %137 = OpUndef %v4float + %138 = OpUndef %float + %139 = OpUndef %v3float + %Main = OpFunction %void None %110 + %140 = OpLabel + %141 = OpLoad %v2float %in_var_TEXCOORD0 + %142 = OpLoad %v4float %in_var_TEXCOORD7 + %143 = OpLoad %v4float %in_var_TEXCOORD8 + %144 = OpLoad %v4float %gl_FragCoord + %145 = OpAccessChain %_ptr_Uniform_v3float %View %int_31 + %146 = OpLoad %v3float %145 + %147 = OpAccessChain %_ptr_Uniform_v4float %View %int_56 + %148 = OpLoad %v4float %147 + %149 = OpAccessChain %_ptr_Uniform_v4float %View %int_57 + %150 = OpLoad %v4float %149 + %151 = OpAccessChain %_ptr_Uniform_v4float %View %int_64 + %152 = OpLoad %v4float %151 + %153 = OpAccessChain %_ptr_Uniform_v4float %View %int_65 + %154 = OpLoad %v4float %153 + %155 = OpAccessChain %_ptr_Uniform_v4float %View 
%int_66 + %156 = OpLoad %v4float %155 + %157 = OpAccessChain %_ptr_Uniform_v2float %View %int_67 + %158 = OpLoad %v2float %157 + %159 = OpAccessChain %_ptr_Uniform_float %View %int_88 + %160 = OpLoad %float %159 + %161 = OpAccessChain %_ptr_Uniform_v4float %View %int_135 + %162 = OpLoad %v4float %161 + %163 = OpAccessChain %_ptr_Uniform_float %View %int_139 + %164 = OpLoad %float %163 + %165 = OpVectorShuffle %v2float %144 %144 0 1 + %166 = OpVectorShuffle %v2float %148 %148 0 1 + %167 = OpFSub %v2float %165 %166 + %168 = OpVectorShuffle %v2float %150 %150 2 3 + %169 = OpFMul %v2float %167 %168 + %170 = OpFSub %v2float %169 %60 + %171 = OpFMul %v2float %170 %63 + %172 = OpCompositeExtract %float %171 0 + %173 = OpCompositeExtract %float %171 1 + %174 = OpCompositeConstruct %v4float %172 %173 %138 %float_1 + %175 = OpCompositeExtract %float %144 3 + %176 = OpCompositeConstruct %v4float %175 %175 %175 %175 + %177 = OpFMul %v4float %174 %176 + %178 = OpVectorShuffle %v3float %143 %143 0 1 2 + %179 = OpFSub %v3float %178 %146 + %180 = OpFNegate %v3float %178 + %181 = OpExtInst %v3float %1 Normalize %180 + %182 = OpFMul %v2float %141 %60 + %183 = OpFMul %v2float %141 %122 + %184 = OpLoad %type_2d_image %Material_Texture2D_0 + %185 = OpLoad %type_sampler %Material_Texture2D_0Sampler + %186 = OpSampledImage %type_sampled_image_0 %184 %185 + %187 = OpImageSampleImplicitLod %v4float %186 %183 None + %188 = OpVectorShuffle %v2float %187 %187 0 1 + %189 = OpFMul %v2float %188 %86 + %190 = OpFSub %v2float %189 %72 + %191 = OpDot %float %190 %190 + %192 = OpFSub %float %float_1 %191 + %193 = OpExtInst %float %1 FClamp %192 %float_0 %float_1 + %194 = OpExtInst %float %1 Sqrt %193 + %195 = OpCompositeExtract %float %190 0 + %196 = OpCompositeExtract %float %190 1 + %197 = OpCompositeConstruct %v4float %195 %196 %194 %float_1 + %198 = OpVectorShuffle %v3float %197 %197 0 1 2 + %199 = OpFMul %v3float %198 %88 + %200 = OpVectorShuffle %v3float %156 %156 0 1 2 + %201 = 
OpCompositeExtract %float %156 3 + %202 = OpCompositeConstruct %v3float %201 %201 %201 + %203 = OpFMul %v3float %199 %202 + %204 = OpFAdd %v3float %203 %200 + %205 = OpMatrixTimesVector %v3float %120 %204 + %206 = OpExtInst %v3float %1 Normalize %205 + %207 = OpFNegate %v3float %181 + %208 = OpDot %float %206 %181 + %209 = OpCompositeConstruct %v3float %208 %208 %208 + %210 = OpFMul %v3float %206 %209 + %211 = OpFMul %v3float %210 %98 + %212 = OpFAdd %v3float %207 %211 + %213 = OpFMul %v2float %141 %90 + %214 = OpLoad %type_2d_image %Material_Texture2D_1 + %215 = OpLoad %type_sampler %Material_Texture2D_1Sampler + %216 = OpSampledImage %type_sampled_image_0 %214 %215 + %217 = OpImageSampleImplicitLod %v4float %216 %213 None + %218 = OpCompositeExtract %float %217 0 + %219 = OpExtInst %float %1 FMix %float_0_400000006 %float_1 %218 + %220 = OpFSub %float %float_1 %219 + %221 = OpFMul %v2float %141 %124 + %222 = OpSampledImage %type_sampled_image_0 %214 %215 + %223 = OpImageSampleImplicitLod %v4float %222 %221 None + %224 = OpCompositeExtract %float %177 3 + %225 = OpFSub %float %224 %float_24 + %226 = OpFMul %float %225 %float_0_00066666666 + %227 = OpExtInst %float %1 FMax %226 %float_0 + %228 = OpExtInst %float %1 FMin %227 %float_1 + %229 = OpCompositeExtract %float %223 1 + %230 = OpExtInst %float %1 FMix %229 %float_1 %228 + %231 = OpExtInst %float %1 FMix %219 %220 %230 + %232 = OpSampledImage %type_sampled_image_0 %214 %215 + %233 = OpImageSampleImplicitLod %v4float %232 %182 None + %234 = OpExtInst %float %1 FMix %229 %float_0 %228 + %235 = OpCompositeExtract %float %233 1 + %236 = OpFAdd %float %235 %234 + %237 = OpExtInst %float %1 FMix %236 %float_0_5 %float_0_5 + %238 = OpExtInst %float %1 FMix %float_0_294999987 %float_0_660000026 %237 + %239 = OpFMul %float %238 %float_0_5 + %240 = OpFMul %float %231 %239 + %241 = OpExtInst %float %1 FMix %float_0 %float_0_5 %235 + %242 = OpExtInst %float %1 FMix %float_0_699999988 %float_1 %229 + %243 = OpExtInst 
%float %1 FMix %242 %float_1 %228 + %244 = OpFAdd %float %241 %243 + %245 = OpExtInst %float %1 FMax %244 %float_0 + %246 = OpExtInst %float %1 FMin %245 %float_1 + %247 = OpCompositeConstruct %v3float %240 %240 %240 + %248 = OpExtInst %v3float %1 FClamp %247 %47 %64 + %249 = OpCompositeExtract %float %158 1 + %250 = OpFMul %float %246 %249 + %251 = OpCompositeExtract %float %158 0 + %252 = OpFAdd %float %250 %251 + %253 = OpExtInst %float %1 FClamp %252 %float_0_119999997 %float_1 + %254 = OpExtInst %float %1 FMax %208 %float_0 + %255 = OpCompositeConstruct %v2float %253 %253 + %256 = OpFMul %v2float %255 %68 + %257 = OpFAdd %v2float %256 %70 + %258 = OpCompositeExtract %float %257 0 + %259 = OpFMul %float %258 %258 + %260 = OpFMul %float %float_n9_27999973 %254 + %261 = OpExtInst %float %1 Exp2 %260 + %262 = OpExtInst %float %1 FMin %259 %261 + %263 = OpFMul %float %262 %258 + %264 = OpCompositeExtract %float %257 1 + %265 = OpFAdd %float %263 %264 + %266 = OpCompositeExtract %float %152 3 + %267 = OpCompositeConstruct %v3float %266 %266 %266 + %268 = OpFMul %v3float %248 %267 + %269 = OpVectorShuffle %v3float %152 %152 0 1 2 + %270 = OpFAdd %v3float %268 %269 + %271 = OpCompositeExtract %float %154 3 + %272 = OpFMul %float %265 %271 + %273 = OpCompositeConstruct %v3float %272 %272 %272 + %274 = OpVectorShuffle %v3float %154 %154 0 1 2 + %275 = OpFAdd %v3float %273 %274 + %276 = OpCompositeExtract %float %275 0 + %277 = OpExtInst %float %1 FClamp %float_1 %float_0 %float_1 + %278 = OpLoad %type_2d_image %MobileDirectionalLight_DirectionalLightShadowTexture + %279 = OpLoad %type_sampler %MobileDirectionalLight_DirectionalLightShadowSampler + %280 = OpAccessChain %_ptr_Uniform_v4float %MobileDirectionalLight %int_1 + %281 = OpAccessChain %_ptr_Uniform_float %MobileDirectionalLight %int_1 %int_3 + %282 = OpLoad %float %281 + %283 = OpAccessChain %_ptr_Uniform_v4float %MobileDirectionalLight %int_2 + %284 = OpLoad %v4float %283 + OpBranch %285 + %285 = OpLabel + %286 
= OpPhi %int %int_0 %140 %287 %288 + %289 = OpSLessThan %bool %286 %int_2 + OpLoopMerge %290 %288 None + OpBranchConditional %289 %291 %290 + %291 = OpLabel + %292 = OpBitcast %uint %286 + %293 = OpAccessChain %_ptr_Uniform_float %MobileDirectionalLight %int_4 %292 + %294 = OpLoad %float %293 + %295 = OpFOrdLessThan %bool %224 %294 + OpSelectionMerge %288 None + OpBranchConditional %295 %296 %288 + %296 = OpLabel + %297 = OpCompositeExtract %float %177 0 + %298 = OpCompositeExtract %float %177 1 + %299 = OpCompositeConstruct %v4float %297 %298 %224 %float_1 + %300 = OpAccessChain %_ptr_Uniform_mat4v4float %MobileDirectionalLight %int_5 %286 + %301 = OpLoad %mat4v4float %300 + %302 = OpMatrixTimesVector %v4float %301 %299 + OpBranch %290 + %288 = OpLabel + %287 = OpIAdd %int %286 %int_1 + OpBranch %285 + %290 = OpLabel + %303 = OpPhi %v4float %52 %285 %302 %296 + %304 = OpCompositeExtract %float %303 2 + %305 = OpFOrdGreaterThan %bool %304 %float_0 + OpSelectionMerge %306 None + OpBranchConditional %305 %307 %306 + %307 = OpLabel + %308 = OpExtInst %float %1 FMin %304 %float_0_999989986 + %309 = OpVectorShuffle %v2float %303 %303 0 1 + %310 = OpVectorShuffle %v2float %284 %284 0 1 + %311 = OpFMul %v2float %309 %310 + %312 = OpExtInst %v2float %1 Fract %311 + %313 = OpExtInst %v2float %1 Floor %311 + %314 = OpFAdd %v2float %313 %127 + %315 = OpVectorShuffle %v2float %284 %284 2 3 + %316 = OpFMul %v2float %314 %315 + %317 = OpSampledImage %type_sampled_image_0 %278 %279 + %318 = OpImageSampleExplicitLod %v4float %317 %316 Lod %float_0 + %319 = OpCompositeExtract %float %318 0 + %320 = OpCompositeInsert %v3float %319 %139 0 + %321 = OpFAdd %v2float %313 %128 + %322 = OpFMul %v2float %321 %315 + %323 = OpSampledImage %type_sampled_image_0 %278 %279 + %324 = OpImageSampleExplicitLod %v4float %323 %322 Lod %float_0 + %325 = OpCompositeExtract %float %324 0 + %326 = OpCompositeInsert %v3float %325 %320 1 + %327 = OpFAdd %v2float %313 %130 + %328 = OpFMul %v2float %327 %315 
+ %329 = OpSampledImage %type_sampled_image_0 %278 %279 + %330 = OpImageSampleExplicitLod %v4float %329 %328 Lod %float_0 + %331 = OpCompositeExtract %float %330 0 + %332 = OpCompositeInsert %v3float %331 %326 2 + %333 = OpFMul %float %308 %282 + %334 = OpFSub %float %333 %float_1 + %335 = OpCompositeConstruct %v3float %282 %282 %282 + %336 = OpFMul %v3float %332 %335 + %337 = OpCompositeConstruct %v3float %334 %334 %334 + %338 = OpFSub %v3float %336 %337 + %339 = OpExtInst %v3float %1 FClamp %338 %47 %64 + %340 = OpFAdd %v2float %313 %131 + %341 = OpFMul %v2float %340 %315 + %342 = OpSampledImage %type_sampled_image_0 %278 %279 + %343 = OpImageSampleExplicitLod %v4float %342 %341 Lod %float_0 + %344 = OpCompositeExtract %float %343 0 + %345 = OpCompositeInsert %v3float %344 %139 0 + %346 = OpFAdd %v2float %313 %60 + %347 = OpFMul %v2float %346 %315 + %348 = OpSampledImage %type_sampled_image_0 %278 %279 + %349 = OpImageSampleExplicitLod %v4float %348 %347 Lod %float_0 + %350 = OpCompositeExtract %float %349 0 + %351 = OpCompositeInsert %v3float %350 %345 1 + %352 = OpFAdd %v2float %313 %132 + %353 = OpFMul %v2float %352 %315 + %354 = OpSampledImage %type_sampled_image_0 %278 %279 + %355 = OpImageSampleExplicitLod %v4float %354 %353 Lod %float_0 + %356 = OpCompositeExtract %float %355 0 + %357 = OpCompositeInsert %v3float %356 %351 2 + %358 = OpFMul %v3float %357 %335 + %359 = OpFSub %v3float %358 %337 + %360 = OpExtInst %v3float %1 FClamp %359 %47 %64 + %361 = OpFAdd %v2float %313 %133 + %362 = OpFMul %v2float %361 %315 + %363 = OpSampledImage %type_sampled_image_0 %278 %279 + %364 = OpImageSampleExplicitLod %v4float %363 %362 Lod %float_0 + %365 = OpCompositeExtract %float %364 0 + %366 = OpCompositeInsert %v3float %365 %139 0 + %367 = OpFAdd %v2float %313 %134 + %368 = OpFMul %v2float %367 %315 + %369 = OpSampledImage %type_sampled_image_0 %278 %279 + %370 = OpImageSampleExplicitLod %v4float %369 %368 Lod %float_0 + %371 = OpCompositeExtract %float %370 0 + %372 
= OpCompositeInsert %v3float %371 %366 1 + %373 = OpFAdd %v2float %313 %135 + %374 = OpFMul %v2float %373 %315 + %375 = OpSampledImage %type_sampled_image_0 %278 %279 + %376 = OpImageSampleExplicitLod %v4float %375 %374 Lod %float_0 + %377 = OpCompositeExtract %float %376 0 + %378 = OpCompositeInsert %v3float %377 %372 2 + %379 = OpFMul %v3float %378 %335 + %380 = OpFSub %v3float %379 %337 + %381 = OpExtInst %v3float %1 FClamp %380 %47 %64 + %382 = OpCompositeExtract %float %339 0 + %383 = OpCompositeExtract %float %312 0 + %384 = OpFSub %float %float_1 %383 + %385 = OpFMul %float %382 %384 + %386 = OpCompositeExtract %float %360 0 + %387 = OpFMul %float %386 %384 + %388 = OpCompositeExtract %float %381 0 + %389 = OpFMul %float %388 %384 + %390 = OpCompositeExtract %float %339 1 + %391 = OpFAdd %float %385 %390 + %392 = OpCompositeExtract %float %360 1 + %393 = OpFAdd %float %387 %392 + %394 = OpCompositeExtract %float %381 1 + %395 = OpFAdd %float %389 %394 + %396 = OpCompositeExtract %float %339 2 + %397 = OpFMul %float %396 %383 + %398 = OpFAdd %float %391 %397 + %399 = OpCompositeInsert %v3float %398 %136 0 + %400 = OpCompositeExtract %float %360 2 + %401 = OpFMul %float %400 %383 + %402 = OpFAdd %float %393 %401 + %403 = OpCompositeInsert %v3float %402 %399 1 + %404 = OpCompositeExtract %float %381 2 + %405 = OpFMul %float %404 %383 + %406 = OpFAdd %float %395 %405 + %407 = OpCompositeInsert %v3float %406 %403 2 + %408 = OpCompositeExtract %float %312 1 + %409 = OpFSub %float %float_1 %408 + %410 = OpCompositeConstruct %v3float %409 %float_1 %408 + %411 = OpDot %float %407 %410 + %412 = OpFMul %float %float_0_25 %411 + %413 = OpExtInst %float %1 FClamp %412 %float_0 %float_1 + %414 = OpAccessChain %_ptr_Uniform_float %MobileDirectionalLight %int_3 %int_0 + %415 = OpLoad %float %414 + %416 = OpFMul %float %224 %415 + %417 = OpAccessChain %_ptr_Uniform_float %MobileDirectionalLight %int_3 %int_1 + %418 = OpLoad %float %417 + %419 = OpFAdd %float %416 %418 + %420 
= OpExtInst %float %1 FClamp %419 %float_0 %float_1 + %421 = OpFMul %float %420 %420 + %422 = OpExtInst %float %1 FMix %413 %float_1 %421 + OpBranch %306 + %306 = OpLabel + %423 = OpPhi %float %float_1 %290 %422 %307 + %424 = OpLoad %v4float %280 + %425 = OpVectorShuffle %v3float %424 %424 0 1 2 + %426 = OpDot %float %206 %425 + %427 = OpExtInst %float %1 FMax %float_0 %426 + %428 = OpFAdd %v3float %181 %425 + %429 = OpExtInst %v3float %1 Normalize %428 + %430 = OpDot %float %206 %429 + %431 = OpExtInst %float %1 FMax %float_0 %430 + %432 = OpFMul %float %423 %427 + %433 = OpCompositeConstruct %v3float %432 %432 %432 + %434 = OpAccessChain %_ptr_Uniform_v4float %MobileDirectionalLight %int_0 + %435 = OpLoad %v4float %434 + %436 = OpVectorShuffle %v3float %435 %435 0 1 2 + %437 = OpFMul %v3float %433 %436 + %438 = OpFMul %float %253 %float_0_25 + %439 = OpFAdd %float %438 %float_0_25 + %440 = OpExtInst %v3float %1 Cross %206 %429 + %441 = OpDot %float %440 %440 + %442 = OpFMul %float %253 %253 + %443 = OpFMul %float %431 %442 + %444 = OpFMul %float %443 %443 + %445 = OpFAdd %float %441 %444 + %446 = OpFDiv %float %442 %445 + %447 = OpFMul %float %446 %446 + %448 = OpExtInst %float %1 FMin %447 %float_65504 + %449 = OpFMul %float %439 %448 + %450 = OpFMul %float %276 %449 + %451 = OpCompositeConstruct %v3float %450 %450 %450 + %452 = OpFAdd %v3float %270 %451 + %453 = OpFMul %v3float %437 %452 + %454 = OpAccessChain %_ptr_Uniform_float %_Globals %int_3 %int_3 + %455 = OpLoad %float %454 + %456 = OpFOrdGreaterThan %bool %455 %float_0 + %457 = OpSelect %float %456 %float_1 %float_0 + %458 = OpFOrdNotEqual %bool %457 %float_0 + %459 = OpSelect %float %458 %455 %164 + %460 = OpExtInst %float %1 Log2 %253 + %461 = OpFMul %float %float_1_20000005 %460 + %462 = OpFSub %float %float_1 %461 + %463 = OpFSub %float %459 %float_1 + %464 = OpFSub %float %463 %462 + %465 = OpLoad %type_cube_image %ReflectionCubemap + %466 = OpLoad %type_sampler %ReflectionCubemapSampler + %467 = 
OpSampledImage %type_sampled_image %465 %466 + %468 = OpImageSampleExplicitLod %v4float %467 %212 Lod %464 + OpSelectionMerge %469 None + OpBranchConditional %458 %470 %471 + %471 = OpLabel + %472 = OpVectorShuffle %v3float %468 %468 0 1 2 + %473 = OpCompositeExtract %float %468 3 + %474 = OpFMul %float %473 %float_16 + %475 = OpCompositeConstruct %v3float %474 %474 %474 + %476 = OpFMul %v3float %472 %475 + %477 = OpFMul %v3float %476 %476 + OpBranch %469 + %470 = OpLabel + %478 = OpVectorShuffle %v3float %468 %468 0 1 2 + %479 = OpVectorShuffle %v3float %162 %162 0 1 2 + %480 = OpFMul %v3float %478 %479 + OpBranch %469 + %469 = OpLabel + %481 = OpPhi %v3float %477 %471 %480 %470 + %482 = OpCompositeConstruct %v3float %277 %277 %277 + %483 = OpFMul %v3float %481 %482 + %484 = OpCompositeConstruct %v3float %276 %276 %276 + %485 = OpFMul %v3float %483 %484 + %486 = OpFAdd %v3float %453 %485 + OpBranch %487 + %487 = OpLabel + %488 = OpPhi %v3float %486 %469 %489 %490 + %491 = OpPhi %int %int_0 %469 %492 %490 + %493 = OpAccessChain %_ptr_Uniform_int %_Globals %int_0 + %494 = OpLoad %int %493 + %495 = OpSLessThan %bool %491 %494 + OpLoopMerge %496 %490 None + OpBranchConditional %495 %497 %496 + %497 = OpLabel + %498 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_1 %491 + %499 = OpLoad %v4float %498 + %500 = OpVectorShuffle %v3float %499 %499 0 1 2 + %501 = OpFSub %v3float %500 %179 + %502 = OpDot %float %501 %501 + %503 = OpExtInst %float %1 InverseSqrt %502 + %504 = OpCompositeConstruct %v3float %503 %503 %503 + %505 = OpFMul %v3float %501 %504 + %506 = OpFAdd %v3float %181 %505 + %507 = OpExtInst %v3float %1 Normalize %506 + %508 = OpDot %float %206 %505 + %509 = OpExtInst %float %1 FMax %float_0 %508 + %510 = OpDot %float %206 %507 + %511 = OpExtInst %float %1 FMax %float_0 %510 + %512 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_2 %491 + %513 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 %491 %int_3 + %514 = OpLoad %float %513 + %515 = 
OpFOrdEqual %bool %514 %float_0 + OpSelectionMerge %490 None + OpBranchConditional %515 %516 %517 + %517 = OpLabel + %518 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 %491 %int_3 + %519 = OpLoad %float %518 + %520 = OpCompositeConstruct %v3float %519 %519 %519 + %521 = OpFMul %v3float %501 %520 + %522 = OpDot %float %521 %521 + %523 = OpExtInst %float %1 FClamp %522 %float_0 %float_1 + %524 = OpFSub %float %float_1 %523 + %525 = OpExtInst %float %1 Pow %524 %514 + OpBranch %490 + %516 = OpLabel + %526 = OpFAdd %float %502 %float_1 + %527 = OpFDiv %float %float_1 %526 + %528 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 %491 %int_3 + %529 = OpLoad %float %528 + %530 = OpFMul %float %529 %529 + %531 = OpFMul %float %502 %530 + %532 = OpFMul %float %531 %531 + %533 = OpFSub %float %float_1 %532 + %534 = OpExtInst %float %1 FClamp %533 %float_0 %float_1 + %535 = OpFMul %float %534 %534 + %536 = OpFMul %float %527 %535 + OpBranch %490 + %490 = OpLabel + %537 = OpPhi %float %525 %517 %536 %516 + %538 = OpFMul %float %537 %509 + %539 = OpCompositeConstruct %v3float %538 %538 %538 + %540 = OpLoad %v4float %512 + %541 = OpVectorShuffle %v3float %540 %540 0 1 2 + %542 = OpFMul %v3float %539 %541 + %543 = OpFMul %v3float %542 %57 + %544 = OpExtInst %v3float %1 Cross %206 %507 + %545 = OpDot %float %544 %544 + %546 = OpFMul %float %511 %442 + %547 = OpFMul %float %546 %546 + %548 = OpFAdd %float %545 %547 + %549 = OpFDiv %float %442 %548 + %550 = OpFMul %float %549 %549 + %551 = OpExtInst %float %1 FMin %550 %float_65504 + %552 = OpFMul %float %439 %551 + %553 = OpFMul %float %276 %552 + %554 = OpCompositeConstruct %v3float %553 %553 %553 + %555 = OpFAdd %v3float %270 %554 + %556 = OpFMul %v3float %543 %555 + %557 = OpExtInst %v3float %1 FMin %55 %556 + %489 = OpFAdd %v3float %488 %557 + %492 = OpIAdd %int %491 %int_1 + OpBranch %487 + %496 = OpLabel + %558 = OpExtInst %v3float %1 FMax %47 %47 + %559 = OpFAdd %v3float %488 %558 + %560 = OpFAdd %v3float %270 %484 
+ %561 = OpCompositeConstruct %v3float %160 %160 %160 + %562 = OpExtInst %v3float %1 FMix %559 %560 %561 + %563 = OpCompositeExtract %float %142 3 + %564 = OpCompositeConstruct %v3float %563 %563 %563 + %565 = OpFMul %v3float %562 %564 + %566 = OpVectorShuffle %v3float %142 %142 0 1 2 + %567 = OpFAdd %v3float %565 %566 + %568 = OpVectorShuffle %v4float %137 %567 4 5 6 3 + %569 = OpCompositeExtract %float %143 3 + %570 = OpExtInst %float %1 FMin %569 %float_65500 + %571 = OpCompositeInsert %v4float %570 %568 3 + OpStore %out_var_SV_Target0 %571 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag b/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag new file mode 100644 index 00000000000..eba220ba4e7 --- /dev/null +++ b/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag @@ -0,0 +1,878 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 353 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %MainPixelShader "main" %gl_FragCoord %in_var_TEXCOORD6 %in_var_TEXCOORD7 %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_TEXCOORD0 %in_var_PRIMITIVE_ID %gl_FrontFacing %gl_FragDepth %out_var_SV_Target0 + OpExecutionMode %MainPixelShader OriginUpperLeft + OpExecutionMode %MainPixelShader DepthReplacing + OpExecutionMode %MainPixelShader DepthLess + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 
"View_ViewToClipNoAA" + OpMemberName %type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 "View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 "View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + 
OpMemberName %type_View 45 "PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName %type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 "PrePadding_View_2076" + OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName %type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName 
%type_View 81 "View_DeltaTime" + OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 "View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 
"View_AtmosphericFogSunPower" + OpMemberName %type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + OpMemberName %type_View 132 "PrePadding_View_2524" + OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName %type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 "View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 
"View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + OpMemberName %type_View 165 "PrePadding_View_2956" + OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName 
%type_PrimitiveDither "type.PrimitiveDither" + OpMemberName %type_PrimitiveDither 0 "PrimitiveDither_LODFactor" + OpName %PrimitiveDither "PrimitiveDither" + OpName %type_PrimitiveFade "type.PrimitiveFade" + OpMemberName %type_PrimitiveFade 0 "PrimitiveFade_FadeTimeScaleBias" + OpName %PrimitiveFade "PrimitiveFade" + OpName %type_Material "type.Material" + OpMemberName %type_Material 0 "Material_VectorExpressions" + OpMemberName %type_Material 1 "Material_ScalarExpressions" + OpName %Material "Material" + OpName %type_2d_image "type.2d.image" + OpName %Material_Texture2D_0 "Material_Texture2D_0" + OpName %type_sampler "type.sampler" + OpName %Material_Texture2D_0Sampler "Material_Texture2D_0Sampler" + OpName %Material_Texture2D_3 "Material_Texture2D_3" + OpName %Material_Texture2D_3Sampler "Material_Texture2D_3Sampler" + OpName %in_var_TEXCOORD6 "in.var.TEXCOORD6" + OpName %in_var_TEXCOORD7 "in.var.TEXCOORD7" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %in_var_PRIMITIVE_ID "in.var.PRIMITIVE_ID" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %MainPixelShader "MainPixelShader" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_Position" + OpDecorateString %in_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %in_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorate %in_var_PRIMITIVE_ID Flat + OpDecorate %gl_FrontFacing BuiltIn FrontFacing + OpDecorateString %gl_FrontFacing UserSemantic "SV_IsFrontFace" + OpDecorate 
%gl_FrontFacing Flat + OpDecorate %gl_FragDepth BuiltIn FragDepth + OpDecorateString %gl_FragDepth UserSemantic "SV_DepthLessEqual" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD6 Location 0 + OpDecorate %in_var_TEXCOORD7 Location 1 + OpDecorate %in_var_TEXCOORD10_centroid Location 2 + OpDecorate %in_var_TEXCOORD11_centroid Location 3 + OpDecorate %in_var_TEXCOORD0 Location 4 + OpDecorate %in_var_PRIMITIVE_ID Location 5 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %PrimitiveDither DescriptorSet 0 + OpDecorate %PrimitiveDither Binding 1 + OpDecorate %PrimitiveFade DescriptorSet 0 + OpDecorate %PrimitiveFade Binding 2 + OpDecorate %Material DescriptorSet 0 + OpDecorate %Material Binding 3 + OpDecorate %Material_Texture2D_0 DescriptorSet 0 + OpDecorate %Material_Texture2D_0 Binding 0 + OpDecorate %Material_Texture2D_0Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_0Sampler Binding 0 + OpDecorate %Material_Texture2D_3 DescriptorSet 0 + OpDecorate %Material_Texture2D_3 Binding 1 + OpDecorate %Material_Texture2D_3Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_3Sampler Binding 1 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + 
OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + OpMemberDecorate %type_View 30 Offset 1040 + 
OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + 
OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate %type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate %type_View 92 Offset 2236 + OpMemberDecorate 
%type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + OpMemberDecorate %type_View 136 Offset 2560 + 
OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate %type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate %type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate %type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate %type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpDecorate %type_View Block + OpMemberDecorate 
%type_PrimitiveDither 0 Offset 0 + OpDecorate %type_PrimitiveDither Block + OpMemberDecorate %type_PrimitiveFade 0 Offset 0 + OpDecorate %type_PrimitiveFade Block + OpDecorate %_arr_v4float_uint_9 ArrayStride 16 + OpDecorate %_arr_v4float_uint_3 ArrayStride 16 + OpMemberDecorate %type_Material 0 Offset 0 + OpMemberDecorate %type_Material 1 Offset 144 + OpDecorate %type_Material Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %v2int = OpTypeVector %int 2 +%float_0_00100000005 = OpConstant %float 0.00100000005 + %int_2 = OpConstant %int 2 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %float_0 = OpConstant %float 0 + %49 = OpConstantComposite %v2float %float_0 %float_0 + %float_1 = OpConstant %float 1 + %int_4 = OpConstant %int 4 + %int_11 = OpConstant %int 11 +%float_0_249500006 = OpConstant %float 0.249500006 + %54 = OpConstantComposite %v2float %float_0_249500006 %float_0_249500006 +%float_0_499992371 = OpConstant %float 0.499992371 + %56 = OpConstantComposite %v2float %float_0_499992371 %float_0_499992371 + %int_32 = OpConstant %int 32 + %int_53 = OpConstant %int 53 + %int_57 = OpConstant %int 57 + %int_80 = OpConstant %int 80 + %int_82 = OpConstant %int 82 + %int_98 = OpConstant %int 98 + %uint_1 = OpConstant %uint 1 +%mat3v3float = OpTypeMatrix %v3float 3 + %float_2 = OpConstant %float 2 + %float_n1 = OpConstant %float -1 + %67 = OpConstantComposite %v2float %float_n1 %float_n1 + %bool = OpTypeBool + %float_n0_5 = OpConstant %float -0.5 + %70 = OpConstantComposite %v3float %float_0 %float_0 %float_1 +%float_0_333299994 = OpConstant %float 0.333299994 + %uint_5 = OpConstant %uint 5 +%float_347_834503 = OpConstant %float 347.834503 +%float_3343_28369 = OpConstant %float 3343.28369 + 
%75 = OpConstantComposite %v2float %float_347_834503 %float_3343_28369 + %float_1000 = OpConstant %float 1000 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_PrimitiveDither = OpTypeStruct %float 
+%_ptr_Uniform_type_PrimitiveDither = OpTypePointer Uniform %type_PrimitiveDither +%type_PrimitiveFade = OpTypeStruct %v2float +%_ptr_Uniform_type_PrimitiveFade = OpTypePointer Uniform %type_PrimitiveFade + %uint_9 = OpConstant %uint 9 +%_arr_v4float_uint_9 = OpTypeArray %v4float %uint_9 + %uint_3 = OpConstant %uint 3 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%type_Material = OpTypeStruct %_arr_v4float_uint_9 %_arr_v4float_uint_3 +%_ptr_Uniform_type_Material = OpTypePointer Uniform %type_Material +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%_ptr_Input__arr_v4float_uint_1 = OpTypePointer Input %_arr_v4float_uint_1 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_bool = OpTypePointer Input %bool +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %93 = OpTypeFunction %void +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%type_sampled_image = OpTypeSampledImage %type_2d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform +%PrimitiveDither = OpVariable %_ptr_Uniform_type_PrimitiveDither Uniform +%PrimitiveFade = OpVariable %_ptr_Uniform_type_PrimitiveFade Uniform + %Material = OpVariable %_ptr_Uniform_type_Material Uniform +%Material_Texture2D_0 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Material_Texture2D_0Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%Material_Texture2D_3 = OpVariable %_ptr_UniformConstant_type_2d_image 
UniformConstant +%Material_Texture2D_3Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD6 = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD7 = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr_v4float_uint_1 Input +%in_var_PRIMITIVE_ID = OpVariable %_ptr_Input_uint Input +%gl_FrontFacing = OpVariable %_ptr_Input_bool Input +%gl_FragDepth = OpVariable %_ptr_Output_float Output +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %98 = OpUndef %float + %99 = OpConstantNull %v2float +%float_0_015625 = OpConstant %float 0.015625 + %101 = OpConstantComposite %v2float %float_0_015625 %float_0_015625 +%float_0_166666672 = OpConstant %float 0.166666672 + %103 = OpUndef %float + %104 = OpConstantNull %v3float +%MainPixelShader = OpFunction %void None %93 + %105 = OpLabel + %106 = OpLoad %v4float %gl_FragCoord + %107 = OpLoad %v4float %in_var_TEXCOORD6 + %108 = OpLoad %v4float %in_var_TEXCOORD7 + %109 = OpLoad %v4float %in_var_TEXCOORD10_centroid + %110 = OpLoad %v4float %in_var_TEXCOORD11_centroid + %111 = OpLoad %_arr_v4float_uint_1 %in_var_TEXCOORD0 + %112 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_4 + %113 = OpLoad %mat4v4float %112 + %114 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_11 + %115 = OpLoad %mat4v4float %114 + %116 = OpAccessChain %_ptr_Uniform_v3float %View %int_32 + %117 = OpLoad %v3float %116 + %118 = OpAccessChain %_ptr_Uniform_v4float %View %int_53 + %119 = OpLoad %v4float %118 + %120 = OpAccessChain %_ptr_Uniform_v4float %View %int_57 + %121 = OpLoad %v4float %120 + %122 = OpAccessChain %_ptr_Uniform_float %View %int_80 + %123 = OpLoad %float %122 + %124 = OpCompositeExtract %v4float %111 0 + %125 = OpVectorShuffle %v2float %99 %124 2 3 + %126 = 
OpVectorShuffle %v3float %109 %109 0 1 2 + %127 = OpVectorShuffle %v3float %110 %110 0 1 2 + %128 = OpExtInst %v3float %1 Cross %127 %126 + %129 = OpCompositeExtract %float %110 3 + %130 = OpCompositeConstruct %v3float %129 %129 %129 + %131 = OpFMul %v3float %128 %130 + %132 = OpCompositeConstruct %mat3v3float %126 %131 %127 + %133 = OpVectorShuffle %v2float %106 %106 0 1 + %134 = OpVectorShuffle %v2float %121 %121 0 1 + %135 = OpFSub %v2float %133 %134 + %136 = OpCompositeExtract %float %106 2 + %137 = OpCompositeConstruct %v4float %103 %103 %136 %float_1 + %138 = OpCompositeExtract %float %106 3 + %139 = OpCompositeConstruct %v4float %138 %138 %138 %138 + %140 = OpFMul %v4float %137 %139 + %141 = OpCompositeExtract %float %106 0 + %142 = OpCompositeExtract %float %106 1 + %143 = OpCompositeConstruct %v4float %141 %142 %136 %float_1 + %144 = OpMatrixTimesVector %v4float %115 %143 + %145 = OpVectorShuffle %v3float %144 %144 0 1 2 + %146 = OpCompositeExtract %float %144 3 + %147 = OpCompositeConstruct %v3float %146 %146 %146 + %148 = OpFDiv %v3float %145 %147 + %149 = OpFSub %v3float %148 %117 + %150 = OpFNegate %v3float %148 + %151 = OpExtInst %v3float %1 Normalize %150 + %152 = OpVectorTimesMatrix %v3float %151 %132 + %153 = OpVectorShuffle %v2float %152 %152 0 1 + %154 = OpFMul %v2float %153 %67 + %155 = OpCompositeExtract %float %152 2 + %156 = OpCompositeConstruct %v2float %155 %155 + %157 = OpFDiv %v2float %154 %156 + %158 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_0 + %159 = OpLoad %float %158 + %160 = OpCompositeConstruct %v2float %159 %159 + %161 = OpFMul %v2float %160 %157 + %162 = OpDot %float %151 %127 + %163 = OpExtInst %float %1 FAbs %162 + %164 = OpExtInst %float %1 FMax %163 %float_0 + %165 = OpExtInst %float %1 FMin %164 %float_1 + %166 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_1 + %167 = OpLoad %float %166 + %168 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_2 + %169 = OpLoad %float 
%168 + %170 = OpExtInst %float %1 FMix %167 %169 %165 + %171 = OpExtInst %float %1 Floor %170 + %172 = OpFDiv %float %float_1 %170 + %173 = OpCompositeConstruct %v2float %172 %172 + %174 = OpFMul %v2float %161 %173 + %175 = OpDPdx %v2float %125 + %176 = OpDPdy %v2float %125 + %177 = OpLoad %type_2d_image %Material_Texture2D_0 + %178 = OpLoad %type_sampler %Material_Texture2D_0Sampler + OpBranch %179 + %179 = OpLabel + %180 = OpPhi %float %float_1 %105 %181 %182 + %183 = OpPhi %v2float %49 %105 %184 %182 + %185 = OpPhi %int %int_0 %105 %186 %182 + %187 = OpPhi %float %float_1 %105 %188 %182 + %189 = OpPhi %float %float_1 %105 %180 %182 + %190 = OpConvertSToF %float %185 + %191 = OpFAdd %float %171 %float_2 + %192 = OpFOrdLessThan %bool %190 %191 + OpLoopMerge %193 %182 None + OpBranchConditional %192 %194 %193 + %194 = OpLabel + %195 = OpFAdd %v2float %125 %183 + %196 = OpSampledImage %type_sampled_image %177 %178 + %197 = OpImageSampleExplicitLod %v4float %196 %195 Grad %175 %176 + %188 = OpCompositeExtract %float %197 1 + %198 = OpFOrdLessThan %bool %180 %188 + OpSelectionMerge %182 None + OpBranchConditional %198 %199 %182 + %199 = OpLabel + %200 = OpFSub %float %189 %187 + %201 = OpFSub %float %188 %180 + %202 = OpFAdd %float %200 %201 + %203 = OpFDiv %float %201 %202 + %204 = OpFMul %float %189 %203 + %205 = OpFSub %float %float_1 %203 + %206 = OpFMul %float %180 %205 + %207 = OpFAdd %float %204 %206 + %208 = OpCompositeConstruct %v2float %203 %203 + %209 = OpFMul %v2float %208 %174 + %210 = OpFSub %v2float %183 %209 + OpBranch %193 + %182 = OpLabel + %181 = OpFSub %float %180 %172 + %184 = OpFAdd %v2float %183 %174 + %186 = OpIAdd %int %185 %int_1 + OpBranch %179 + %193 = OpLabel + %211 = OpPhi %float %98 %179 %207 %199 + %212 = OpPhi %v2float %183 %179 %210 %199 + %213 = OpVectorShuffle %v2float %212 %104 0 1 + %214 = OpFAdd %v2float %125 %213 + %215 = OpAccessChain %_ptr_Uniform_float %View %int_82 + %216 = OpLoad %float %215 + %217 = OpSampledImage 
%type_sampled_image %177 %178 + %218 = OpImageSampleImplicitLod %v4float %217 %214 Bias %216 + %219 = OpCompositeExtract %float %218 0 + %220 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_2 %int_1 + %221 = OpLoad %float %220 + %222 = OpFMul %float %219 %221 + %223 = OpFSub %float %float_1 %222 + %224 = OpExtInst %float %1 FMax %223 %float_0 + %225 = OpExtInst %float %1 FMin %224 %float_1 + %226 = OpAccessChain %_ptr_Uniform_float %View %int_98 %int_0 + %227 = OpLoad %float %226 + %228 = OpCompositeConstruct %v2float %227 %227 + %229 = OpFAdd %v2float %135 %228 + %230 = OpCompositeExtract %float %229 0 + %231 = OpConvertFToU %uint %230 + %232 = OpCompositeExtract %float %229 1 + %233 = OpConvertFToU %uint %232 + %234 = OpIMul %uint %uint_2 %233 + %235 = OpIAdd %uint %231 %234 + %236 = OpUMod %uint %235 %uint_5 + %237 = OpConvertUToF %float %236 + %238 = OpFMul %v2float %135 %101 + %239 = OpLoad %type_2d_image %Material_Texture2D_3 + %240 = OpLoad %type_sampler %Material_Texture2D_3Sampler + %241 = OpSampledImage %type_sampled_image %239 %240 + %242 = OpImageSampleImplicitLod %v4float %241 %238 Bias %216 + %243 = OpCompositeExtract %float %242 0 + %244 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_2 %int_2 + %245 = OpLoad %float %244 + %246 = OpFMul %float %243 %245 + %247 = OpFAdd %float %237 %246 + %248 = OpFMul %float %247 %float_0_166666672 + %249 = OpFAdd %float %225 %248 + %250 = OpFAdd %float %249 %float_n0_5 + %251 = OpCompositeExtract %float %218 2 + %252 = OpFAdd %float %251 %250 + %253 = OpSampledImage %type_sampled_image %239 %240 + %254 = OpImageSampleImplicitLod %v4float %253 %238 Bias %216 + %255 = OpCompositeExtract %float %254 0 + %256 = OpFAdd %float %237 %255 + %257 = OpFMul %float %256 %float_0_166666672 + %258 = OpAccessChain %_ptr_Uniform_float %PrimitiveFade %int_0 %int_0 + %259 = OpLoad %float %258 + %260 = OpFMul %float %123 %259 + %261 = OpAccessChain %_ptr_Uniform_float %PrimitiveFade %int_0 %int_1 + %262 = OpLoad 
%float %261 + %263 = OpFAdd %float %260 %262 + %264 = OpExtInst %float %1 FClamp %263 %float_0 %float_1 + %265 = OpFAdd %float %264 %257 + %266 = OpFAdd %float %265 %float_n0_5 + %267 = OpFMul %float %252 %266 + %268 = OpFSub %float %float_1 %211 + %269 = OpFMul %float %268 %159 + %270 = OpCompositeExtract %float %212 0 + %271 = OpCompositeExtract %float %212 1 + %272 = OpCompositeConstruct %v3float %270 %271 %269 + %273 = OpDot %float %272 %272 + %274 = OpExtInst %float %1 Sqrt %273 + %275 = OpDPdx %v2float %125 + %276 = OpExtInst %v2float %1 FAbs %275 + %277 = OpDot %float %276 %276 + %278 = OpExtInst %float %1 Sqrt %277 + %279 = OpDPdx %v3float %149 + %280 = OpDot %float %279 %279 + %281 = OpExtInst %float %1 Sqrt %280 + %282 = OpFDiv %float %278 %281 + %283 = OpDPdy %v2float %125 + %284 = OpExtInst %v2float %1 FAbs %283 + %285 = OpDot %float %284 %284 + %286 = OpExtInst %float %1 Sqrt %285 + %287 = OpDPdy %v3float %149 + %288 = OpDot %float %287 %287 + %289 = OpExtInst %float %1 Sqrt %288 + %290 = OpFDiv %float %286 %289 + %291 = OpExtInst %float %1 FMax %282 %290 + %292 = OpCompositeExtract %v4float %113 0 + %293 = OpVectorShuffle %v3float %292 %292 0 1 2 + %294 = OpCompositeExtract %v4float %113 1 + %295 = OpVectorShuffle %v3float %294 %294 0 1 2 + %296 = OpCompositeExtract %v4float %113 2 + %297 = OpVectorShuffle %v3float %296 %296 0 1 2 + %298 = OpCompositeConstruct %mat3v3float %293 %295 %297 + %299 = OpMatrixTimesVector %v3float %298 %70 + %300 = OpDot %float %299 %151 + %301 = OpExtInst %float %1 FAbs %300 + %302 = OpFDiv %float %291 %301 + %303 = OpFDiv %float %274 %302 + %304 = OpAccessChain %_ptr_Uniform_float %PrimitiveDither %int_0 + %305 = OpLoad %float %304 + %306 = OpFOrdNotEqual %bool %305 %float_0 + OpSelectionMerge %307 None + OpBranchConditional %306 %308 %307 + %308 = OpLabel + %309 = OpExtInst %float %1 FAbs %305 + %310 = OpFOrdGreaterThan %bool %309 %float_0_00100000005 + OpSelectionMerge %311 None + OpBranchConditional %310 %312 %311 + 
%312 = OpLabel + %313 = OpExtInst %v2float %1 Floor %133 + %314 = OpDot %float %313 %75 + %315 = OpExtInst %float %1 Cos %314 + %316 = OpFMul %float %315 %float_1000 + %317 = OpExtInst %float %1 Fract %316 + %318 = OpFOrdLessThan %bool %305 %float_0 + %319 = OpFAdd %float %305 %float_1 + %320 = OpFOrdGreaterThan %bool %319 %317 + %321 = OpFOrdLessThan %bool %305 %317 + %322 = OpSelect %bool %318 %320 %321 + %323 = OpSelect %float %322 %float_1 %float_0 + %324 = OpFSub %float %323 %float_0_00100000005 + %325 = OpFOrdLessThan %bool %324 %float_0 + OpSelectionMerge %326 None + OpBranchConditional %325 %327 %326 + %327 = OpLabel + OpKill + %326 = OpLabel + OpBranch %311 + %311 = OpLabel + OpBranch %307 + %307 = OpLabel + %328 = OpFSub %float %267 %float_0_333299994 + %329 = OpFOrdLessThan %bool %328 %float_0 + OpSelectionMerge %330 None + OpBranchConditional %329 %331 %330 + %331 = OpLabel + OpKill + %330 = OpLabel + %332 = OpCompositeExtract %float %140 2 + %333 = OpCompositeExtract %float %140 3 + %334 = OpFAdd %float %333 %303 + %335 = OpFDiv %float %332 %334 + %336 = OpExtInst %float %1 FMin %335 %136 + %337 = OpVectorShuffle %v2float %107 %107 0 1 + %338 = OpCompositeExtract %float %107 3 + %339 = OpCompositeConstruct %v2float %338 %338 + %340 = OpFDiv %v2float %337 %339 + %341 = OpVectorShuffle %v2float %119 %119 0 1 + %342 = OpFSub %v2float %340 %341 + %343 = OpVectorShuffle %v2float %108 %108 0 1 + %344 = OpCompositeExtract %float %108 3 + %345 = OpCompositeConstruct %v2float %344 %344 + %346 = OpFDiv %v2float %343 %345 + %347 = OpVectorShuffle %v2float %119 %119 2 3 + %348 = OpFSub %v2float %346 %347 + %349 = OpFSub %v2float %342 %348 + %350 = OpFMul %v2float %349 %54 + %351 = OpFAdd %v2float %350 %56 + %352 = OpVectorShuffle %v4float %351 %49 0 1 2 3 + OpStore %gl_FragDepth %336 + OpStore %out_var_SV_Target0 %352 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag 
b/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag new file mode 100644 index 00000000000..eba220ba4e7 --- /dev/null +++ b/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag @@ -0,0 +1,878 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 353 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %MainPixelShader "main" %gl_FragCoord %in_var_TEXCOORD6 %in_var_TEXCOORD7 %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_TEXCOORD0 %in_var_PRIMITIVE_ID %gl_FrontFacing %gl_FragDepth %out_var_SV_Target0 + OpExecutionMode %MainPixelShader OriginUpperLeft + OpExecutionMode %MainPixelShader DepthReplacing + OpExecutionMode %MainPixelShader DepthLess + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 "View_ViewToClipNoAA" + OpMemberName %type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 
"View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 "View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 45 "PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName 
%type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 "PrePadding_View_2076" + OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName %type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName %type_View 81 "View_DeltaTime" + OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + 
OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 "View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 "View_AtmosphericFogSunPower" + OpMemberName %type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName 
%type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + OpMemberName %type_View 132 "PrePadding_View_2524" + OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName %type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 "View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 
157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + OpMemberName %type_View 165 "PrePadding_View_2956" + OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_PrimitiveDither "type.PrimitiveDither" + OpMemberName %type_PrimitiveDither 0 "PrimitiveDither_LODFactor" + OpName %PrimitiveDither "PrimitiveDither" + OpName %type_PrimitiveFade "type.PrimitiveFade" + OpMemberName %type_PrimitiveFade 0 "PrimitiveFade_FadeTimeScaleBias" + OpName %PrimitiveFade "PrimitiveFade" + OpName %type_Material "type.Material" + OpMemberName %type_Material 0 "Material_VectorExpressions" + OpMemberName %type_Material 1 "Material_ScalarExpressions" + OpName %Material "Material" + OpName %type_2d_image "type.2d.image" + OpName %Material_Texture2D_0 "Material_Texture2D_0" + OpName %type_sampler "type.sampler" + OpName 
%Material_Texture2D_0Sampler "Material_Texture2D_0Sampler" + OpName %Material_Texture2D_3 "Material_Texture2D_3" + OpName %Material_Texture2D_3Sampler "Material_Texture2D_3Sampler" + OpName %in_var_TEXCOORD6 "in.var.TEXCOORD6" + OpName %in_var_TEXCOORD7 "in.var.TEXCOORD7" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %in_var_PRIMITIVE_ID "in.var.PRIMITIVE_ID" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %MainPixelShader "MainPixelShader" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_Position" + OpDecorateString %in_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %in_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorate %in_var_PRIMITIVE_ID Flat + OpDecorate %gl_FrontFacing BuiltIn FrontFacing + OpDecorateString %gl_FrontFacing UserSemantic "SV_IsFrontFace" + OpDecorate %gl_FrontFacing Flat + OpDecorate %gl_FragDepth BuiltIn FragDepth + OpDecorateString %gl_FragDepth UserSemantic "SV_DepthLessEqual" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD6 Location 0 + OpDecorate %in_var_TEXCOORD7 Location 1 + OpDecorate %in_var_TEXCOORD10_centroid Location 2 + OpDecorate %in_var_TEXCOORD11_centroid Location 3 + OpDecorate %in_var_TEXCOORD0 Location 4 + OpDecorate %in_var_PRIMITIVE_ID Location 5 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %PrimitiveDither DescriptorSet 0 + OpDecorate %PrimitiveDither Binding 
1 + OpDecorate %PrimitiveFade DescriptorSet 0 + OpDecorate %PrimitiveFade Binding 2 + OpDecorate %Material DescriptorSet 0 + OpDecorate %Material Binding 3 + OpDecorate %Material_Texture2D_0 DescriptorSet 0 + OpDecorate %Material_Texture2D_0 Binding 0 + OpDecorate %Material_Texture2D_0Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_0Sampler Binding 0 + OpDecorate %Material_Texture2D_3 DescriptorSet 0 + OpDecorate %Material_Texture2D_3 Binding 1 + OpDecorate %Material_Texture2D_3Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_3Sampler Binding 1 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + 
OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + OpMemberDecorate %type_View 30 Offset 1040 + OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 
37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 
2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate %type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate %type_View 92 Offset 2236 + OpMemberDecorate %type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + 
OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + OpMemberDecorate %type_View 136 Offset 2560 + OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate %type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate 
%type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate %type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate %type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpDecorate %type_View Block + OpMemberDecorate %type_PrimitiveDither 0 Offset 0 + OpDecorate %type_PrimitiveDither Block + OpMemberDecorate %type_PrimitiveFade 0 Offset 0 + OpDecorate %type_PrimitiveFade Block + OpDecorate %_arr_v4float_uint_9 ArrayStride 16 + OpDecorate %_arr_v4float_uint_3 ArrayStride 16 + OpMemberDecorate %type_Material 0 Offset 0 + OpMemberDecorate %type_Material 1 Offset 144 + OpDecorate %type_Material Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = 
OpConstant %uint 4 + %v2int = OpTypeVector %int 2 +%float_0_00100000005 = OpConstant %float 0.00100000005 + %int_2 = OpConstant %int 2 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %float_0 = OpConstant %float 0 + %49 = OpConstantComposite %v2float %float_0 %float_0 + %float_1 = OpConstant %float 1 + %int_4 = OpConstant %int 4 + %int_11 = OpConstant %int 11 +%float_0_249500006 = OpConstant %float 0.249500006 + %54 = OpConstantComposite %v2float %float_0_249500006 %float_0_249500006 +%float_0_499992371 = OpConstant %float 0.499992371 + %56 = OpConstantComposite %v2float %float_0_499992371 %float_0_499992371 + %int_32 = OpConstant %int 32 + %int_53 = OpConstant %int 53 + %int_57 = OpConstant %int 57 + %int_80 = OpConstant %int 80 + %int_82 = OpConstant %int 82 + %int_98 = OpConstant %int 98 + %uint_1 = OpConstant %uint 1 +%mat3v3float = OpTypeMatrix %v3float 3 + %float_2 = OpConstant %float 2 + %float_n1 = OpConstant %float -1 + %67 = OpConstantComposite %v2float %float_n1 %float_n1 + %bool = OpTypeBool + %float_n0_5 = OpConstant %float -0.5 + %70 = OpConstantComposite %v3float %float_0 %float_0 %float_1 +%float_0_333299994 = OpConstant %float 0.333299994 + %uint_5 = OpConstant %uint 5 +%float_347_834503 = OpConstant %float 347.834503 +%float_3343_28369 = OpConstant %float 3343.28369 + %75 = OpConstantComposite %v2float %float_347_834503 %float_3343_28369 + %float_1000 = OpConstant %float 1000 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float 
%mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_PrimitiveDither = OpTypeStruct %float +%_ptr_Uniform_type_PrimitiveDither = OpTypePointer Uniform %type_PrimitiveDither +%type_PrimitiveFade = OpTypeStruct %v2float +%_ptr_Uniform_type_PrimitiveFade = OpTypePointer Uniform %type_PrimitiveFade + %uint_9 = OpConstant %uint 9 +%_arr_v4float_uint_9 = OpTypeArray %v4float %uint_9 + %uint_3 = OpConstant %uint 3 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%type_Material = OpTypeStruct %_arr_v4float_uint_9 %_arr_v4float_uint_3 +%_ptr_Uniform_type_Material = OpTypePointer Uniform %type_Material +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler 
+%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%_ptr_Input__arr_v4float_uint_1 = OpTypePointer Input %_arr_v4float_uint_1 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_bool = OpTypePointer Input %bool +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %93 = OpTypeFunction %void +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%type_sampled_image = OpTypeSampledImage %type_2d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform +%PrimitiveDither = OpVariable %_ptr_Uniform_type_PrimitiveDither Uniform +%PrimitiveFade = OpVariable %_ptr_Uniform_type_PrimitiveFade Uniform + %Material = OpVariable %_ptr_Uniform_type_Material Uniform +%Material_Texture2D_0 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Material_Texture2D_0Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%Material_Texture2D_3 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Material_Texture2D_3Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD6 = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD7 = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr_v4float_uint_1 Input +%in_var_PRIMITIVE_ID = OpVariable %_ptr_Input_uint Input +%gl_FrontFacing = OpVariable %_ptr_Input_bool Input +%gl_FragDepth = OpVariable %_ptr_Output_float Output +%out_var_SV_Target0 = OpVariable 
%_ptr_Output_v4float Output + %98 = OpUndef %float + %99 = OpConstantNull %v2float +%float_0_015625 = OpConstant %float 0.015625 + %101 = OpConstantComposite %v2float %float_0_015625 %float_0_015625 +%float_0_166666672 = OpConstant %float 0.166666672 + %103 = OpUndef %float + %104 = OpConstantNull %v3float +%MainPixelShader = OpFunction %void None %93 + %105 = OpLabel + %106 = OpLoad %v4float %gl_FragCoord + %107 = OpLoad %v4float %in_var_TEXCOORD6 + %108 = OpLoad %v4float %in_var_TEXCOORD7 + %109 = OpLoad %v4float %in_var_TEXCOORD10_centroid + %110 = OpLoad %v4float %in_var_TEXCOORD11_centroid + %111 = OpLoad %_arr_v4float_uint_1 %in_var_TEXCOORD0 + %112 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_4 + %113 = OpLoad %mat4v4float %112 + %114 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_11 + %115 = OpLoad %mat4v4float %114 + %116 = OpAccessChain %_ptr_Uniform_v3float %View %int_32 + %117 = OpLoad %v3float %116 + %118 = OpAccessChain %_ptr_Uniform_v4float %View %int_53 + %119 = OpLoad %v4float %118 + %120 = OpAccessChain %_ptr_Uniform_v4float %View %int_57 + %121 = OpLoad %v4float %120 + %122 = OpAccessChain %_ptr_Uniform_float %View %int_80 + %123 = OpLoad %float %122 + %124 = OpCompositeExtract %v4float %111 0 + %125 = OpVectorShuffle %v2float %99 %124 2 3 + %126 = OpVectorShuffle %v3float %109 %109 0 1 2 + %127 = OpVectorShuffle %v3float %110 %110 0 1 2 + %128 = OpExtInst %v3float %1 Cross %127 %126 + %129 = OpCompositeExtract %float %110 3 + %130 = OpCompositeConstruct %v3float %129 %129 %129 + %131 = OpFMul %v3float %128 %130 + %132 = OpCompositeConstruct %mat3v3float %126 %131 %127 + %133 = OpVectorShuffle %v2float %106 %106 0 1 + %134 = OpVectorShuffle %v2float %121 %121 0 1 + %135 = OpFSub %v2float %133 %134 + %136 = OpCompositeExtract %float %106 2 + %137 = OpCompositeConstruct %v4float %103 %103 %136 %float_1 + %138 = OpCompositeExtract %float %106 3 + %139 = OpCompositeConstruct %v4float %138 %138 %138 %138 + %140 = OpFMul %v4float %137 
%139 + %141 = OpCompositeExtract %float %106 0 + %142 = OpCompositeExtract %float %106 1 + %143 = OpCompositeConstruct %v4float %141 %142 %136 %float_1 + %144 = OpMatrixTimesVector %v4float %115 %143 + %145 = OpVectorShuffle %v3float %144 %144 0 1 2 + %146 = OpCompositeExtract %float %144 3 + %147 = OpCompositeConstruct %v3float %146 %146 %146 + %148 = OpFDiv %v3float %145 %147 + %149 = OpFSub %v3float %148 %117 + %150 = OpFNegate %v3float %148 + %151 = OpExtInst %v3float %1 Normalize %150 + %152 = OpVectorTimesMatrix %v3float %151 %132 + %153 = OpVectorShuffle %v2float %152 %152 0 1 + %154 = OpFMul %v2float %153 %67 + %155 = OpCompositeExtract %float %152 2 + %156 = OpCompositeConstruct %v2float %155 %155 + %157 = OpFDiv %v2float %154 %156 + %158 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_0 + %159 = OpLoad %float %158 + %160 = OpCompositeConstruct %v2float %159 %159 + %161 = OpFMul %v2float %160 %157 + %162 = OpDot %float %151 %127 + %163 = OpExtInst %float %1 FAbs %162 + %164 = OpExtInst %float %1 FMax %163 %float_0 + %165 = OpExtInst %float %1 FMin %164 %float_1 + %166 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_1 + %167 = OpLoad %float %166 + %168 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_2 + %169 = OpLoad %float %168 + %170 = OpExtInst %float %1 FMix %167 %169 %165 + %171 = OpExtInst %float %1 Floor %170 + %172 = OpFDiv %float %float_1 %170 + %173 = OpCompositeConstruct %v2float %172 %172 + %174 = OpFMul %v2float %161 %173 + %175 = OpDPdx %v2float %125 + %176 = OpDPdy %v2float %125 + %177 = OpLoad %type_2d_image %Material_Texture2D_0 + %178 = OpLoad %type_sampler %Material_Texture2D_0Sampler + OpBranch %179 + %179 = OpLabel + %180 = OpPhi %float %float_1 %105 %181 %182 + %183 = OpPhi %v2float %49 %105 %184 %182 + %185 = OpPhi %int %int_0 %105 %186 %182 + %187 = OpPhi %float %float_1 %105 %188 %182 + %189 = OpPhi %float %float_1 %105 %180 %182 + %190 = OpConvertSToF %float %185 + %191 = OpFAdd 
%float %171 %float_2 + %192 = OpFOrdLessThan %bool %190 %191 + OpLoopMerge %193 %182 None + OpBranchConditional %192 %194 %193 + %194 = OpLabel + %195 = OpFAdd %v2float %125 %183 + %196 = OpSampledImage %type_sampled_image %177 %178 + %197 = OpImageSampleExplicitLod %v4float %196 %195 Grad %175 %176 + %188 = OpCompositeExtract %float %197 1 + %198 = OpFOrdLessThan %bool %180 %188 + OpSelectionMerge %182 None + OpBranchConditional %198 %199 %182 + %199 = OpLabel + %200 = OpFSub %float %189 %187 + %201 = OpFSub %float %188 %180 + %202 = OpFAdd %float %200 %201 + %203 = OpFDiv %float %201 %202 + %204 = OpFMul %float %189 %203 + %205 = OpFSub %float %float_1 %203 + %206 = OpFMul %float %180 %205 + %207 = OpFAdd %float %204 %206 + %208 = OpCompositeConstruct %v2float %203 %203 + %209 = OpFMul %v2float %208 %174 + %210 = OpFSub %v2float %183 %209 + OpBranch %193 + %182 = OpLabel + %181 = OpFSub %float %180 %172 + %184 = OpFAdd %v2float %183 %174 + %186 = OpIAdd %int %185 %int_1 + OpBranch %179 + %193 = OpLabel + %211 = OpPhi %float %98 %179 %207 %199 + %212 = OpPhi %v2float %183 %179 %210 %199 + %213 = OpVectorShuffle %v2float %212 %104 0 1 + %214 = OpFAdd %v2float %125 %213 + %215 = OpAccessChain %_ptr_Uniform_float %View %int_82 + %216 = OpLoad %float %215 + %217 = OpSampledImage %type_sampled_image %177 %178 + %218 = OpImageSampleImplicitLod %v4float %217 %214 Bias %216 + %219 = OpCompositeExtract %float %218 0 + %220 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_2 %int_1 + %221 = OpLoad %float %220 + %222 = OpFMul %float %219 %221 + %223 = OpFSub %float %float_1 %222 + %224 = OpExtInst %float %1 FMax %223 %float_0 + %225 = OpExtInst %float %1 FMin %224 %float_1 + %226 = OpAccessChain %_ptr_Uniform_float %View %int_98 %int_0 + %227 = OpLoad %float %226 + %228 = OpCompositeConstruct %v2float %227 %227 + %229 = OpFAdd %v2float %135 %228 + %230 = OpCompositeExtract %float %229 0 + %231 = OpConvertFToU %uint %230 + %232 = OpCompositeExtract %float %229 1 + 
%233 = OpConvertFToU %uint %232 + %234 = OpIMul %uint %uint_2 %233 + %235 = OpIAdd %uint %231 %234 + %236 = OpUMod %uint %235 %uint_5 + %237 = OpConvertUToF %float %236 + %238 = OpFMul %v2float %135 %101 + %239 = OpLoad %type_2d_image %Material_Texture2D_3 + %240 = OpLoad %type_sampler %Material_Texture2D_3Sampler + %241 = OpSampledImage %type_sampled_image %239 %240 + %242 = OpImageSampleImplicitLod %v4float %241 %238 Bias %216 + %243 = OpCompositeExtract %float %242 0 + %244 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_2 %int_2 + %245 = OpLoad %float %244 + %246 = OpFMul %float %243 %245 + %247 = OpFAdd %float %237 %246 + %248 = OpFMul %float %247 %float_0_166666672 + %249 = OpFAdd %float %225 %248 + %250 = OpFAdd %float %249 %float_n0_5 + %251 = OpCompositeExtract %float %218 2 + %252 = OpFAdd %float %251 %250 + %253 = OpSampledImage %type_sampled_image %239 %240 + %254 = OpImageSampleImplicitLod %v4float %253 %238 Bias %216 + %255 = OpCompositeExtract %float %254 0 + %256 = OpFAdd %float %237 %255 + %257 = OpFMul %float %256 %float_0_166666672 + %258 = OpAccessChain %_ptr_Uniform_float %PrimitiveFade %int_0 %int_0 + %259 = OpLoad %float %258 + %260 = OpFMul %float %123 %259 + %261 = OpAccessChain %_ptr_Uniform_float %PrimitiveFade %int_0 %int_1 + %262 = OpLoad %float %261 + %263 = OpFAdd %float %260 %262 + %264 = OpExtInst %float %1 FClamp %263 %float_0 %float_1 + %265 = OpFAdd %float %264 %257 + %266 = OpFAdd %float %265 %float_n0_5 + %267 = OpFMul %float %252 %266 + %268 = OpFSub %float %float_1 %211 + %269 = OpFMul %float %268 %159 + %270 = OpCompositeExtract %float %212 0 + %271 = OpCompositeExtract %float %212 1 + %272 = OpCompositeConstruct %v3float %270 %271 %269 + %273 = OpDot %float %272 %272 + %274 = OpExtInst %float %1 Sqrt %273 + %275 = OpDPdx %v2float %125 + %276 = OpExtInst %v2float %1 FAbs %275 + %277 = OpDot %float %276 %276 + %278 = OpExtInst %float %1 Sqrt %277 + %279 = OpDPdx %v3float %149 + %280 = OpDot %float %279 %279 + %281 = 
OpExtInst %float %1 Sqrt %280 + %282 = OpFDiv %float %278 %281 + %283 = OpDPdy %v2float %125 + %284 = OpExtInst %v2float %1 FAbs %283 + %285 = OpDot %float %284 %284 + %286 = OpExtInst %float %1 Sqrt %285 + %287 = OpDPdy %v3float %149 + %288 = OpDot %float %287 %287 + %289 = OpExtInst %float %1 Sqrt %288 + %290 = OpFDiv %float %286 %289 + %291 = OpExtInst %float %1 FMax %282 %290 + %292 = OpCompositeExtract %v4float %113 0 + %293 = OpVectorShuffle %v3float %292 %292 0 1 2 + %294 = OpCompositeExtract %v4float %113 1 + %295 = OpVectorShuffle %v3float %294 %294 0 1 2 + %296 = OpCompositeExtract %v4float %113 2 + %297 = OpVectorShuffle %v3float %296 %296 0 1 2 + %298 = OpCompositeConstruct %mat3v3float %293 %295 %297 + %299 = OpMatrixTimesVector %v3float %298 %70 + %300 = OpDot %float %299 %151 + %301 = OpExtInst %float %1 FAbs %300 + %302 = OpFDiv %float %291 %301 + %303 = OpFDiv %float %274 %302 + %304 = OpAccessChain %_ptr_Uniform_float %PrimitiveDither %int_0 + %305 = OpLoad %float %304 + %306 = OpFOrdNotEqual %bool %305 %float_0 + OpSelectionMerge %307 None + OpBranchConditional %306 %308 %307 + %308 = OpLabel + %309 = OpExtInst %float %1 FAbs %305 + %310 = OpFOrdGreaterThan %bool %309 %float_0_00100000005 + OpSelectionMerge %311 None + OpBranchConditional %310 %312 %311 + %312 = OpLabel + %313 = OpExtInst %v2float %1 Floor %133 + %314 = OpDot %float %313 %75 + %315 = OpExtInst %float %1 Cos %314 + %316 = OpFMul %float %315 %float_1000 + %317 = OpExtInst %float %1 Fract %316 + %318 = OpFOrdLessThan %bool %305 %float_0 + %319 = OpFAdd %float %305 %float_1 + %320 = OpFOrdGreaterThan %bool %319 %317 + %321 = OpFOrdLessThan %bool %305 %317 + %322 = OpSelect %bool %318 %320 %321 + %323 = OpSelect %float %322 %float_1 %float_0 + %324 = OpFSub %float %323 %float_0_00100000005 + %325 = OpFOrdLessThan %bool %324 %float_0 + OpSelectionMerge %326 None + OpBranchConditional %325 %327 %326 + %327 = OpLabel + OpKill + %326 = OpLabel + OpBranch %311 + %311 = OpLabel + OpBranch 
%307 + %307 = OpLabel + %328 = OpFSub %float %267 %float_0_333299994 + %329 = OpFOrdLessThan %bool %328 %float_0 + OpSelectionMerge %330 None + OpBranchConditional %329 %331 %330 + %331 = OpLabel + OpKill + %330 = OpLabel + %332 = OpCompositeExtract %float %140 2 + %333 = OpCompositeExtract %float %140 3 + %334 = OpFAdd %float %333 %303 + %335 = OpFDiv %float %332 %334 + %336 = OpExtInst %float %1 FMin %335 %136 + %337 = OpVectorShuffle %v2float %107 %107 0 1 + %338 = OpCompositeExtract %float %107 3 + %339 = OpCompositeConstruct %v2float %338 %338 + %340 = OpFDiv %v2float %337 %339 + %341 = OpVectorShuffle %v2float %119 %119 0 1 + %342 = OpFSub %v2float %340 %341 + %343 = OpVectorShuffle %v2float %108 %108 0 1 + %344 = OpCompositeExtract %float %108 3 + %345 = OpCompositeConstruct %v2float %344 %344 + %346 = OpFDiv %v2float %343 %345 + %347 = OpVectorShuffle %v2float %119 %119 2 3 + %348 = OpFSub %v2float %346 %347 + %349 = OpFSub %v2float %342 %348 + %350 = OpFMul %v2float %349 %54 + %351 = OpFAdd %v2float %350 %56 + %352 = OpVectorShuffle %v4float %351 %49 0 1 2 3 + OpStore %gl_FragDepth %336 + OpStore %out_var_SV_Target0 %352 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4-no-opt/asm/tese/ds-texcoord-array.asm.tese b/shaders-ue4-no-opt/asm/tese/ds-texcoord-array.asm.tese new file mode 100644 index 00000000000..778e93d39a3 --- /dev/null +++ b/shaders-ue4-no-opt/asm/tese/ds-texcoord-array.asm.tese @@ -0,0 +1,715 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 183 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %MainDomain "main" %gl_TessLevelOuter %gl_TessLevelInner %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_TEXCOORD0 %in_var_COLOR1 %in_var_COLOR2 %in_var_VS_To_DS_Position %in_var_TEXCOORD7 %in_var_Flat_DisplacementScales 
%in_var_Flat_TessellationMultiplier %in_var_Flat_WorldDisplacementMultiplier %gl_TessCoord %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_TEXCOORD0 %out_var_COLOR1 %out_var_COLOR2 %out_var_TEXCOORD6 %out_var_TEXCOORD7 %gl_Position + OpExecutionMode %MainDomain Triangles + OpExecutionMode %MainDomain SpacingFractionalOdd + OpExecutionMode %MainDomain VertexOrderCw + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 "View_ViewToClipNoAA" + OpMemberName %type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 "View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + 
OpMemberName %type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 "View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 45 "PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName %type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 
"PrePadding_View_2076" + OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName %type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName %type_View 81 "View_DeltaTime" + OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 
"View_DepthOfFieldSensorWidth" + OpMemberName %type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 "View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 "View_AtmosphericFogSunPower" + OpMemberName %type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + 
OpMemberName %type_View 132 "PrePadding_View_2524" + OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName %type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 "View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + 
OpMemberName %type_View 165 "PrePadding_View_2956" + OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpMemberName %type_View 179 "PrePadding_View_3048" + OpMemberName %type_View 180 "PrePadding_View_3052" + OpMemberName %type_View 181 "View_WorldToVirtualTexture" + OpMemberName %type_View 182 "View_VirtualTextureParams" + OpMemberName %type_View 183 "View_XRPassthroughCameraUVs" + OpName %View "View" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %in_var_COLOR1 "in.var.COLOR1" + OpName %in_var_COLOR2 "in.var.COLOR2" + OpName %in_var_VS_To_DS_Position "in.var.VS_To_DS_Position" + OpName %in_var_TEXCOORD7 "in.var.TEXCOORD7" + OpName %in_var_Flat_DisplacementScales "in.var.Flat_DisplacementScales" + OpName %in_var_Flat_TessellationMultiplier "in.var.Flat_TessellationMultiplier" + OpName %in_var_Flat_WorldDisplacementMultiplier "in.var.Flat_WorldDisplacementMultiplier" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_TEXCOORD0 "out.var.TEXCOORD0" + OpName %out_var_COLOR1 
"out.var.COLOR1" + OpName %out_var_COLOR2 "out.var.COLOR2" + OpName %out_var_TEXCOORD6 "out.var.TEXCOORD6" + OpName %out_var_TEXCOORD7 "out.var.TEXCOORD7" + OpName %MainDomain "MainDomain" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_COLOR1 UserSemantic "COLOR1" + OpDecorateString %in_var_COLOR2 UserSemantic "COLOR2" + OpDecorateString %in_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %in_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorateString %in_var_Flat_DisplacementScales UserSemantic "Flat_DisplacementScales" + OpDecorateString %in_var_Flat_TessellationMultiplier UserSemantic "Flat_TessellationMultiplier" + OpDecorateString %in_var_Flat_WorldDisplacementMultiplier UserSemantic "Flat_WorldDisplacementMultiplier" + OpDecorate %gl_TessCoord BuiltIn TessCoord + OpDecorateString %gl_TessCoord UserSemantic "SV_DomainLocation" + OpDecorate %gl_TessCoord Patch + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_COLOR1 UserSemantic "COLOR1" + OpDecorateString %out_var_COLOR2 UserSemantic "COLOR2" + OpDecorateString %out_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %out_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic 
"SV_POSITION" + OpDecorate %in_var_COLOR1 Location 0 + OpDecorate %in_var_COLOR2 Location 1 + OpDecorate %in_var_Flat_DisplacementScales Location 2 + OpDecorate %in_var_Flat_TessellationMultiplier Location 3 + OpDecorate %in_var_Flat_WorldDisplacementMultiplier Location 4 + OpDecorate %in_var_TEXCOORD0 Location 5 + OpDecorate %in_var_TEXCOORD10_centroid Location 6 + OpDecorate %in_var_TEXCOORD11_centroid Location 7 + OpDecorate %in_var_TEXCOORD7 Location 8 + OpDecorate %in_var_VS_To_DS_Position Location 9 + OpDecorate %out_var_TEXCOORD10_centroid Location 0 + OpDecorate %out_var_TEXCOORD11_centroid Location 1 + OpDecorate %out_var_TEXCOORD0 Location 2 + OpDecorate %out_var_COLOR1 Location 3 + OpDecorate %out_var_COLOR2 Location 4 + OpDecorate %out_var_TEXCOORD6 Location 5 + OpDecorate %out_var_TEXCOORD7 Location 6 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 
448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + OpMemberDecorate %type_View 30 Offset 1040 + OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 
MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 
1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate %type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate %type_View 92 Offset 2236 + OpMemberDecorate %type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + 
OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + OpMemberDecorate %type_View 136 Offset 2560 + OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate 
%type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate %type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate %type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate %type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpMemberDecorate %type_View 179 Offset 3048 + OpMemberDecorate %type_View 180 Offset 3052 + OpMemberDecorate %type_View 181 Offset 3056 + OpMemberDecorate %type_View 181 MatrixStride 16 + OpMemberDecorate %type_View 181 ColMajor + OpMemberDecorate %type_View 182 Offset 3120 + OpMemberDecorate %type_View 183 Offset 3136 + OpDecorate %type_View Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector 
%float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %v2int = OpTypeVector %int 2 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint_1 = OpConstant %uint 1 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float 
%v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float %float %float %mat4v4float %v4float %_arr_v4float_uint_2 +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Input__arr_float_uint_2 = OpTypePointer Input %_arr_float_uint_2 + %uint_3 = OpConstant %uint 3 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%_arr__arr_v4float_uint_1_uint_3 = OpTypeArray %_arr_v4float_uint_1 %uint_3 +%_ptr_Input__arr__arr_v4float_uint_1_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_1_uint_3 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Input__arr_v3float_uint_3 = OpTypePointer Input %_arr_v3float_uint_3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%_ptr_Input__arr_float_uint_3 = OpTypePointer Input %_arr_float_uint_3 +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output__arr_v4float_uint_1 = OpTypePointer Output %_arr_v4float_uint_1 +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %63 = OpTypeFunction %void +%_ptr_Function_v4float = OpTypePointer Function %v4float + %bool = OpTypeBool +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float + %View = OpVariable %_ptr_Uniform_type_View Uniform +%gl_TessLevelOuter = OpVariable %_ptr_Input__arr_float_uint_4 Input +%gl_TessLevelInner = OpVariable %_ptr_Input__arr_float_uint_2 Input +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr__arr_v4float_uint_1_uint_3 Input 
+%in_var_COLOR1 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_COLOR2 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_VS_To_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD7 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_Flat_DisplacementScales = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_Flat_TessellationMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%in_var_Flat_WorldDisplacementMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%gl_TessCoord = OpVariable %_ptr_Input_v3float Input +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD0 = OpVariable %_ptr_Output__arr_v4float_uint_1 Output +%out_var_COLOR1 = OpVariable %_ptr_Output_v4float Output +%out_var_COLOR2 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD6 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD7 = OpVariable %_ptr_Output_v3float Output +%gl_Position = OpVariable %_ptr_Output_v4float Output +%_ptr_Function__arr_v4float_uint_1 = OpTypePointer Function %_arr_v4float_uint_1 + %68 = OpUndef %v4float + %69 = OpConstantNull %v4float + %MainDomain = OpFunction %void None %63 + %70 = OpLabel + %71 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %72 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %73 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %74 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %75 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %76 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %77 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD10_centroid + %78 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD11_centroid + %79 = OpLoad %_arr__arr_v4float_uint_1_uint_3 %in_var_TEXCOORD0 + %80 = OpLoad %_arr_v4float_uint_3 %in_var_COLOR1 + %81 = OpLoad %_arr_v4float_uint_3 %in_var_COLOR2 + %82 = 
OpCompositeExtract %v4float %77 0 + %83 = OpCompositeExtract %v4float %78 0 + %84 = OpCompositeExtract %_arr_v4float_uint_1 %79 0 + %85 = OpCompositeExtract %v4float %80 0 + %86 = OpCompositeExtract %v4float %81 0 + %87 = OpCompositeExtract %v4float %77 1 + %88 = OpCompositeExtract %v4float %78 1 + %89 = OpCompositeExtract %_arr_v4float_uint_1 %79 1 + %90 = OpCompositeExtract %v4float %80 1 + %91 = OpCompositeExtract %v4float %81 1 + %92 = OpCompositeExtract %v4float %77 2 + %93 = OpCompositeExtract %v4float %78 2 + %94 = OpCompositeExtract %_arr_v4float_uint_1 %79 2 + %95 = OpCompositeExtract %v4float %80 2 + %96 = OpCompositeExtract %v4float %81 2 + %97 = OpLoad %_arr_v4float_uint_3 %in_var_VS_To_DS_Position + %98 = OpLoad %_arr_v3float_uint_3 %in_var_TEXCOORD7 + %99 = OpCompositeExtract %v4float %97 0 + %100 = OpCompositeExtract %v3float %98 0 + %101 = OpCompositeExtract %v4float %97 1 + %102 = OpCompositeExtract %v3float %98 1 + %103 = OpCompositeExtract %v4float %97 2 + %104 = OpCompositeExtract %v3float %98 2 + %105 = OpLoad %v3float %gl_TessCoord + %106 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_0 + %107 = OpLoad %mat4v4float %106 + %108 = OpCompositeExtract %float %105 0 + %109 = OpCompositeExtract %float %105 1 + %110 = OpCompositeExtract %float %105 2 + %111 = OpCompositeConstruct %v4float %108 %108 %108 %108 + %112 = OpFMul %v4float %99 %111 + %113 = OpCompositeConstruct %v4float %109 %109 %109 %109 + %114 = OpFMul %v4float %101 %113 + %115 = OpFAdd %v4float %112 %114 + %116 = OpCompositeConstruct %v4float %110 %110 %110 %110 + %117 = OpFMul %v4float %103 %116 + %118 = OpFAdd %v4float %115 %117 + OpStore %72 %84 + OpStore %71 %89 + %119 = OpVectorShuffle %v3float %82 %82 0 1 2 + %120 = OpCompositeConstruct %v3float %108 %108 %108 + %121 = OpFMul %v3float %119 %120 + %122 = OpVectorShuffle %v3float %87 %87 0 1 2 + %123 = OpCompositeConstruct %v3float %109 %109 %109 + %124 = OpFMul %v3float %122 %123 + %125 = OpFAdd %v3float %121 %124 + %126 = 
OpFMul %v4float %83 %111 + %127 = OpFMul %v4float %88 %113 + %128 = OpFAdd %v4float %126 %127 + %129 = OpFMul %v4float %85 %111 + %130 = OpFMul %v4float %90 %113 + %131 = OpFAdd %v4float %129 %130 + OpBranch %132 + %132 = OpLabel + %133 = OpPhi %int %int_0 %70 %134 %135 + %136 = OpSLessThan %bool %133 %int_1 + OpLoopMerge %137 %135 None + OpBranchConditional %136 %135 %137 + %135 = OpLabel + %138 = OpAccessChain %_ptr_Function_v4float %72 %133 + %139 = OpLoad %v4float %138 + %140 = OpFMul %v4float %139 %111 + %141 = OpAccessChain %_ptr_Function_v4float %71 %133 + %142 = OpLoad %v4float %141 + %143 = OpFMul %v4float %142 %113 + %144 = OpFAdd %v4float %140 %143 + %145 = OpAccessChain %_ptr_Function_v4float %73 %133 + OpStore %145 %144 + %134 = OpIAdd %int %133 %int_1 + OpBranch %132 + %137 = OpLabel + %146 = OpFMul %v4float %86 %111 + %147 = OpFMul %v4float %91 %113 + %148 = OpFAdd %v4float %146 %147 + %149 = OpLoad %_arr_v4float_uint_1 %73 + %150 = OpFMul %v3float %100 %120 + %151 = OpFMul %v3float %102 %123 + %152 = OpFAdd %v3float %150 %151 + OpStore %75 %149 + OpStore %74 %94 + %153 = OpVectorShuffle %v3float %125 %69 0 1 2 + %154 = OpVectorShuffle %v3float %92 %92 0 1 2 + %155 = OpCompositeConstruct %v3float %110 %110 %110 + %156 = OpFMul %v3float %154 %155 + %157 = OpFAdd %v3float %153 %156 + %158 = OpVectorShuffle %v4float %68 %157 4 5 6 3 + %159 = OpFMul %v4float %93 %116 + %160 = OpFAdd %v4float %128 %159 + %161 = OpFMul %v4float %95 %116 + %162 = OpFAdd %v4float %131 %161 + OpBranch %163 + %163 = OpLabel + %164 = OpPhi %int %int_0 %137 %165 %166 + %167 = OpSLessThan %bool %164 %int_1 + OpLoopMerge %168 %166 None + OpBranchConditional %167 %166 %168 + %166 = OpLabel + %169 = OpAccessChain %_ptr_Function_v4float %75 %164 + %170 = OpLoad %v4float %169 + %171 = OpAccessChain %_ptr_Function_v4float %74 %164 + %172 = OpLoad %v4float %171 + %173 = OpFMul %v4float %172 %116 + %174 = OpFAdd %v4float %170 %173 + %175 = OpAccessChain %_ptr_Function_v4float %76 %164 + 
OpStore %175 %174 + %165 = OpIAdd %int %164 %int_1 + OpBranch %163 + %168 = OpLabel + %176 = OpFMul %v4float %96 %116 + %177 = OpFAdd %v4float %148 %176 + %178 = OpLoad %_arr_v4float_uint_1 %76 + %179 = OpFMul %v3float %104 %155 + %180 = OpFAdd %v3float %152 %179 + %181 = OpVectorShuffle %v4float %118 %118 4 5 6 3 + %182 = OpMatrixTimesVector %v4float %107 %181 + OpStore %out_var_TEXCOORD10_centroid %158 + OpStore %out_var_TEXCOORD11_centroid %160 + OpStore %out_var_TEXCOORD0 %178 + OpStore %out_var_COLOR1 %162 + OpStore %out_var_COLOR2 %177 + OpStore %out_var_TEXCOORD6 %181 + OpStore %out_var_TEXCOORD7 %180 + OpStore %gl_Position %182 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4-no-opt/asm/vert/loop-accesschain-writethrough.asm.invalid.vert b/shaders-ue4-no-opt/asm/vert/loop-accesschain-writethrough.asm.invalid.vert new file mode 100644 index 00000000000..693f16c0099 --- /dev/null +++ b/shaders-ue4-no-opt/asm/vert/loop-accesschain-writethrough.asm.invalid.vert @@ -0,0 +1,259 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 181 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %ScatterMainVS "main" %gl_VertexIndex %gl_InstanceIndex %out_var_TEXCOORD0 %out_var_TEXCOORD1 %out_var_TEXCOORD2 %out_var_TEXCOORD3 %out_var_TEXCOORD4 %out_var_TEXCOORD5 %out_var_TEXCOORD6 %gl_Position + OpSource HLSL 600 + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "ViewportSize" + OpMemberName %type__Globals 1 "ScatteringScaling" + OpMemberName %type__Globals 2 "CocRadiusToCircumscribedRadius" + OpName %_Globals "$Globals" + OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float" + OpName %ScatterDrawList "ScatterDrawList" + OpName %out_var_TEXCOORD0 "out.var.TEXCOORD0" + OpName %out_var_TEXCOORD1 "out.var.TEXCOORD1" + OpName %out_var_TEXCOORD2 "out.var.TEXCOORD2" + OpName %out_var_TEXCOORD3 
"out.var.TEXCOORD3" + OpName %out_var_TEXCOORD4 "out.var.TEXCOORD4" + OpName %out_var_TEXCOORD5 "out.var.TEXCOORD5" + OpName %out_var_TEXCOORD6 "out.var.TEXCOORD6" + OpName %ScatterMainVS "ScatterMainVS" + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorateString %gl_VertexIndex UserSemantic "SV_VertexID" + OpDecorate %gl_InstanceIndex BuiltIn InstanceIndex + OpDecorateString %gl_InstanceIndex UserSemantic "SV_InstanceID" + OpDecorateString %out_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_TEXCOORD1 UserSemantic "TEXCOORD1" + OpDecorateString %out_var_TEXCOORD2 UserSemantic "TEXCOORD2" + OpDecorateString %out_var_TEXCOORD3 UserSemantic "TEXCOORD3" + OpDecorateString %out_var_TEXCOORD4 UserSemantic "TEXCOORD4" + OpDecorateString %out_var_TEXCOORD5 UserSemantic "TEXCOORD5" + OpDecorateString %out_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic "SV_POSITION" + OpDecorate %out_var_TEXCOORD0 Location 0 + OpDecorate %out_var_TEXCOORD1 Location 1 + OpDecorate %out_var_TEXCOORD2 Location 2 + OpDecorate %out_var_TEXCOORD3 Location 3 + OpDecorate %out_var_TEXCOORD4 Location 4 + OpDecorate %out_var_TEXCOORD5 Location 5 + OpDecorate %out_var_TEXCOORD6 Location 6 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 1 + OpDecorate %ScatterDrawList DescriptorSet 0 + OpDecorate %ScatterDrawList Binding 0 + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 20 + OpDecorate %type__Globals Block + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0 + OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable + OpDecorate %type_StructuredBuffer_v4float BufferBlock + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = 
OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %float_0_5 = OpConstant %float 0.5 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %float_1 = OpConstant %float 1 + %uint_16 = OpConstant %uint 16 + %float_0 = OpConstant %float 0 + %uint_0 = OpConstant %uint 0 + %uint_5 = OpConstant %uint 5 + %uint_1 = OpConstant %uint 1 + %int_3 = OpConstant %int 3 + %float_n0_5 = OpConstant %float -0.5 + %int_2 = OpConstant %int 2 + %float_2 = OpConstant %float 2 + %39 = OpConstantComposite %v2float %float_2 %float_2 + %40 = OpConstantComposite %v2float %float_1 %float_1 + %41 = OpConstantComposite %v2float %float_0_5 %float_0_5 +%type__Globals = OpTypeStruct %v4float %float %float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%_runtimearr_v4float = OpTypeRuntimeArray %v4float +%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %48 = OpTypeFunction %void +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Function__arr_v4float_uint_4 = OpTypePointer Function %_arr_v4float_uint_4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Function__arr_float_uint_4 = OpTypePointer Function %_arr_float_uint_4 +%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 +%_ptr_Function__arr_v2float_uint_4 = OpTypePointer Function %_arr_v2float_uint_4 +%_ptr_Function_float = OpTypePointer Function %float + %bool = OpTypeBool +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%ScatterDrawList = OpVariable 
%_ptr_Uniform_type_StructuredBuffer_v4float Uniform +%gl_VertexIndex = OpVariable %_ptr_Input_uint Input +%gl_InstanceIndex = OpVariable %_ptr_Input_uint Input +%out_var_TEXCOORD0 = OpVariable %_ptr_Output_v2float Output +%out_var_TEXCOORD1 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD2 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD3 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD4 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD5 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD6 = OpVariable %_ptr_Output_v4float Output +%gl_Position = OpVariable %_ptr_Output_v4float Output +%ScatterMainVS = OpFunction %void None %48 + %60 = OpLabel + %61 = OpVariable %_ptr_Function__arr_v4float_uint_4 Function + %62 = OpVariable %_ptr_Function__arr_float_uint_4 Function + %63 = OpVariable %_ptr_Function__arr_v2float_uint_4 Function + %64 = OpLoad %uint %gl_VertexIndex + %65 = OpLoad %uint %gl_InstanceIndex + %66 = OpUDiv %uint %64 %uint_4 + %67 = OpIMul %uint %66 %uint_4 + %68 = OpISub %uint %64 %67 + %69 = OpIMul %uint %uint_16 %65 + %70 = OpIAdd %uint %69 %66 + OpBranch %71 + %71 = OpLabel + %72 = OpPhi %float %float_0 %60 %73 %74 + %75 = OpPhi %uint %uint_0 %60 %76 %74 + %77 = OpULessThan %bool %75 %uint_4 + OpLoopMerge %78 %74 Unroll + OpBranchConditional %77 %79 %78 + %79 = OpLabel + %80 = OpIMul %uint %uint_5 %70 + %81 = OpIAdd %uint %80 %75 + %82 = OpIAdd %uint %81 %uint_1 + %83 = OpAccessChain %_ptr_Uniform_v4float %ScatterDrawList %int_0 %82 + %84 = OpLoad %v4float %83 + %85 = OpCompositeExtract %float %84 0 + %86 = OpCompositeExtract %float %84 1 + %87 = OpCompositeExtract %float %84 2 + %88 = OpCompositeConstruct %v4float %85 %86 %87 %float_0 + %89 = OpAccessChain %_ptr_Function_v4float %61 %75 + OpStore %89 %88 + %90 = OpCompositeExtract %float %84 3 + %91 = OpAccessChain %_ptr_Function_float %62 %75 + OpStore %91 %90 + %92 = OpIEqual %bool %75 %uint_0 + OpSelectionMerge %74 None + OpBranchConditional %92 %93 
%94 + %93 = OpLabel + %95 = OpLoad %float %91 + OpBranch %74 + %94 = OpLabel + %96 = OpLoad %float %91 + %97 = OpExtInst %float %1 FMax %72 %96 + OpBranch %74 + %74 = OpLabel + %73 = OpPhi %float %95 %93 %97 %94 + %98 = OpLoad %float %91 + %99 = OpFDiv %float %float_n0_5 %98 + %100 = OpAccessChain %_ptr_Function_float %63 %75 %int_0 + OpStore %100 %99 + %101 = OpLoad %float %91 + %102 = OpFMul %float %float_0_5 %101 + %103 = OpFAdd %float %102 %float_0_5 + %104 = OpAccessChain %_ptr_Function_float %63 %75 %int_1 + OpStore %104 %103 + %76 = OpIAdd %uint %75 %uint_1 + OpBranch %71 + %78 = OpLabel + %105 = OpAccessChain %_ptr_Function_v4float %61 %int_0 + %106 = OpLoad %v4float %105 + %107 = OpCompositeExtract %float %106 0 + %108 = OpCompositeExtract %float %106 1 + %109 = OpCompositeExtract %float %106 2 + %110 = OpAccessChain %_ptr_Function_float %62 %int_0 + %111 = OpLoad %float %110 + %112 = OpCompositeConstruct %v4float %107 %108 %109 %111 + %113 = OpAccessChain %_ptr_Function_v4float %61 %int_1 + %114 = OpLoad %v4float %113 + %115 = OpCompositeExtract %float %114 0 + %116 = OpCompositeExtract %float %114 1 + %117 = OpCompositeExtract %float %114 2 + %118 = OpAccessChain %_ptr_Function_float %62 %int_1 + %119 = OpLoad %float %118 + %120 = OpCompositeConstruct %v4float %115 %116 %117 %119 + %121 = OpAccessChain %_ptr_Function_v4float %61 %int_2 + %122 = OpLoad %v4float %121 + %123 = OpCompositeExtract %float %122 0 + %124 = OpCompositeExtract %float %122 1 + %125 = OpCompositeExtract %float %122 2 + %126 = OpAccessChain %_ptr_Function_float %62 %int_2 + %127 = OpLoad %float %126 + %128 = OpCompositeConstruct %v4float %123 %124 %125 %127 + %129 = OpAccessChain %_ptr_Function_v4float %61 %int_3 + %130 = OpLoad %v4float %129 + %131 = OpCompositeExtract %float %130 0 + %132 = OpCompositeExtract %float %130 1 + %133 = OpCompositeExtract %float %130 2 + %134 = OpAccessChain %_ptr_Function_float %62 %int_3 + %135 = OpLoad %float %134 + %136 = OpCompositeConstruct 
%v4float %131 %132 %133 %135 + %137 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 + %138 = OpLoad %float %137 + %139 = OpCompositeConstruct %v2float %138 %138 + %140 = OpIMul %uint %uint_5 %70 + %141 = OpAccessChain %_ptr_Uniform_v4float %ScatterDrawList %int_0 %140 + %142 = OpLoad %v4float %141 + %143 = OpVectorShuffle %v2float %142 %142 0 1 + %144 = OpFMul %v2float %139 %143 + %145 = OpAccessChain %_ptr_Function_v2float %63 %int_0 + %146 = OpLoad %v2float %145 + %147 = OpAccessChain %_ptr_Function_v2float %63 %int_1 + %148 = OpLoad %v2float %147 + %149 = OpVectorShuffle %v4float %146 %148 0 1 2 3 + %150 = OpAccessChain %_ptr_Function_v2float %63 %int_2 + %151 = OpLoad %v2float %150 + %152 = OpAccessChain %_ptr_Function_v2float %63 %int_3 + %153 = OpLoad %v2float %152 + %154 = OpVectorShuffle %v4float %151 %153 0 1 2 3 + %155 = OpUMod %uint %68 %uint_2 + %156 = OpConvertUToF %float %155 + %157 = OpUDiv %uint %68 %uint_2 + %158 = OpConvertUToF %float %157 + %159 = OpCompositeConstruct %v2float %156 %158 + %160 = OpFMul %v2float %159 %39 + %161 = OpFSub %v2float %160 %40 + %162 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 + %163 = OpLoad %float %162 + %164 = OpFMul %float %72 %163 + %165 = OpFAdd %float %164 %float_1 + %166 = OpCompositeConstruct %v2float %165 %165 + %167 = OpFMul %v2float %166 %161 + %168 = OpFAdd %v2float %167 %144 + %169 = OpFAdd %v2float %168 %41 + %170 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_0 + %171 = OpLoad %v4float %170 + %172 = OpVectorShuffle %v2float %171 %171 2 3 + %173 = OpFMul %v2float %169 %172 + %174 = OpCompositeExtract %float %173 0 + %175 = OpFMul %float %174 %float_2 + %176 = OpFSub %float %175 %float_1 + %177 = OpCompositeExtract %float %173 1 + %178 = OpFMul %float %177 %float_2 + %179 = OpFSub %float %float_1 %178 + %180 = OpCompositeConstruct %v4float %176 %179 %float_0 %float_1 + OpStore %out_var_TEXCOORD0 %144 + OpStore %out_var_TEXCOORD1 %112 + OpStore %out_var_TEXCOORD2 %120 + OpStore 
%out_var_TEXCOORD3 %128 + OpStore %out_var_TEXCOORD4 %136 + OpStore %out_var_TEXCOORD5 %149 + OpStore %out_var_TEXCOORD6 %154 + OpStore %gl_Position %180 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/depth-compare.asm.frag b/shaders-ue4/asm/frag/depth-compare.asm.frag new file mode 100644 index 00000000000..603d4f28c46 --- /dev/null +++ b/shaders-ue4/asm/frag/depth-compare.asm.frag @@ -0,0 +1,961 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 452 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %MainOnePassPointLightPS "main" %gl_FragCoord %out_var_SV_Target0 + OpExecutionMode %MainOnePassPointLightPS OriginUpperLeft + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 
"View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName 
%type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 
"View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 
"View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName %type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 
"View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_2d_image "type.2d.image" + OpName %SceneTexturesStruct_SceneDepthTexture "SceneTexturesStruct_SceneDepthTexture" + OpName %type_sampler "type.sampler" + OpName %SceneTexturesStruct_SceneDepthTextureSampler "SceneTexturesStruct_SceneDepthTextureSampler" + OpName %SceneTexturesStruct_GBufferATexture "SceneTexturesStruct_GBufferATexture" + OpName %SceneTexturesStruct_GBufferBTexture "SceneTexturesStruct_GBufferBTexture" + OpName %SceneTexturesStruct_GBufferDTexture "SceneTexturesStruct_GBufferDTexture" + OpName %SceneTexturesStruct_GBufferATextureSampler "SceneTexturesStruct_GBufferATextureSampler" + OpName %SceneTexturesStruct_GBufferBTextureSampler "SceneTexturesStruct_GBufferBTextureSampler" + OpName %SceneTexturesStruct_GBufferDTextureSampler "SceneTexturesStruct_GBufferDTextureSampler" + OpName %ShadowDepthTextureSampler "ShadowDepthTextureSampler" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "SoftTransitionScale" + OpMemberName %type__Globals 1 "ShadowViewProjectionMatrices" + OpMemberName %type__Globals 2 "InvShadowmapResolution" + OpMemberName %type__Globals 3 "ShadowFadeFraction" + OpMemberName %type__Globals 4 "ShadowSharpen" + 
OpMemberName %type__Globals 5 "LightPositionAndInvRadius" + OpMemberName %type__Globals 6 "ProjectionDepthBiasParameters" + OpMemberName %type__Globals 7 "PointLightDepthBiasAndProjParameters" + OpName %_Globals "$Globals" + OpName %type_cube_image "type.cube.image" + OpName %ShadowDepthCubeTexture "ShadowDepthCubeTexture" + OpName %ShadowDepthCubeTextureSampler "ShadowDepthCubeTextureSampler" + OpName %SSProfilesTexture "SSProfilesTexture" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %MainOnePassPointLightPS "MainOnePassPointLightPS" + OpName %type_sampled_image "type.sampled.image" + OpName %type_sampled_image_0 "type.sampled.image" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %SceneTexturesStruct_SceneDepthTexture DescriptorSet 0 + OpDecorate %SceneTexturesStruct_SceneDepthTexture Binding 0 + OpDecorate %SceneTexturesStruct_SceneDepthTextureSampler DescriptorSet 0 + OpDecorate %SceneTexturesStruct_SceneDepthTextureSampler Binding 0 + OpDecorate %SceneTexturesStruct_GBufferATexture DescriptorSet 0 + OpDecorate %SceneTexturesStruct_GBufferATexture Binding 1 + OpDecorate %SceneTexturesStruct_GBufferBTexture DescriptorSet 0 + OpDecorate %SceneTexturesStruct_GBufferBTexture Binding 2 + OpDecorate %SceneTexturesStruct_GBufferDTexture DescriptorSet 0 + OpDecorate %SceneTexturesStruct_GBufferDTexture Binding 3 + OpDecorate %SceneTexturesStruct_GBufferATextureSampler DescriptorSet 0 + OpDecorate %SceneTexturesStruct_GBufferATextureSampler Binding 1 + OpDecorate %SceneTexturesStruct_GBufferBTextureSampler DescriptorSet 0 + OpDecorate %SceneTexturesStruct_GBufferBTextureSampler Binding 2 + OpDecorate %SceneTexturesStruct_GBufferDTextureSampler DescriptorSet 0 + OpDecorate %SceneTexturesStruct_GBufferDTextureSampler 
Binding 3 + OpDecorate %ShadowDepthTextureSampler DescriptorSet 0 + OpDecorate %ShadowDepthTextureSampler Binding 4 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 1 + OpDecorate %ShadowDepthCubeTexture DescriptorSet 0 + OpDecorate %ShadowDepthCubeTexture Binding 4 + OpDecorate %ShadowDepthCubeTextureSampler DescriptorSet 0 + OpDecorate %ShadowDepthCubeTextureSampler Binding 5 + OpDecorate %SSProfilesTexture DescriptorSet 0 + OpDecorate %SSProfilesTexture Binding 5 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 
+ OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 
MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 
+ OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + 
OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate 
%type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpDecorate %_arr_mat4v4float_uint_6 ArrayStride 64 + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 1 MatrixStride 16 + OpMemberDecorate %type__Globals 1 ColMajor + OpMemberDecorate %type__Globals 2 Offset 400 + OpMemberDecorate %type__Globals 3 Offset 404 + OpMemberDecorate %type__Globals 4 Offset 408 + OpMemberDecorate %type__Globals 5 Offset 416 + OpMemberDecorate %type__Globals 6 Offset 432 + OpMemberDecorate %type__Globals 7 Offset 448 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %float_0 = OpConstant %float 0 + %float_2_5 = OpConstant %float 2.5 +%float_2_37764096 = OpConstant %float 2.37764096 +%float_0_772542 = OpConstant %float 0.772542 +%float_1_46946299 = OpConstant %float 1.46946299 +%float_n2_02254295 = OpConstant %float -2.02254295 +%float_n1_46946299 = OpConstant %float -1.46946299 +%float_n2_022542 = OpConstant %float -2.022542 +%float_n2_37764096 = OpConstant %float -2.37764096 
+%float_0_772543013 = OpConstant %float 0.772543013 + %float_1 = OpConstant %float 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_3 = OpConstant %int 3 + %int_7 = OpConstant %int 7 + %int_58 = OpConstant %int 58 + %int_24 = OpConstant %int 24 + %int_11 = OpConstant %int 11 + %int_5 = OpConstant %int 5 + %float_0_5 = OpConstant %float 0.5 + %int_4 = OpConstant %int 4 + %int_2 = OpConstant %int 2 + %62 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %bool = OpTypeBool + %uint_5 = OpConstant %uint 5 + %65 = OpConstantComposite %v3float %float_0 %float_0 %float_1 + %66 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %float_10 = OpConstant %float 10 + %float_5 = OpConstant %float 5 + %uint_0 = OpConstant %uint 0 + %int_23 = OpConstant %int 23 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 + %uint_16 = OpConstant %uint 16 +%float_0_150000006 = OpConstant %float 0.150000006 + %float_0_25 = OpConstant %float 0.25 + %float_2 = OpConstant %float 2 + %77 = OpConstantComposite %v3float %float_2 %float_2 %float_2 + %float_255 = OpConstant %float 255 + %uint_15 = OpConstant %uint 15 +%uint_4294967280 = OpConstant %uint 4294967280 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float 
%v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler + %uint_6 = OpConstant %uint 6 +%_arr_mat4v4float_uint_6 = OpTypeArray %mat4v4float %uint_6 +%type__Globals = OpTypeStruct %v3float %_arr_mat4v4float_uint_6 %float %float %float %v4float %v2float %v4float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_cube_image = OpTypeImage %float Cube 2 0 0 1 Unknown +%_ptr_UniformConstant_type_cube_image = OpTypePointer UniformConstant %type_cube_image + %v2int = OpTypeVector %int 2 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %91 = OpTypeFunction %void +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%type_sampled_image = 
OpTypeSampledImage %type_cube_image + %v3int = OpTypeVector %int 3 +%type_sampled_image_0 = OpTypeSampledImage %type_2d_image + %v4bool = OpTypeVector %bool 4 + %View = OpVariable %_ptr_Uniform_type_View Uniform +%SceneTexturesStruct_SceneDepthTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%SceneTexturesStruct_SceneDepthTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%SceneTexturesStruct_GBufferATexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%SceneTexturesStruct_GBufferBTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%SceneTexturesStruct_GBufferDTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%SceneTexturesStruct_GBufferATextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%SceneTexturesStruct_GBufferBTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%SceneTexturesStruct_GBufferDTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%ShadowDepthTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%ShadowDepthCubeTexture = OpVariable %_ptr_UniformConstant_type_cube_image UniformConstant +%ShadowDepthCubeTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%SSProfilesTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output +%float_0_200000003 = OpConstant %float 0.200000003 + %98 = OpConstantComposite %v3float %float_2_5 %float_2_5 %float_2_5 + %99 = OpConstantComposite %v3float %float_2_37764096 %float_2_37764096 %float_2_37764096 + %100 = OpConstantComposite %v3float %float_0_772542 %float_0_772542 %float_0_772542 + %101 = OpConstantComposite %v3float %float_1_46946299 %float_1_46946299 %float_1_46946299 + 
%102 = OpConstantComposite %v3float %float_n2_02254295 %float_n2_02254295 %float_n2_02254295 + %103 = OpConstantComposite %v3float %float_n1_46946299 %float_n1_46946299 %float_n1_46946299 + %104 = OpConstantComposite %v3float %float_n2_022542 %float_n2_022542 %float_n2_022542 + %105 = OpConstantComposite %v3float %float_n2_37764096 %float_n2_37764096 %float_n2_37764096 + %106 = OpConstantComposite %v3float %float_0_772543013 %float_0_772543013 %float_0_772543013 + %107 = OpUndef %v4float +%MainOnePassPointLightPS = OpFunction %void None %91 + %108 = OpLabel + %109 = OpLoad %v4float %gl_FragCoord + %110 = OpVectorShuffle %v2float %109 %109 0 1 + %111 = OpAccessChain %_ptr_Uniform_v4float %View %int_58 + %112 = OpLoad %v4float %111 + %113 = OpVectorShuffle %v2float %112 %112 2 3 + %114 = OpFMul %v2float %110 %113 + %115 = OpLoad %type_2d_image %SceneTexturesStruct_SceneDepthTexture + %116 = OpLoad %type_sampler %SceneTexturesStruct_SceneDepthTextureSampler + %117 = OpSampledImage %type_sampled_image_0 %115 %116 + %118 = OpImageSampleExplicitLod %v4float %117 %114 Lod %float_0 + %119 = OpCompositeExtract %float %118 0 + %120 = OpAccessChain %_ptr_Uniform_float %View %int_23 %uint_0 + %121 = OpLoad %float %120 + %122 = OpFMul %float %119 %121 + %123 = OpAccessChain %_ptr_Uniform_float %View %int_23 %uint_1 + %124 = OpLoad %float %123 + %125 = OpFAdd %float %122 %124 + %126 = OpAccessChain %_ptr_Uniform_float %View %int_23 %uint_2 + %127 = OpLoad %float %126 + %128 = OpFMul %float %119 %127 + %129 = OpAccessChain %_ptr_Uniform_float %View %int_23 %uint_3 + %130 = OpLoad %float %129 + %131 = OpFSub %float %128 %130 + %132 = OpFDiv %float %float_1 %131 + %133 = OpFAdd %float %125 %132 + %134 = OpAccessChain %_ptr_Uniform_v4float %View %int_24 + %135 = OpLoad %v4float %134 + %136 = OpVectorShuffle %v2float %135 %135 3 2 + %137 = OpFSub %v2float %114 %136 + %138 = OpVectorShuffle %v2float %135 %135 0 1 + %139 = OpFDiv %v2float %137 %138 + %140 = OpCompositeConstruct 
%v2float %133 %133 + %141 = OpFMul %v2float %139 %140 + %142 = OpCompositeExtract %float %141 0 + %143 = OpCompositeExtract %float %141 1 + %144 = OpCompositeConstruct %v4float %142 %143 %133 %float_1 + %145 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_11 + %146 = OpLoad %mat4v4float %145 + %147 = OpMatrixTimesVector %v4float %146 %144 + %148 = OpVectorShuffle %v3float %147 %147 0 1 2 + %149 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_5 + %150 = OpLoad %v4float %149 + %151 = OpVectorShuffle %v3float %150 %150 0 1 2 + %152 = OpFSub %v3float %151 %148 + %153 = OpAccessChain %_ptr_Uniform_float %_Globals %int_5 %int_3 + %154 = OpLoad %float %153 + %155 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_7 + %156 = OpAccessChain %_ptr_Uniform_float %_Globals %int_7 %int_0 + %157 = OpLoad %float %156 + %158 = OpExtInst %float %1 Length %152 + %159 = OpFMul %float %158 %154 + %160 = OpFOrdLessThan %bool %159 %float_1 + OpSelectionMerge %161 DontFlatten + OpBranchConditional %160 %162 %161 + %162 = OpLabel + %163 = OpCompositeConstruct %v3float %158 %158 %158 + %164 = OpFDiv %v3float %152 %163 + %165 = OpExtInst %v3float %1 FAbs %152 + %166 = OpCompositeExtract %float %165 0 + %167 = OpCompositeExtract %float %165 1 + %168 = OpCompositeExtract %float %165 2 + %169 = OpExtInst %float %1 FMax %167 %168 + %170 = OpExtInst %float %1 FMax %166 %169 + %171 = OpFOrdEqual %bool %170 %166 + OpSelectionMerge %172 None + OpBranchConditional %171 %173 %174 + %174 = OpLabel + %175 = OpFOrdEqual %bool %170 %167 + OpSelectionMerge %176 None + OpBranchConditional %175 %177 %178 + %178 = OpLabel + %179 = OpCompositeExtract %float %152 2 + %180 = OpFOrdEqual %bool %168 %179 + %181 = OpSelect %int %180 %int_4 %int_5 + OpBranch %176 + %177 = OpLabel + %182 = OpCompositeExtract %float %152 1 + %183 = OpFOrdEqual %bool %167 %182 + %184 = OpSelect %int %183 %int_2 %int_3 + OpBranch %176 + %176 = OpLabel + %185 = OpPhi %int %184 %177 %181 %178 + OpBranch %172 + %173 = OpLabel + 
%186 = OpCompositeExtract %float %152 0 + %187 = OpFOrdEqual %bool %166 %186 + %188 = OpSelect %int %187 %int_0 %int_1 + OpBranch %172 + %172 = OpLabel + %189 = OpPhi %int %188 %173 %185 %176 + %190 = OpCompositeExtract %float %147 0 + %191 = OpCompositeExtract %float %147 1 + %192 = OpCompositeExtract %float %147 2 + %193 = OpCompositeConstruct %v4float %190 %191 %192 %float_1 + %194 = OpAccessChain %_ptr_Uniform_mat4v4float %_Globals %int_1 %189 + %195 = OpLoad %mat4v4float %194 + %196 = OpMatrixTimesVector %v4float %195 %193 + %197 = OpCompositeExtract %float %196 2 + %198 = OpCompositeExtract %float %196 3 + %199 = OpFDiv %float %197 %198 + %200 = OpFNegate %float %157 + %201 = OpFDiv %float %200 %198 + %202 = OpLoad %type_cube_image %ShadowDepthCubeTexture + %203 = OpLoad %type_sampler %ShadowDepthCubeTextureSampler + %204 = OpFAdd %float %199 %201 + %205 = OpSampledImage %type_sampled_image %202 %203 + %206 = OpImageSampleDrefExplicitLod %float %205 %164 %204 Lod %float_0 + OpBranch %161 + %161 = OpLabel + %207 = OpPhi %float %float_1 %108 %206 %172 + %208 = OpFSub %float %207 %float_0_5 + %209 = OpAccessChain %_ptr_Uniform_float %_Globals %int_4 + %210 = OpLoad %float %209 + %211 = OpFMul %float %208 %210 + %212 = OpFAdd %float %211 %float_0_5 + %213 = OpExtInst %float %1 FClamp %212 %float_0 %float_1 + %214 = OpFMul %float %213 %213 + %215 = OpAccessChain %_ptr_Uniform_float %_Globals %int_3 + %216 = OpLoad %float %215 + %217 = OpExtInst %float %1 FMix %float_1 %214 %216 + %218 = OpExtInst %float %1 Sqrt %217 + %219 = OpCompositeInsert %v4float %218 %107 2 + %220 = OpVectorShuffle %v4float %219 %62 4 5 2 6 + %221 = OpLoad %type_2d_image %SceneTexturesStruct_GBufferATexture + %222 = OpLoad %type_sampler %SceneTexturesStruct_GBufferATextureSampler + %223 = OpSampledImage %type_sampled_image_0 %221 %222 + %224 = OpImageSampleExplicitLod %v4float %223 %114 Lod %float_0 + %225 = OpLoad %type_2d_image %SceneTexturesStruct_GBufferBTexture + %226 = OpLoad 
%type_sampler %SceneTexturesStruct_GBufferBTextureSampler + %227 = OpSampledImage %type_sampled_image_0 %225 %226 + %228 = OpImageSampleExplicitLod %v4float %227 %114 Lod %float_0 + %229 = OpLoad %type_2d_image %SceneTexturesStruct_GBufferDTexture + %230 = OpLoad %type_sampler %SceneTexturesStruct_GBufferDTextureSampler + %231 = OpSampledImage %type_sampled_image_0 %229 %230 + %232 = OpImageSampleExplicitLod %v4float %231 %114 Lod %float_0 + %233 = OpVectorShuffle %v3float %224 %224 0 1 2 + %234 = OpFMul %v3float %233 %77 + %235 = OpFSub %v3float %234 %62 + %236 = OpExtInst %v3float %1 Normalize %235 + %237 = OpCompositeExtract %float %228 3 + %238 = OpFMul %float %237 %float_255 + %239 = OpExtInst %float %1 Round %238 + %240 = OpConvertFToU %uint %239 + %241 = OpBitwiseAnd %uint %240 %uint_15 + %242 = OpBitwiseAnd %uint %240 %uint_4294967280 + %243 = OpBitwiseAnd %uint %242 %uint_16 + %244 = OpINotEqual %bool %243 %uint_0 + %245 = OpLogicalNot %bool %244 + %246 = OpCompositeConstruct %v4bool %245 %245 %245 %245 + %247 = OpSelect %v4float %246 %232 %66 + %248 = OpIEqual %bool %241 %uint_5 + OpSelectionMerge %249 None + OpBranchConditional %248 %250 %249 + %250 = OpLabel + %251 = OpLoad %v4float %155 + %252 = OpCompositeExtract %float %247 0 + %253 = OpFMul %float %252 %float_255 + %254 = OpFAdd %float %253 %float_0_5 + %255 = OpConvertFToU %uint %254 + %256 = OpBitcast %int %255 + %257 = OpCompositeConstruct %v3int %int_1 %256 %int_0 + %258 = OpVectorShuffle %v2int %257 %257 0 1 + %259 = OpLoad %type_2d_image %SSProfilesTexture + %260 = OpImageFetch %v4float %259 %258 Lod %int_0 + %261 = OpCompositeExtract %float %260 0 + %262 = OpCompositeExtract %float %260 1 + %263 = OpFMul %float %262 %float_0_5 + %264 = OpCompositeConstruct %v3float %263 %263 %263 + %265 = OpFMul %v3float %236 %264 + %266 = OpFSub %v3float %148 %265 + %267 = OpDot %float %152 %152 + %268 = OpExtInst %float %1 InverseSqrt %267 + %269 = OpCompositeConstruct %v3float %268 %268 %268 + %270 = 
OpFMul %v3float %152 %269 + %271 = OpFNegate %v3float %270 + %272 = OpDot %float %271 %236 + %273 = OpExtInst %float %1 FClamp %272 %float_0 %float_1 + %274 = OpExtInst %float %1 Pow %273 %float_1 + OpSelectionMerge %275 DontFlatten + OpBranchConditional %160 %276 %275 + %276 = OpLabel + %277 = OpCompositeConstruct %v3float %158 %158 %158 + %278 = OpFDiv %v3float %152 %277 + %279 = OpExtInst %v3float %1 Cross %278 %65 + %280 = OpExtInst %v3float %1 Normalize %279 + %281 = OpExtInst %v3float %1 Cross %280 %278 + %282 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 + %283 = OpLoad %float %282 + %284 = OpCompositeConstruct %v3float %283 %283 %283 + %285 = OpFMul %v3float %280 %284 + %286 = OpFMul %v3float %281 %284 + %287 = OpExtInst %v3float %1 FAbs %278 + %288 = OpCompositeExtract %float %287 0 + %289 = OpCompositeExtract %float %287 1 + %290 = OpCompositeExtract %float %287 2 + %291 = OpExtInst %float %1 FMax %289 %290 + %292 = OpExtInst %float %1 FMax %288 %291 + %293 = OpFOrdEqual %bool %292 %288 + OpSelectionMerge %294 None + OpBranchConditional %293 %295 %296 + %296 = OpLabel + %297 = OpFOrdEqual %bool %292 %289 + OpSelectionMerge %298 None + OpBranchConditional %297 %299 %300 + %300 = OpLabel + %301 = OpCompositeExtract %float %278 2 + %302 = OpFOrdEqual %bool %290 %301 + %303 = OpSelect %int %302 %int_4 %int_5 + OpBranch %298 + %299 = OpLabel + %304 = OpCompositeExtract %float %278 1 + %305 = OpFOrdEqual %bool %289 %304 + %306 = OpSelect %int %305 %int_2 %int_3 + OpBranch %298 + %298 = OpLabel + %307 = OpPhi %int %306 %299 %303 %300 + OpBranch %294 + %295 = OpLabel + %308 = OpCompositeExtract %float %278 0 + %309 = OpFOrdEqual %bool %288 %308 + %310 = OpSelect %int %309 %int_0 %int_1 + OpBranch %294 + %294 = OpLabel + %311 = OpPhi %int %310 %295 %307 %298 + %312 = OpCompositeExtract %float %266 0 + %313 = OpCompositeExtract %float %266 1 + %314 = OpCompositeExtract %float %266 2 + %315 = OpCompositeConstruct %v4float %312 %313 %314 %float_1 + %316 = 
OpAccessChain %_ptr_Uniform_mat4v4float %_Globals %int_1 %311 + %317 = OpLoad %mat4v4float %316 + %318 = OpMatrixTimesVector %v4float %317 %315 + %319 = OpCompositeExtract %float %318 2 + %320 = OpCompositeExtract %float %318 3 + %321 = OpFDiv %float %319 %320 + %322 = OpFDiv %float %float_10 %154 + %323 = OpFMul %float %261 %322 + %324 = OpCompositeExtract %float %251 2 + %325 = OpFMul %float %321 %324 + %326 = OpCompositeExtract %float %251 3 + %327 = OpFSub %float %325 %326 + %328 = OpFDiv %float %float_1 %327 + %329 = OpFMul %float %328 %154 + %330 = OpFMul %v3float %286 %98 + %331 = OpFAdd %v3float %278 %330 + %332 = OpLoad %type_cube_image %ShadowDepthCubeTexture + %333 = OpLoad %type_sampler %ShadowDepthTextureSampler + %334 = OpSampledImage %type_sampled_image %332 %333 + %335 = OpImageSampleExplicitLod %v4float %334 %331 Lod %float_0 + %336 = OpCompositeExtract %float %335 0 + %337 = OpFMul %float %336 %324 + %338 = OpFSub %float %337 %326 + %339 = OpFDiv %float %float_1 %338 + %340 = OpFMul %float %339 %154 + %341 = OpFSub %float %329 %340 + %342 = OpFMul %float %341 %323 + %343 = OpFOrdGreaterThan %bool %342 %float_0 + %344 = OpFAdd %float %342 %263 + %345 = OpFMul %float %342 %274 + %346 = OpFAdd %float %345 %263 + %347 = OpExtInst %float %1 FMax %float_0 %346 + %348 = OpSelect %float %343 %344 %347 + %349 = OpExtInst %float %1 FAbs %348 + %350 = OpExtInst %float %1 FClamp %349 %float_0_150000006 %float_5 + %351 = OpFAdd %float %350 %float_0_25 + %352 = OpFMul %v3float %285 %99 + %353 = OpFAdd %v3float %278 %352 + %354 = OpFMul %v3float %286 %100 + %355 = OpFAdd %v3float %353 %354 + %356 = OpSampledImage %type_sampled_image %332 %333 + %357 = OpImageSampleExplicitLod %v4float %356 %355 Lod %float_0 + %358 = OpCompositeExtract %float %357 0 + %359 = OpFMul %float %358 %324 + %360 = OpFSub %float %359 %326 + %361 = OpFDiv %float %float_1 %360 + %362 = OpFMul %float %361 %154 + %363 = OpFSub %float %329 %362 + %364 = OpFMul %float %363 %323 + %365 = 
OpFOrdGreaterThan %bool %364 %float_0 + %366 = OpFAdd %float %364 %263 + %367 = OpFMul %float %364 %274 + %368 = OpFAdd %float %367 %263 + %369 = OpExtInst %float %1 FMax %float_0 %368 + %370 = OpSelect %float %365 %366 %369 + %371 = OpExtInst %float %1 FAbs %370 + %372 = OpExtInst %float %1 FClamp %371 %float_0_150000006 %float_5 + %373 = OpFAdd %float %372 %float_0_25 + %374 = OpFAdd %float %351 %373 + %375 = OpFMul %v3float %285 %101 + %376 = OpFAdd %v3float %278 %375 + %377 = OpFMul %v3float %286 %102 + %378 = OpFAdd %v3float %376 %377 + %379 = OpSampledImage %type_sampled_image %332 %333 + %380 = OpImageSampleExplicitLod %v4float %379 %378 Lod %float_0 + %381 = OpCompositeExtract %float %380 0 + %382 = OpFMul %float %381 %324 + %383 = OpFSub %float %382 %326 + %384 = OpFDiv %float %float_1 %383 + %385 = OpFMul %float %384 %154 + %386 = OpFSub %float %329 %385 + %387 = OpFMul %float %386 %323 + %388 = OpFOrdGreaterThan %bool %387 %float_0 + %389 = OpFAdd %float %387 %263 + %390 = OpFMul %float %387 %274 + %391 = OpFAdd %float %390 %263 + %392 = OpExtInst %float %1 FMax %float_0 %391 + %393 = OpSelect %float %388 %389 %392 + %394 = OpExtInst %float %1 FAbs %393 + %395 = OpExtInst %float %1 FClamp %394 %float_0_150000006 %float_5 + %396 = OpFAdd %float %395 %float_0_25 + %397 = OpFAdd %float %374 %396 + %398 = OpFMul %v3float %285 %103 + %399 = OpFAdd %v3float %278 %398 + %400 = OpFMul %v3float %286 %104 + %401 = OpFAdd %v3float %399 %400 + %402 = OpSampledImage %type_sampled_image %332 %333 + %403 = OpImageSampleExplicitLod %v4float %402 %401 Lod %float_0 + %404 = OpCompositeExtract %float %403 0 + %405 = OpFMul %float %404 %324 + %406 = OpFSub %float %405 %326 + %407 = OpFDiv %float %float_1 %406 + %408 = OpFMul %float %407 %154 + %409 = OpFSub %float %329 %408 + %410 = OpFMul %float %409 %323 + %411 = OpFOrdGreaterThan %bool %410 %float_0 + %412 = OpFAdd %float %410 %263 + %413 = OpFMul %float %410 %274 + %414 = OpFAdd %float %413 %263 + %415 = OpExtInst 
%float %1 FMax %float_0 %414 + %416 = OpSelect %float %411 %412 %415 + %417 = OpExtInst %float %1 FAbs %416 + %418 = OpExtInst %float %1 FClamp %417 %float_0_150000006 %float_5 + %419 = OpFAdd %float %418 %float_0_25 + %420 = OpFAdd %float %397 %419 + %421 = OpFMul %v3float %285 %105 + %422 = OpFAdd %v3float %278 %421 + %423 = OpFMul %v3float %286 %106 + %424 = OpFAdd %v3float %422 %423 + %425 = OpSampledImage %type_sampled_image %332 %333 + %426 = OpImageSampleExplicitLod %v4float %425 %424 Lod %float_0 + %427 = OpCompositeExtract %float %426 0 + %428 = OpFMul %float %427 %324 + %429 = OpFSub %float %428 %326 + %430 = OpFDiv %float %float_1 %429 + %431 = OpFMul %float %430 %154 + %432 = OpFSub %float %329 %431 + %433 = OpFMul %float %432 %323 + %434 = OpFOrdGreaterThan %bool %433 %float_0 + %435 = OpFAdd %float %433 %263 + %436 = OpFMul %float %433 %274 + %437 = OpFAdd %float %436 %263 + %438 = OpExtInst %float %1 FMax %float_0 %437 + %439 = OpSelect %float %434 %435 %438 + %440 = OpExtInst %float %1 FAbs %439 + %441 = OpExtInst %float %1 FClamp %440 %float_0_150000006 %float_5 + %442 = OpFAdd %float %441 %float_0_25 + %443 = OpFAdd %float %420 %442 + %444 = OpFMul %float %443 %float_0_200000003 + OpBranch %275 + %275 = OpLabel + %445 = OpPhi %float %float_1 %250 %444 %294 + %446 = OpFMul %float %445 %float_0_200000003 + %447 = OpFSub %float %float_1 %446 + OpBranch %249 + %249 = OpLabel + %448 = OpPhi %float %float_1 %161 %447 %275 + %449 = OpExtInst %float %1 Sqrt %448 + %450 = OpSelect %float %248 %449 %218 + %451 = OpCompositeInsert %v4float %450 %220 3 + OpStore %out_var_SV_Target0 %451 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/global-constant-arrays.asm.frag b/shaders-ue4/asm/frag/global-constant-arrays.asm.frag new file mode 100644 index 00000000000..47db9ebc512 --- /dev/null +++ b/shaders-ue4/asm/frag/global-constant-arrays.asm.frag @@ -0,0 +1,3556 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 3005 +; Schema: 0 + 
OpCapability Shader + OpCapability Geometry + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %MainPS "main" %in_var_TEXCOORD0 %gl_FragCoord %gl_Layer %out_var_SV_Target0 + OpExecutionMode %MainPS OriginUpperLeft + OpSource HLSL 600 + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "MappingPolynomial" + OpMemberName %type__Globals 1 "InverseGamma" + OpMemberName %type__Globals 2 "ColorMatrixR_ColorCurveCd1" + OpMemberName %type__Globals 3 "ColorMatrixG_ColorCurveCd3Cm3" + OpMemberName %type__Globals 4 "ColorMatrixB_ColorCurveCm2" + OpMemberName %type__Globals 5 "ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3" + OpMemberName %type__Globals 6 "ColorCurve_Ch1_Ch2" + OpMemberName %type__Globals 7 "ColorShadow_Luma" + OpMemberName %type__Globals 8 "ColorShadow_Tint1" + OpMemberName %type__Globals 9 "ColorShadow_Tint2" + OpMemberName %type__Globals 10 "FilmSlope" + OpMemberName %type__Globals 11 "FilmToe" + OpMemberName %type__Globals 12 "FilmShoulder" + OpMemberName %type__Globals 13 "FilmBlackClip" + OpMemberName %type__Globals 14 "FilmWhiteClip" + OpMemberName %type__Globals 15 "ColorScale" + OpMemberName %type__Globals 16 "OverlayColor" + OpMemberName %type__Globals 17 "WhiteTemp" + OpMemberName %type__Globals 18 "WhiteTint" + OpMemberName %type__Globals 19 "ColorSaturation" + OpMemberName %type__Globals 20 "ColorContrast" + OpMemberName %type__Globals 21 "ColorGamma" + OpMemberName %type__Globals 22 "ColorGain" + OpMemberName %type__Globals 23 "ColorOffset" + OpMemberName %type__Globals 24 "ColorSaturationShadows" + OpMemberName %type__Globals 25 "ColorContrastShadows" + OpMemberName %type__Globals 26 "ColorGammaShadows" + OpMemberName %type__Globals 27 "ColorGainShadows" + OpMemberName %type__Globals 28 "ColorOffsetShadows" + OpMemberName %type__Globals 29 "ColorSaturationMidtones" + OpMemberName %type__Globals 30 "ColorContrastMidtones" + OpMemberName %type__Globals 
31 "ColorGammaMidtones" + OpMemberName %type__Globals 32 "ColorGainMidtones" + OpMemberName %type__Globals 33 "ColorOffsetMidtones" + OpMemberName %type__Globals 34 "ColorSaturationHighlights" + OpMemberName %type__Globals 35 "ColorContrastHighlights" + OpMemberName %type__Globals 36 "ColorGammaHighlights" + OpMemberName %type__Globals 37 "ColorGainHighlights" + OpMemberName %type__Globals 38 "ColorOffsetHighlights" + OpMemberName %type__Globals 39 "ColorCorrectionShadowsMax" + OpMemberName %type__Globals 40 "ColorCorrectionHighlightsMin" + OpMemberName %type__Globals 41 "OutputDevice" + OpMemberName %type__Globals 42 "OutputGamut" + OpMemberName %type__Globals 43 "BlueCorrection" + OpMemberName %type__Globals 44 "ExpandGamut" + OpName %_Globals "$Globals" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %MainPS "MainPS" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorate %in_var_TEXCOORD0 NoPerspective + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorate %gl_Layer BuiltIn Layer + OpDecorateString %gl_Layer UserSemantic "SV_RenderTargetArrayIndex" + OpDecorate %gl_Layer Flat + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 0 + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 32 + OpMemberDecorate %type__Globals 3 Offset 48 + OpMemberDecorate %type__Globals 4 Offset 64 + OpMemberDecorate %type__Globals 5 Offset 80 + OpMemberDecorate %type__Globals 6 Offset 96 + OpMemberDecorate %type__Globals 7 Offset 112 + OpMemberDecorate %type__Globals 8 Offset 128 + OpMemberDecorate %type__Globals 9 Offset 144 + OpMemberDecorate %type__Globals 10 Offset 160 + OpMemberDecorate 
%type__Globals 11 Offset 164 + OpMemberDecorate %type__Globals 12 Offset 168 + OpMemberDecorate %type__Globals 13 Offset 172 + OpMemberDecorate %type__Globals 14 Offset 176 + OpMemberDecorate %type__Globals 15 Offset 180 + OpMemberDecorate %type__Globals 16 Offset 192 + OpMemberDecorate %type__Globals 17 Offset 208 + OpMemberDecorate %type__Globals 18 Offset 212 + OpMemberDecorate %type__Globals 19 Offset 224 + OpMemberDecorate %type__Globals 20 Offset 240 + OpMemberDecorate %type__Globals 21 Offset 256 + OpMemberDecorate %type__Globals 22 Offset 272 + OpMemberDecorate %type__Globals 23 Offset 288 + OpMemberDecorate %type__Globals 24 Offset 304 + OpMemberDecorate %type__Globals 25 Offset 320 + OpMemberDecorate %type__Globals 26 Offset 336 + OpMemberDecorate %type__Globals 27 Offset 352 + OpMemberDecorate %type__Globals 28 Offset 368 + OpMemberDecorate %type__Globals 29 Offset 384 + OpMemberDecorate %type__Globals 30 Offset 400 + OpMemberDecorate %type__Globals 31 Offset 416 + OpMemberDecorate %type__Globals 32 Offset 432 + OpMemberDecorate %type__Globals 33 Offset 448 + OpMemberDecorate %type__Globals 34 Offset 464 + OpMemberDecorate %type__Globals 35 Offset 480 + OpMemberDecorate %type__Globals 36 Offset 496 + OpMemberDecorate %type__Globals 37 Offset 512 + OpMemberDecorate %type__Globals 38 Offset 528 + OpMemberDecorate %type__Globals 39 Offset 544 + OpMemberDecorate %type__Globals 40 Offset 548 + OpMemberDecorate %type__Globals 41 Offset 552 + OpMemberDecorate %type__Globals 42 Offset 556 + OpMemberDecorate %type__Globals 43 Offset 560 + OpMemberDecorate %type__Globals 44 Offset 564 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 +%float_0_952552378 = OpConstant %float 0.952552378 + %float_0 = OpConstant 
%float 0 + +; HACK: Needed to hack this constant since MSVC and GNU libc are off by 1 ULP when converting to string (it probably still works fine though in a roundtrip ...) +%float_9_36786018en05 = OpConstant %float 9.25 + +%float_0_343966454 = OpConstant %float 0.343966454 +%float_0_728166103 = OpConstant %float 0.728166103 +%float_n0_0721325427 = OpConstant %float -0.0721325427 +%float_1_00882518 = OpConstant %float 1.00882518 +%float_1_04981101 = OpConstant %float 1.04981101 +%float_n9_74845025en05 = OpConstant %float -9.74845025e-05 +%float_n0_495903015 = OpConstant %float -0.495903015 +%float_1_37331307 = OpConstant %float 1.37331307 +%float_0_0982400328 = OpConstant %float 0.0982400328 +%float_0_991252005 = OpConstant %float 0.991252005 +%float_0_662454188 = OpConstant %float 0.662454188 +%float_0_134004205 = OpConstant %float 0.134004205 +%float_0_156187683 = OpConstant %float 0.156187683 +%float_0_272228718 = OpConstant %float 0.272228718 +%float_0_674081743 = OpConstant %float 0.674081743 +%float_0_0536895171 = OpConstant %float 0.0536895171 +%float_n0_00557464967 = OpConstant %float -0.00557464967 +%float_0_0040607336 = OpConstant %float 0.0040607336 +%float_1_01033914 = OpConstant %float 1.01033914 +%float_1_6410234 = OpConstant %float 1.6410234 +%float_n0_324803293 = OpConstant %float -0.324803293 +%float_n0_236424699 = OpConstant %float -0.236424699 +%float_n0_663662851 = OpConstant %float -0.663662851 +%float_1_61533165 = OpConstant %float 1.61533165 +%float_0_0167563483 = OpConstant %float 0.0167563483 +%float_0_0117218941 = OpConstant %float 0.0117218941 +%float_n0_00828444213 = OpConstant %float -0.00828444213 +%float_0_988394856 = OpConstant %float 0.988394856 +%float_1_45143926 = OpConstant %float 1.45143926 +%float_n0_236510754 = OpConstant %float -0.236510754 +%float_n0_214928567 = OpConstant %float -0.214928567 +%float_n0_0765537769 = OpConstant %float -0.0765537769 +%float_1_17622972 = OpConstant %float 1.17622972 +%float_n0_0996759236 = 
OpConstant %float -0.0996759236 +%float_0_00831614807 = OpConstant %float 0.00831614807 +%float_n0_00603244966 = OpConstant %float -0.00603244966 +%float_0_997716308 = OpConstant %float 0.997716308 +%float_0_695452213 = OpConstant %float 0.695452213 +%float_0_140678704 = OpConstant %float 0.140678704 +%float_0_163869068 = OpConstant %float 0.163869068 +%float_0_0447945632 = OpConstant %float 0.0447945632 +%float_0_859671116 = OpConstant %float 0.859671116 +%float_0_0955343172 = OpConstant %float 0.0955343172 +%float_n0_00552588282 = OpConstant %float -0.00552588282 +%float_0_00402521016 = OpConstant %float 0.00402521016 +%float_1_00150073 = OpConstant %float 1.00150073 + %67 = OpConstantComposite %v3float %float_0_272228718 %float_0_674081743 %float_0_0536895171 +%float_3_2409699 = OpConstant %float 3.2409699 +%float_n1_5373832 = OpConstant %float -1.5373832 +%float_n0_498610765 = OpConstant %float -0.498610765 +%float_n0_969243646 = OpConstant %float -0.969243646 +%float_1_8759675 = OpConstant %float 1.8759675 +%float_0_0415550582 = OpConstant %float 0.0415550582 +%float_0_0556300804 = OpConstant %float 0.0556300804 +%float_n0_203976959 = OpConstant %float -0.203976959 +%float_1_05697155 = OpConstant %float 1.05697155 +%float_0_412456393 = OpConstant %float 0.412456393 +%float_0_357576102 = OpConstant %float 0.357576102 +%float_0_180437505 = OpConstant %float 0.180437505 +%float_0_212672904 = OpConstant %float 0.212672904 +%float_0_715152204 = OpConstant %float 0.715152204 +%float_0_0721750036 = OpConstant %float 0.0721750036 +%float_0_0193339009 = OpConstant %float 0.0193339009 +%float_0_119191997 = OpConstant %float 0.119191997 +%float_0_950304091 = OpConstant %float 0.950304091 +%float_1_71660841 = OpConstant %float 1.71660841 +%float_n0_355662107 = OpConstant %float -0.355662107 +%float_n0_253360093 = OpConstant %float -0.253360093 +%float_n0_666682899 = OpConstant %float -0.666682899 +%float_1_61647761 = OpConstant %float 1.61647761 +%float_0_0157685 = 
OpConstant %float 0.0157685 +%float_0_0176422 = OpConstant %float 0.0176422 +%float_n0_0427763015 = OpConstant %float -0.0427763015 +%float_0_942228675 = OpConstant %float 0.942228675 +%float_2_49339628 = OpConstant %float 2.49339628 +%float_n0_93134588 = OpConstant %float -0.93134588 +%float_n0_402694494 = OpConstant %float -0.402694494 +%float_n0_829486787 = OpConstant %float -0.829486787 +%float_1_76265967 = OpConstant %float 1.76265967 +%float_0_0236246008 = OpConstant %float 0.0236246008 +%float_0_0358507 = OpConstant %float 0.0358507 +%float_n0_0761827007 = OpConstant %float -0.0761827007 +%float_0_957014024 = OpConstant %float 0.957014024 +%float_1_01303005 = OpConstant %float 1.01303005 +%float_0_00610530982 = OpConstant %float 0.00610530982 +%float_n0_0149710001 = OpConstant %float -0.0149710001 +%float_0_00769822998 = OpConstant %float 0.00769822998 +%float_0_998165011 = OpConstant %float 0.998165011 +%float_n0_00503202993 = OpConstant %float -0.00503202993 +%float_n0_00284131011 = OpConstant %float -0.00284131011 +%float_0_00468515977 = OpConstant %float 0.00468515977 +%float_0_924507022 = OpConstant %float 0.924507022 +%float_0_987223983 = OpConstant %float 0.987223983 +%float_n0_00611326983 = OpConstant %float -0.00611326983 +%float_0_0159533005 = OpConstant %float 0.0159533005 +%float_n0_00759836007 = OpConstant %float -0.00759836007 +%float_1_00186002 = OpConstant %float 1.00186002 +%float_0_0053300201 = OpConstant %float 0.0053300201 +%float_0_00307257008 = OpConstant %float 0.00307257008 +%float_n0_00509594986 = OpConstant %float -0.00509594986 +%float_1_08168006 = OpConstant %float 1.08168006 + %float_0_5 = OpConstant %float 0.5 + %float_n1 = OpConstant %float -1 + %float_1 = OpConstant %float 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%float_0_015625 = OpConstant %float 0.015625 + %128 = OpConstantComposite %v2float %float_0_015625 %float_0_015625 + %129 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + 
%int_42 = OpConstant %int 42 + %uint_3 = OpConstant %uint 3 + %132 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %int_9 = OpConstant %int 9 + %int_3 = OpConstant %int 3 + %135 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %float_n4 = OpConstant %float -4 + %int_44 = OpConstant %int 44 +%float_0_544169128 = OpConstant %float 0.544169128 +%float_0_239592597 = OpConstant %float 0.239592597 +%float_0_166694298 = OpConstant %float 0.166694298 +%float_0_239465594 = OpConstant %float 0.239465594 +%float_0_702153027 = OpConstant %float 0.702153027 +%float_0_058381401 = OpConstant %float 0.058381401 +%float_n0_00234390004 = OpConstant %float -0.00234390004 +%float_0_0361833982 = OpConstant %float 0.0361833982 +%float_1_05521834 = OpConstant %float 1.05521834 +%float_0_940437257 = OpConstant %float 0.940437257 +%float_n0_0183068793 = OpConstant %float -0.0183068793 +%float_0_077869609 = OpConstant %float 0.077869609 +%float_0_00837869663 = OpConstant %float 0.00837869663 +%float_0_828660011 = OpConstant %float 0.828660011 +%float_0_162961304 = OpConstant %float 0.162961304 +%float_0_00054712611 = OpConstant %float 0.00054712611 +%float_n0_000883374596 = OpConstant %float -0.000883374596 +%float_1_00033629 = OpConstant %float 1.00033629 +%float_1_06317997 = OpConstant %float 1.06317997 +%float_0_0233955998 = OpConstant %float 0.0233955998 +%float_n0_0865726024 = OpConstant %float -0.0865726024 +%float_n0_0106336996 = OpConstant %float -0.0106336996 +%float_1_20632005 = OpConstant %float 1.20632005 +%float_n0_195690006 = OpConstant %float -0.195690006 +%float_n0_000590886979 = OpConstant %float -0.000590886979 +%float_0_00105247996 = OpConstant %float 0.00105247996 +%float_0_999538004 = OpConstant %float 0.999538004 + %int_43 = OpConstant %int 43 + %int_15 = OpConstant %int 15 + %int_16 = OpConstant %int 16 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_5 = OpConstant %uint 5 + %uint_6 = OpConstant %uint 6 + %int_2 = 
OpConstant %int 2 +%mat3v3float = OpTypeMatrix %v3float 3 + %int_41 = OpConstant %int 41 +%float_0_159301758 = OpConstant %float 0.159301758 +%float_78_84375 = OpConstant %float 78.84375 +%float_0_8359375 = OpConstant %float 0.8359375 +%float_18_8515625 = OpConstant %float 18.8515625 +%float_18_6875 = OpConstant %float 18.6875 +%float_10000 = OpConstant %float 10000 +%float_0_0126833133 = OpConstant %float 0.0126833133 + %182 = OpConstantComposite %v3float %float_0_0126833133 %float_0_0126833133 %float_0_0126833133 + %183 = OpConstantComposite %v3float %float_0_8359375 %float_0_8359375 %float_0_8359375 + %184 = OpConstantComposite %v3float %float_18_8515625 %float_18_8515625 %float_18_8515625 + %185 = OpConstantComposite %v3float %float_18_6875 %float_18_6875 %float_18_6875 +%float_6_27739477 = OpConstant %float 6.27739477 + %187 = OpConstantComposite %v3float %float_6_27739477 %float_6_27739477 %float_6_27739477 + %188 = OpConstantComposite %v3float %float_10000 %float_10000 %float_10000 + %float_14 = OpConstant %float 14 +%float_0_180000007 = OpConstant %float 0.180000007 +%float_0_434017599 = OpConstant %float 0.434017599 + %192 = OpConstantComposite %v3float %float_0_434017599 %float_0_434017599 %float_0_434017599 + %193 = OpConstantComposite %v3float %float_14 %float_14 %float_14 + %194 = OpConstantComposite %v3float %float_0_180000007 %float_0_180000007 %float_0_180000007 + %int_17 = OpConstant %int 17 + %float_4000 = OpConstant %float 4000 +%float_0_312700003 = OpConstant %float 0.312700003 +%float_0_328999996 = OpConstant %float 0.328999996 + %int_18 = OpConstant %int 18 + %int_24 = OpConstant %int 24 + %int_19 = OpConstant %int 19 + %int_25 = OpConstant %int 25 + %int_20 = OpConstant %int 20 + %int_26 = OpConstant %int 26 + %int_21 = OpConstant %int 21 + %int_27 = OpConstant %int 27 + %int_22 = OpConstant %int 22 + %int_28 = OpConstant %int 28 + %int_23 = OpConstant %int 23 + %int_39 = OpConstant %int 39 + %int_34 = OpConstant %int 34 + %int_35 = 
OpConstant %int 35 + %int_36 = OpConstant %int 36 + %int_37 = OpConstant %int 37 + %int_38 = OpConstant %int 38 + %int_40 = OpConstant %int 40 + %int_29 = OpConstant %int 29 + %int_30 = OpConstant %int 30 + %int_31 = OpConstant %int 31 + %int_32 = OpConstant %int 32 + %int_33 = OpConstant %int 33 +%float_0_0500000007 = OpConstant %float 0.0500000007 + %float_1_75 = OpConstant %float 1.75 +%float_0_400000006 = OpConstant %float 0.400000006 +%float_0_0299999993 = OpConstant %float 0.0299999993 + %float_2 = OpConstant %float 2 +%float_0_959999979 = OpConstant %float 0.959999979 + %228 = OpConstantComposite %v3float %float_0_959999979 %float_0_959999979 %float_0_959999979 + %int_13 = OpConstant %int 13 + %int_11 = OpConstant %int 11 + %int_14 = OpConstant %int 14 + %int_12 = OpConstant %int 12 +%float_0_800000012 = OpConstant %float 0.800000012 + %int_10 = OpConstant %int 10 + %float_10 = OpConstant %float 10 + %float_n2 = OpConstant %float -2 + %float_3 = OpConstant %float 3 + %238 = OpConstantComposite %v3float %float_3 %float_3 %float_3 + %239 = OpConstantComposite %v3float %float_2 %float_2 %float_2 +%float_0_930000007 = OpConstant %float 0.930000007 + %241 = OpConstantComposite %v3float %float_0_930000007 %float_0_930000007 %float_0_930000007 + %int_4 = OpConstant %int 4 + %int_8 = OpConstant %int 8 + %int_7 = OpConstant %int 7 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 +%float_0_00200000009 = OpConstant %float 0.00200000009 + %248 = OpConstantComposite %v3float %float_0_00200000009 %float_0_00200000009 %float_0_00200000009 +%float_6_10351999en05 = OpConstant %float 6.10351999e-05 + %250 = OpConstantComposite %v3float %float_6_10351999en05 %float_6_10351999en05 %float_6_10351999en05 + %float_4_5 = OpConstant %float 4.5 + %252 = OpConstantComposite %v3float %float_4_5 %float_4_5 %float_4_5 +%float_0_0179999992 = OpConstant %float 0.0179999992 + %254 = OpConstantComposite %v3float %float_0_0179999992 %float_0_0179999992 %float_0_0179999992 
+%float_0_449999988 = OpConstant %float 0.449999988 + %256 = OpConstantComposite %v3float %float_0_449999988 %float_0_449999988 %float_0_449999988 +%float_1_09899998 = OpConstant %float 1.09899998 + %258 = OpConstantComposite %v3float %float_1_09899998 %float_1_09899998 %float_1_09899998 +%float_0_0989999995 = OpConstant %float 0.0989999995 + %260 = OpConstantComposite %v3float %float_0_0989999995 %float_0_0989999995 %float_0_0989999995 + %float_1_5 = OpConstant %float 1.5 + %262 = OpConstantComposite %v3float %float_1_5 %float_1_5 %float_1_5 + %263 = OpConstantComposite %v3float %float_0_159301758 %float_0_159301758 %float_0_159301758 + %264 = OpConstantComposite %v3float %float_78_84375 %float_78_84375 %float_78_84375 +%float_1_00055635 = OpConstant %float 1.00055635 + %float_7000 = OpConstant %float 7000 +%float_0_244063005 = OpConstant %float 0.244063005 +%float_99_1100006 = OpConstant %float 99.1100006 +%float_2967800 = OpConstant %float 2967800 +%float_0_237039998 = OpConstant %float 0.237039998 +%float_247_479996 = OpConstant %float 247.479996 +%float_1901800 = OpConstant %float 1901800 + %float_n3 = OpConstant %float -3 +%float_2_86999989 = OpConstant %float 2.86999989 +%float_0_275000006 = OpConstant %float 0.275000006 +%float_0_860117733 = OpConstant %float 0.860117733 +%float_0_000154118257 = OpConstant %float 0.000154118257 +%float_1_28641219en07 = OpConstant %float 1.28641219e-07 +%float_0_00084242021 = OpConstant %float 0.00084242021 +%float_7_08145137en07 = OpConstant %float 7.08145137e-07 +%float_0_317398727 = OpConstant %float 0.317398727 + +; HACK: Needed to hack this constant since MSVC and GNU libc are off by 1 ULP when converting to string (it probably still works fine though in a roundtrip ...) 
+%float_4_22806261en05 = OpConstant %float 4.25 + +%float_4_20481676en08 = OpConstant %float 4.20481676e-08 +%float_2_8974182en05 = OpConstant %float 2.8974182e-05 +%float_1_61456057en07 = OpConstant %float 1.61456057e-07 + %float_8 = OpConstant %float 8 + %float_4 = OpConstant %float 4 +%float_0_895099998 = OpConstant %float 0.895099998 +%float_0_266400009 = OpConstant %float 0.266400009 +%float_n0_161400005 = OpConstant %float -0.161400005 +%float_n0_750199974 = OpConstant %float -0.750199974 +%float_1_71350002 = OpConstant %float 1.71350002 +%float_0_0366999991 = OpConstant %float 0.0366999991 +%float_0_0388999991 = OpConstant %float 0.0388999991 +%float_n0_0684999973 = OpConstant %float -0.0684999973 +%float_1_02960002 = OpConstant %float 1.02960002 +%float_0_986992896 = OpConstant %float 0.986992896 +%float_n0_1470543 = OpConstant %float -0.1470543 +%float_0_159962699 = OpConstant %float 0.159962699 +%float_0_432305306 = OpConstant %float 0.432305306 +%float_0_518360317 = OpConstant %float 0.518360317 +%float_0_0492912009 = OpConstant %float 0.0492912009 +%float_n0_0085287001 = OpConstant %float -0.0085287001 +%float_0_040042799 = OpConstant %float 0.040042799 +%float_0_968486726 = OpConstant %float 0.968486726 +%float_5_55555534 = OpConstant %float 5.55555534 + %307 = OpConstantComposite %v3float %float_5_55555534 %float_5_55555534 %float_5_55555534 +%float_1_00000001en10 = OpConstant %float 1.00000001e-10 +%float_0_00999999978 = OpConstant %float 0.00999999978 +%float_0_666666687 = OpConstant %float 0.666666687 + %float_180 = OpConstant %float 180 + %float_360 = OpConstant %float 360 +%float_65535 = OpConstant %float 65535 + %314 = OpConstantComposite %v3float %float_65535 %float_65535 %float_65535 +%float_n4_97062206 = OpConstant %float -4.97062206 +%float_n3_02937818 = OpConstant %float -3.02937818 +%float_n2_12619996 = OpConstant %float -2.12619996 +%float_n1_51049995 = OpConstant %float -1.51049995 +%float_n1_05780005 = OpConstant %float -1.05780005 
+%float_n0_466800004 = OpConstant %float -0.466800004 +%float_0_119379997 = OpConstant %float 0.119379997 +%float_0_708813429 = OpConstant %float 0.708813429 +%float_1_29118657 = OpConstant %float 1.29118657 +%float_0_808913231 = OpConstant %float 0.808913231 +%float_1_19108677 = OpConstant %float 1.19108677 +%float_1_56830001 = OpConstant %float 1.56830001 +%float_1_9483 = OpConstant %float 1.9483 +%float_2_30830002 = OpConstant %float 2.30830002 +%float_2_63840008 = OpConstant %float 2.63840008 +%float_2_85949993 = OpConstant %float 2.85949993 +%float_2_98726082 = OpConstant %float 2.98726082 +%float_3_01273918 = OpConstant %float 3.01273918 +%float_0_179999992 = OpConstant %float 0.179999992 +%float_9_99999975en05 = OpConstant %float 9.99999975e-05 + %float_1000 = OpConstant %float 1000 +%float_0_0599999987 = OpConstant %float 0.0599999987 +%float_3_50738446en05 = OpConstant %float 3.50738446e-05 + %338 = OpConstantComposite %v3float %float_3_50738446en05 %float_3_50738446en05 %float_3_50738446en05 +%float_n2_30102992 = OpConstant %float -2.30102992 +%float_n1_93120003 = OpConstant %float -1.93120003 +%float_n1_52049994 = OpConstant %float -1.52049994 +%float_0_801995218 = OpConstant %float 0.801995218 +%float_1_19800484 = OpConstant %float 1.19800484 +%float_1_59430003 = OpConstant %float 1.59430003 +%float_1_99730003 = OpConstant %float 1.99730003 +%float_2_37829995 = OpConstant %float 2.37829995 +%float_2_76839995 = OpConstant %float 2.76839995 +%float_3_05150008 = OpConstant %float 3.05150008 +%float_3_27462935 = OpConstant %float 3.27462935 +%float_3_32743073 = OpConstant %float 3.32743073 +%float_0_00499999989 = OpConstant %float 0.00499999989 + %float_11 = OpConstant %float 11 + %float_2000 = OpConstant %float 2000 +%float_0_119999997 = OpConstant %float 0.119999997 +%float_0_00313066994 = OpConstant %float 0.00313066994 +%float_12_9200001 = OpConstant %float 12.9200001 +%float_0_416666657 = OpConstant %float 0.416666657 +%float_1_05499995 = OpConstant 
%float 1.05499995 +%float_0_0549999997 = OpConstant %float 0.0549999997 +%float_n0_166666672 = OpConstant %float -0.166666672 + %float_n0_5 = OpConstant %float -0.5 +%float_0_166666672 = OpConstant %float 0.166666672 +%float_n3_15737653 = OpConstant %float -3.15737653 +%float_n0_485249996 = OpConstant %float -0.485249996 +%float_1_84773242 = OpConstant %float 1.84773242 +%float_n0_718548238 = OpConstant %float -0.718548238 +%float_2_08103061 = OpConstant %float 2.08103061 +%float_3_6681242 = OpConstant %float 3.6681242 + %float_18 = OpConstant %float 18 + %float_7 = OpConstant %float 7 +%type__Globals = OpTypeStruct %v4float %v3float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %float %float %float %float %float %v3float %v4float %float %float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %float %float %uint %uint %float %float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %377 = OpTypeFunction %void +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %v2bool = OpTypeVector %bool 2 + %v3bool = OpTypeVector %bool 3 + %uint_10 = OpConstant %uint 10 +%_arr_float_uint_10 = OpTypeArray %float %uint_10 +%_arr_float_uint_6 = OpTypeArray %float %uint_6 + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_v2float Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %gl_Layer = 
OpVariable %_ptr_Input_uint Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output +%_ptr_Function__arr_float_uint_6 = OpTypePointer Function %_arr_float_uint_6 +%_ptr_Function__arr_float_uint_10 = OpTypePointer Function %_arr_float_uint_10 + %391 = OpUndef %v3float + %392 = OpConstantComposite %v3float %float_0_952552378 %float_0 %float_9_36786018en05 + %393 = OpConstantComposite %v3float %float_0_343966454 %float_0_728166103 %float_n0_0721325427 + %394 = OpConstantComposite %v3float %float_0 %float_0 %float_1_00882518 + %395 = OpConstantComposite %mat3v3float %392 %393 %394 + %396 = OpConstantComposite %v3float %float_1_04981101 %float_0 %float_n9_74845025en05 + %397 = OpConstantComposite %v3float %float_n0_495903015 %float_1_37331307 %float_0_0982400328 + %398 = OpConstantComposite %v3float %float_0 %float_0 %float_0_991252005 + %399 = OpConstantComposite %mat3v3float %396 %397 %398 + %400 = OpConstantComposite %v3float %float_0_662454188 %float_0_134004205 %float_0_156187683 + %401 = OpConstantComposite %v3float %float_n0_00557464967 %float_0_0040607336 %float_1_01033914 + %402 = OpConstantComposite %mat3v3float %400 %67 %401 + %403 = OpConstantComposite %v3float %float_1_6410234 %float_n0_324803293 %float_n0_236424699 + %404 = OpConstantComposite %v3float %float_n0_663662851 %float_1_61533165 %float_0_0167563483 + %405 = OpConstantComposite %v3float %float_0_0117218941 %float_n0_00828444213 %float_0_988394856 + %406 = OpConstantComposite %mat3v3float %403 %404 %405 + %407 = OpConstantComposite %v3float %float_1_45143926 %float_n0_236510754 %float_n0_214928567 + %408 = OpConstantComposite %v3float %float_n0_0765537769 %float_1_17622972 %float_n0_0996759236 + %409 = OpConstantComposite %v3float %float_0_00831614807 %float_n0_00603244966 %float_0_997716308 + %410 = OpConstantComposite %mat3v3float %407 %408 %409 + %411 = OpConstantComposite %v3float %float_0_695452213 %float_0_140678704 %float_0_163869068 + %412 = OpConstantComposite %v3float 
%float_0_0447945632 %float_0_859671116 %float_0_0955343172 + %413 = OpConstantComposite %v3float %float_n0_00552588282 %float_0_00402521016 %float_1_00150073 + %414 = OpConstantComposite %mat3v3float %411 %412 %413 + %415 = OpConstantComposite %v3float %float_3_2409699 %float_n1_5373832 %float_n0_498610765 + %416 = OpConstantComposite %v3float %float_n0_969243646 %float_1_8759675 %float_0_0415550582 + %417 = OpConstantComposite %v3float %float_0_0556300804 %float_n0_203976959 %float_1_05697155 + %418 = OpConstantComposite %mat3v3float %415 %416 %417 + %419 = OpConstantComposite %v3float %float_0_412456393 %float_0_357576102 %float_0_180437505 + %420 = OpConstantComposite %v3float %float_0_212672904 %float_0_715152204 %float_0_0721750036 + %421 = OpConstantComposite %v3float %float_0_0193339009 %float_0_119191997 %float_0_950304091 + %422 = OpConstantComposite %mat3v3float %419 %420 %421 + %423 = OpConstantComposite %v3float %float_1_71660841 %float_n0_355662107 %float_n0_253360093 + %424 = OpConstantComposite %v3float %float_n0_666682899 %float_1_61647761 %float_0_0157685 + %425 = OpConstantComposite %v3float %float_0_0176422 %float_n0_0427763015 %float_0_942228675 + %426 = OpConstantComposite %mat3v3float %423 %424 %425 + %427 = OpConstantComposite %v3float %float_2_49339628 %float_n0_93134588 %float_n0_402694494 + %428 = OpConstantComposite %v3float %float_n0_829486787 %float_1_76265967 %float_0_0236246008 + %429 = OpConstantComposite %v3float %float_0_0358507 %float_n0_0761827007 %float_0_957014024 + %430 = OpConstantComposite %mat3v3float %427 %428 %429 + %431 = OpConstantComposite %v3float %float_1_01303005 %float_0_00610530982 %float_n0_0149710001 + %432 = OpConstantComposite %v3float %float_0_00769822998 %float_0_998165011 %float_n0_00503202993 + %433 = OpConstantComposite %v3float %float_n0_00284131011 %float_0_00468515977 %float_0_924507022 + %434 = OpConstantComposite %mat3v3float %431 %432 %433 + %435 = OpConstantComposite %v3float %float_0_987223983 
%float_n0_00611326983 %float_0_0159533005 + %436 = OpConstantComposite %v3float %float_n0_00759836007 %float_1_00186002 %float_0_0053300201 + %437 = OpConstantComposite %v3float %float_0_00307257008 %float_n0_00509594986 %float_1_08168006 + %438 = OpConstantComposite %mat3v3float %435 %436 %437 + %439 = OpConstantComposite %v3float %float_0_5 %float_n1 %float_0_5 + %440 = OpConstantComposite %v3float %float_n1 %float_1 %float_0_5 + %441 = OpConstantComposite %v3float %float_0_5 %float_0 %float_0 + %442 = OpConstantComposite %mat3v3float %439 %440 %441 + %443 = OpConstantComposite %v3float %float_1 %float_0 %float_0 + %444 = OpConstantComposite %v3float %float_0 %float_1 %float_0 + %445 = OpConstantComposite %v3float %float_0 %float_0 %float_1 + %446 = OpConstantComposite %mat3v3float %443 %444 %445 +%float_n6_07624626 = OpConstant %float -6.07624626 + %448 = OpConstantComposite %v3float %float_n6_07624626 %float_n6_07624626 %float_n6_07624626 + %449 = OpConstantComposite %v3float %float_0_895099998 %float_0_266400009 %float_n0_161400005 + %450 = OpConstantComposite %v3float %float_n0_750199974 %float_1_71350002 %float_0_0366999991 + %451 = OpConstantComposite %v3float %float_0_0388999991 %float_n0_0684999973 %float_1_02960002 + %452 = OpConstantComposite %mat3v3float %449 %450 %451 + %453 = OpConstantComposite %v3float %float_0_986992896 %float_n0_1470543 %float_0_159962699 + %454 = OpConstantComposite %v3float %float_0_432305306 %float_0_518360317 %float_0_0492912009 + %455 = OpConstantComposite %v3float %float_n0_0085287001 %float_0_040042799 %float_0_968486726 + %456 = OpConstantComposite %mat3v3float %453 %454 %455 +%float_0_358299971 = OpConstant %float 0.358299971 + %458 = OpConstantComposite %v3float %float_0_544169128 %float_0_239592597 %float_0_166694298 + %459 = OpConstantComposite %v3float %float_0_239465594 %float_0_702153027 %float_0_058381401 + %460 = OpConstantComposite %v3float %float_n0_00234390004 %float_0_0361833982 %float_1_05521834 + %461 = 
OpConstantComposite %mat3v3float %458 %459 %460 + %462 = OpConstantComposite %v3float %float_0_940437257 %float_n0_0183068793 %float_0_077869609 + %463 = OpConstantComposite %v3float %float_0_00837869663 %float_0_828660011 %float_0_162961304 + %464 = OpConstantComposite %v3float %float_0_00054712611 %float_n0_000883374596 %float_1_00033629 + %465 = OpConstantComposite %mat3v3float %462 %463 %464 + %466 = OpConstantComposite %v3float %float_1_06317997 %float_0_0233955998 %float_n0_0865726024 + %467 = OpConstantComposite %v3float %float_n0_0106336996 %float_1_20632005 %float_n0_195690006 + %468 = OpConstantComposite %v3float %float_n0_000590886979 %float_0_00105247996 %float_0_999538004 + %469 = OpConstantComposite %mat3v3float %466 %467 %468 +%float_0_0533333346 = OpConstant %float 0.0533333346 +%float_0_159999996 = OpConstant %float 0.159999996 +%float_57_2957764 = OpConstant %float 57.2957764 +%float_n67_5 = OpConstant %float -67.5 + %float_67_5 = OpConstant %float 67.5 + %475 = OpConstantComposite %_arr_float_uint_6 %float_n4 %float_n4 %float_n3_15737653 %float_n0_485249996 %float_1_84773242 %float_1_84773242 + %476 = OpConstantComposite %_arr_float_uint_6 %float_n0_718548238 %float_2_08103061 %float_3_6681242 %float_4 %float_4 %float_4 + %float_n15 = OpConstant %float -15 + %float_n14 = OpConstant %float -14 + %479 = OpConstantComposite %_arr_float_uint_10 %float_n4_97062206 %float_n3_02937818 %float_n2_12619996 %float_n1_51049995 %float_n1_05780005 %float_n0_466800004 %float_0_119379997 %float_0_708813429 %float_1_29118657 %float_1_29118657 + %480 = OpConstantComposite %_arr_float_uint_10 %float_0_808913231 %float_1_19108677 %float_1_56830001 %float_1_9483 %float_2_30830002 %float_2_63840008 %float_2_85949993 %float_2_98726082 %float_3_01273918 %float_3_01273918 + %float_n12 = OpConstant %float -12 + %482 = OpConstantComposite %_arr_float_uint_10 %float_n2_30102992 %float_n2_30102992 %float_n1_93120003 %float_n1_52049994 %float_n1_05780005 %float_n0_466800004 
%float_0_119379997 %float_0_708813429 %float_1_29118657 %float_1_29118657 + %483 = OpConstantComposite %_arr_float_uint_10 %float_0_801995218 %float_1_19800484 %float_1_59430003 %float_1_99730003 %float_2_37829995 %float_2_76839995 %float_3_05150008 %float_3_27462935 %float_3_32743073 %float_3_32743073 +%float_0_0322580636 = OpConstant %float 0.0322580636 +%float_1_03225803 = OpConstant %float 1.03225803 + %486 = OpConstantComposite %v2float %float_1_03225803 %float_1_03225803 +%float_4_60443853e_09 = OpConstant %float 4.60443853e+09 +%float_2_00528435e_09 = OpConstant %float 2.00528435e+09 +%float_0_333333343 = OpConstant %float 0.333333343 + %float_5 = OpConstant %float 5 + %float_2_5 = OpConstant %float 2.5 +%float_0_0250000004 = OpConstant %float 0.0250000004 +%float_0_239999995 = OpConstant %float 0.239999995 +%float_0_0148148146 = OpConstant %float 0.0148148146 +%float_0_819999993 = OpConstant %float 0.819999993 + %496 = OpConstantComposite %v3float %float_9_99999975en05 %float_9_99999975en05 %float_9_99999975en05 +%float_0_0296296291 = OpConstant %float 0.0296296291 +%float_0_952381015 = OpConstant %float 0.952381015 + %499 = OpConstantComposite %v3float %float_0_952381015 %float_0_952381015 %float_0_952381015 + %MainPS = OpFunction %void None %377 + %500 = OpLabel + %501 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %502 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %503 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %504 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %505 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %506 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %507 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %508 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %509 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %510 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %511 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %512 = OpVariable 
%_ptr_Function__arr_float_uint_10 Function + %513 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %514 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %515 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %516 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %517 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %518 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %519 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %520 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %521 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %522 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %523 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %524 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %525 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %526 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %527 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %528 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %529 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %530 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %531 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %532 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %533 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %534 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %535 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %536 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %537 = OpLoad %v2float %in_var_TEXCOORD0 + %538 = OpLoad %uint %gl_Layer + %539 = OpFSub %v2float %537 %128 + %540 = OpFMul %v2float %539 %486 + %541 = OpCompositeExtract %float %540 0 + %542 = OpCompositeExtract %float %540 1 + %543 = OpConvertUToF %float %538 + %544 = OpFMul %float %543 %float_0_0322580636 + %545 = OpCompositeConstruct %v4float %541 %542 %544 %float_0 + %546 = OpMatrixTimesMatrix %mat3v3float %422 %434 + %547 = OpMatrixTimesMatrix 
%mat3v3float %546 %406 + %548 = OpMatrixTimesMatrix %mat3v3float %402 %438 + %549 = OpMatrixTimesMatrix %mat3v3float %548 %418 + %550 = OpMatrixTimesMatrix %mat3v3float %395 %406 + %551 = OpMatrixTimesMatrix %mat3v3float %402 %399 + %552 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_42 + %553 = OpLoad %uint %552 + OpBranch %554 + %554 = OpLabel + OpLoopMerge %555 %556 None + OpBranch %557 + %557 = OpLabel + %558 = OpMatrixTimesMatrix %mat3v3float %548 %430 + %559 = OpMatrixTimesMatrix %mat3v3float %548 %426 + %560 = OpIEqual %bool %553 %uint_1 + OpSelectionMerge %561 None + OpBranchConditional %560 %562 %563 + %563 = OpLabel + %564 = OpIEqual %bool %553 %uint_2 + OpSelectionMerge %565 None + OpBranchConditional %564 %566 %567 + %567 = OpLabel + %568 = OpIEqual %bool %553 %uint_3 + OpSelectionMerge %569 None + OpBranchConditional %568 %570 %571 + %571 = OpLabel + %572 = OpIEqual %bool %553 %uint_4 + OpSelectionMerge %573 None + OpBranchConditional %572 %574 %575 + %575 = OpLabel + OpBranch %555 + %574 = OpLabel + OpBranch %555 + %573 = OpLabel + OpUnreachable + %570 = OpLabel + OpBranch %555 + %569 = OpLabel + OpUnreachable + %566 = OpLabel + OpBranch %555 + %565 = OpLabel + OpUnreachable + %562 = OpLabel + OpBranch %555 + %561 = OpLabel + OpUnreachable + %556 = OpLabel + OpBranch %554 + %555 = OpLabel + %576 = OpPhi %mat3v3float %549 %575 %446 %574 %414 %570 %559 %566 %558 %562 + %577 = OpVectorShuffle %v3float %545 %545 0 1 2 + %578 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_41 + %579 = OpLoad %uint %578 + %580 = OpUGreaterThanEqual %bool %579 %uint_3 + OpSelectionMerge %581 None + OpBranchConditional %580 %582 %583 + %583 = OpLabel + %584 = OpFSub %v3float %577 %192 + %585 = OpFMul %v3float %584 %193 + %586 = OpExtInst %v3float %1 Exp2 %585 + %587 = OpFMul %v3float %586 %194 + %588 = OpExtInst %v3float %1 Exp2 %448 + %589 = OpFMul %v3float %588 %194 + %590 = OpFSub %v3float %587 %589 + OpBranch %581 + %582 = OpLabel + %591 = OpExtInst %v3float %1 Pow 
%577 %182 + %592 = OpFSub %v3float %591 %183 + %593 = OpExtInst %v3float %1 FMax %132 %592 + %594 = OpFMul %v3float %185 %591 + %595 = OpFSub %v3float %184 %594 + %596 = OpFDiv %v3float %593 %595 + %597 = OpExtInst %v3float %1 Pow %596 %187 + %598 = OpFMul %v3float %597 %188 + OpBranch %581 + %581 = OpLabel + %599 = OpPhi %v3float %590 %583 %598 %582 + %600 = OpAccessChain %_ptr_Uniform_float %_Globals %int_17 + %601 = OpLoad %float %600 + %602 = OpFMul %float %601 %float_1_00055635 + %603 = OpFOrdLessThanEqual %bool %602 %float_7000 + %604 = OpFDiv %float %float_4_60443853e_09 %601 + %605 = OpFSub %float %float_2967800 %604 + %606 = OpFDiv %float %605 %602 + %607 = OpFAdd %float %float_99_1100006 %606 + %608 = OpFDiv %float %607 %602 + %609 = OpFAdd %float %float_0_244063005 %608 + %610 = OpFDiv %float %float_2_00528435e_09 %601 + %611 = OpFSub %float %float_1901800 %610 + %612 = OpFDiv %float %611 %602 + %613 = OpFAdd %float %float_247_479996 %612 + %614 = OpFDiv %float %613 %602 + %615 = OpFAdd %float %float_0_237039998 %614 + %616 = OpSelect %float %603 %609 %615 + %617 = OpFMul %float %float_n3 %616 + %618 = OpFMul %float %617 %616 + %619 = OpFMul %float %float_2_86999989 %616 + %620 = OpFAdd %float %618 %619 + %621 = OpFSub %float %620 %float_0_275000006 + %622 = OpCompositeConstruct %v2float %616 %621 + %623 = OpFMul %float %float_0_000154118257 %601 + %624 = OpFAdd %float %float_0_860117733 %623 + %625 = OpFMul %float %float_1_28641219en07 %601 + %626 = OpFMul %float %625 %601 + %627 = OpFAdd %float %624 %626 + %628 = OpFMul %float %float_0_00084242021 %601 + %629 = OpFAdd %float %float_1 %628 + %630 = OpFMul %float %float_7_08145137en07 %601 + %631 = OpFMul %float %630 %601 + %632 = OpFAdd %float %629 %631 + %633 = OpFDiv %float %627 %632 + %634 = OpFMul %float %float_4_22806261en05 %601 + %635 = OpFAdd %float %float_0_317398727 %634 + %636 = OpFMul %float %float_4_20481676en08 %601 + %637 = OpFMul %float %636 %601 + %638 = OpFAdd %float %635 %637 + %639 = 
OpFMul %float %float_2_8974182en05 %601 + %640 = OpFSub %float %float_1 %639 + %641 = OpFMul %float %float_1_61456057en07 %601 + %642 = OpFMul %float %641 %601 + %643 = OpFAdd %float %640 %642 + %644 = OpFDiv %float %638 %643 + %645 = OpFMul %float %float_3 %633 + %646 = OpFMul %float %float_2 %633 + %647 = OpFMul %float %float_8 %644 + %648 = OpFSub %float %646 %647 + %649 = OpFAdd %float %648 %float_4 + %650 = OpFDiv %float %645 %649 + %651 = OpFMul %float %float_2 %644 + %652 = OpFDiv %float %651 %649 + %653 = OpCompositeConstruct %v2float %650 %652 + %654 = OpFOrdLessThan %bool %601 %float_4000 + %655 = OpCompositeConstruct %v2bool %654 %654 + %656 = OpSelect %v2float %655 %653 %622 + %657 = OpAccessChain %_ptr_Uniform_float %_Globals %int_18 + %658 = OpLoad %float %657 + %659 = OpCompositeConstruct %v2float %633 %644 + %660 = OpExtInst %v2float %1 Normalize %659 + %661 = OpCompositeExtract %float %660 1 + %662 = OpFNegate %float %661 + %663 = OpFMul %float %662 %658 + %664 = OpFMul %float %663 %float_0_0500000007 + %665 = OpFAdd %float %633 %664 + %666 = OpCompositeExtract %float %660 0 + %667 = OpFMul %float %666 %658 + %668 = OpFMul %float %667 %float_0_0500000007 + %669 = OpFAdd %float %644 %668 + %670 = OpFMul %float %float_3 %665 + %671 = OpFMul %float %float_2 %665 + %672 = OpFMul %float %float_8 %669 + %673 = OpFSub %float %671 %672 + %674 = OpFAdd %float %673 %float_4 + %675 = OpFDiv %float %670 %674 + %676 = OpFMul %float %float_2 %669 + %677 = OpFDiv %float %676 %674 + %678 = OpCompositeConstruct %v2float %675 %677 + %679 = OpFSub %v2float %678 %653 + %680 = OpFAdd %v2float %656 %679 + %681 = OpCompositeExtract %float %680 0 + %682 = OpCompositeExtract %float %680 1 + %683 = OpExtInst %float %1 FMax %682 %float_1_00000001en10 + %684 = OpFDiv %float %681 %683 + %685 = OpCompositeInsert %v3float %684 %391 0 + %686 = OpCompositeInsert %v3float %float_1 %685 1 + %687 = OpFSub %float %float_1 %681 + %688 = OpFSub %float %687 %682 + %689 = OpFDiv %float 
%688 %683 + %690 = OpCompositeInsert %v3float %689 %686 2 + %691 = OpExtInst %float %1 FMax %float_0_328999996 %float_1_00000001en10 + %692 = OpFDiv %float %float_0_312700003 %691 + %693 = OpCompositeInsert %v3float %692 %391 0 + %694 = OpCompositeInsert %v3float %float_1 %693 1 + %695 = OpFDiv %float %float_0_358299971 %691 + %696 = OpCompositeInsert %v3float %695 %694 2 + %697 = OpVectorTimesMatrix %v3float %690 %452 + %698 = OpVectorTimesMatrix %v3float %696 %452 + %699 = OpCompositeExtract %float %698 0 + %700 = OpCompositeExtract %float %697 0 + %701 = OpFDiv %float %699 %700 + %702 = OpCompositeConstruct %v3float %701 %float_0 %float_0 + %703 = OpCompositeExtract %float %698 1 + %704 = OpCompositeExtract %float %697 1 + %705 = OpFDiv %float %703 %704 + %706 = OpCompositeConstruct %v3float %float_0 %705 %float_0 + %707 = OpCompositeExtract %float %698 2 + %708 = OpCompositeExtract %float %697 2 + %709 = OpFDiv %float %707 %708 + %710 = OpCompositeConstruct %v3float %float_0 %float_0 %709 + %711 = OpCompositeConstruct %mat3v3float %702 %706 %710 + %712 = OpMatrixTimesMatrix %mat3v3float %452 %711 + %713 = OpMatrixTimesMatrix %mat3v3float %712 %456 + %714 = OpMatrixTimesMatrix %mat3v3float %422 %713 + %715 = OpMatrixTimesMatrix %mat3v3float %714 %418 + %716 = OpVectorTimesMatrix %v3float %599 %715 + %717 = OpVectorTimesMatrix %v3float %716 %547 + %718 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_9 + %719 = OpAccessChain %_ptr_Uniform_float %_Globals %int_9 %int_3 + %720 = OpLoad %float %719 + %721 = OpFOrdNotEqual %bool %720 %float_0 + OpSelectionMerge %722 None + OpBranchConditional %721 %723 %722 + %723 = OpLabel + %724 = OpDot %float %717 %67 + %725 = OpCompositeConstruct %v3float %724 %724 %724 + %726 = OpFDiv %v3float %717 %725 + %727 = OpFSub %v3float %726 %135 + %728 = OpDot %float %727 %727 + %729 = OpFMul %float %float_n4 %728 + %730 = OpExtInst %float %1 Exp2 %729 + %731 = OpFSub %float %float_1 %730 + %732 = OpAccessChain %_ptr_Uniform_float 
%_Globals %int_44 + %733 = OpLoad %float %732 + %734 = OpFMul %float %float_n4 %733 + %735 = OpFMul %float %734 %724 + %736 = OpFMul %float %735 %724 + %737 = OpExtInst %float %1 Exp2 %736 + %738 = OpFSub %float %float_1 %737 + %739 = OpFMul %float %731 %738 + %740 = OpMatrixTimesMatrix %mat3v3float %461 %406 + %741 = OpMatrixTimesMatrix %mat3v3float %549 %740 + %742 = OpVectorTimesMatrix %v3float %717 %741 + %743 = OpCompositeConstruct %v3float %739 %739 %739 + %744 = OpExtInst %v3float %1 FMix %717 %742 %743 + OpBranch %722 + %722 = OpLabel + %745 = OpPhi %v3float %717 %581 %744 %723 + %746 = OpDot %float %745 %67 + %747 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_24 + %748 = OpLoad %v4float %747 + %749 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_19 + %750 = OpLoad %v4float %749 + %751 = OpFMul %v4float %748 %750 + %752 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_25 + %753 = OpLoad %v4float %752 + %754 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_20 + %755 = OpLoad %v4float %754 + %756 = OpFMul %v4float %753 %755 + %757 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_26 + %758 = OpLoad %v4float %757 + %759 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_21 + %760 = OpLoad %v4float %759 + %761 = OpFMul %v4float %758 %760 + %762 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_27 + %763 = OpLoad %v4float %762 + %764 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_22 + %765 = OpLoad %v4float %764 + %766 = OpFMul %v4float %763 %765 + %767 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_28 + %768 = OpLoad %v4float %767 + %769 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_23 + %770 = OpLoad %v4float %769 + %771 = OpFAdd %v4float %768 %770 + %772 = OpCompositeConstruct %v3float %746 %746 %746 + %773 = OpVectorShuffle %v3float %751 %751 0 1 2 + %774 = OpCompositeExtract %float %751 3 + %775 = OpCompositeConstruct %v3float %774 %774 %774 + %776 = OpFMul %v3float %773 %775 + %777 = OpExtInst %v3float %1 FMix 
%772 %745 %776 + %778 = OpExtInst %v3float %1 FMax %132 %777 + %779 = OpFMul %v3float %778 %307 + %780 = OpVectorShuffle %v3float %756 %756 0 1 2 + %781 = OpCompositeExtract %float %756 3 + %782 = OpCompositeConstruct %v3float %781 %781 %781 + %783 = OpFMul %v3float %780 %782 + %784 = OpExtInst %v3float %1 Pow %779 %783 + %785 = OpFMul %v3float %784 %194 + %786 = OpVectorShuffle %v3float %761 %761 0 1 2 + %787 = OpCompositeExtract %float %761 3 + %788 = OpCompositeConstruct %v3float %787 %787 %787 + %789 = OpFMul %v3float %786 %788 + %790 = OpFDiv %v3float %135 %789 + %791 = OpExtInst %v3float %1 Pow %785 %790 + %792 = OpVectorShuffle %v3float %766 %766 0 1 2 + %793 = OpCompositeExtract %float %766 3 + %794 = OpCompositeConstruct %v3float %793 %793 %793 + %795 = OpFMul %v3float %792 %794 + %796 = OpFMul %v3float %791 %795 + %797 = OpVectorShuffle %v3float %771 %771 0 1 2 + %798 = OpCompositeExtract %float %771 3 + %799 = OpCompositeConstruct %v3float %798 %798 %798 + %800 = OpFAdd %v3float %797 %799 + %801 = OpFAdd %v3float %796 %800 + %802 = OpAccessChain %_ptr_Uniform_float %_Globals %int_39 + %803 = OpLoad %float %802 + %804 = OpExtInst %float %1 SmoothStep %float_0 %803 %746 + %805 = OpFSub %float %float_1 %804 + %806 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_34 + %807 = OpLoad %v4float %806 + %808 = OpFMul %v4float %807 %750 + %809 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_35 + %810 = OpLoad %v4float %809 + %811 = OpFMul %v4float %810 %755 + %812 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_36 + %813 = OpLoad %v4float %812 + %814 = OpFMul %v4float %813 %760 + %815 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_37 + %816 = OpLoad %v4float %815 + %817 = OpFMul %v4float %816 %765 + %818 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_38 + %819 = OpLoad %v4float %818 + %820 = OpFAdd %v4float %819 %770 + %821 = OpVectorShuffle %v3float %808 %808 0 1 2 + %822 = OpCompositeExtract %float %808 3 + %823 = OpCompositeConstruct 
%v3float %822 %822 %822 + %824 = OpFMul %v3float %821 %823 + %825 = OpExtInst %v3float %1 FMix %772 %745 %824 + %826 = OpExtInst %v3float %1 FMax %132 %825 + %827 = OpFMul %v3float %826 %307 + %828 = OpVectorShuffle %v3float %811 %811 0 1 2 + %829 = OpCompositeExtract %float %811 3 + %830 = OpCompositeConstruct %v3float %829 %829 %829 + %831 = OpFMul %v3float %828 %830 + %832 = OpExtInst %v3float %1 Pow %827 %831 + %833 = OpFMul %v3float %832 %194 + %834 = OpVectorShuffle %v3float %814 %814 0 1 2 + %835 = OpCompositeExtract %float %814 3 + %836 = OpCompositeConstruct %v3float %835 %835 %835 + %837 = OpFMul %v3float %834 %836 + %838 = OpFDiv %v3float %135 %837 + %839 = OpExtInst %v3float %1 Pow %833 %838 + %840 = OpVectorShuffle %v3float %817 %817 0 1 2 + %841 = OpCompositeExtract %float %817 3 + %842 = OpCompositeConstruct %v3float %841 %841 %841 + %843 = OpFMul %v3float %840 %842 + %844 = OpFMul %v3float %839 %843 + %845 = OpVectorShuffle %v3float %820 %820 0 1 2 + %846 = OpCompositeExtract %float %820 3 + %847 = OpCompositeConstruct %v3float %846 %846 %846 + %848 = OpFAdd %v3float %845 %847 + %849 = OpFAdd %v3float %844 %848 + %850 = OpAccessChain %_ptr_Uniform_float %_Globals %int_40 + %851 = OpLoad %float %850 + %852 = OpExtInst %float %1 SmoothStep %851 %float_1 %746 + %853 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_29 + %854 = OpLoad %v4float %853 + %855 = OpFMul %v4float %854 %750 + %856 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_30 + %857 = OpLoad %v4float %856 + %858 = OpFMul %v4float %857 %755 + %859 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_31 + %860 = OpLoad %v4float %859 + %861 = OpFMul %v4float %860 %760 + %862 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_32 + %863 = OpLoad %v4float %862 + %864 = OpFMul %v4float %863 %765 + %865 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_33 + %866 = OpLoad %v4float %865 + %867 = OpFAdd %v4float %866 %770 + %868 = OpVectorShuffle %v3float %855 %855 0 1 2 + %869 = 
OpCompositeExtract %float %855 3 + %870 = OpCompositeConstruct %v3float %869 %869 %869 + %871 = OpFMul %v3float %868 %870 + %872 = OpExtInst %v3float %1 FMix %772 %745 %871 + %873 = OpExtInst %v3float %1 FMax %132 %872 + %874 = OpFMul %v3float %873 %307 + %875 = OpVectorShuffle %v3float %858 %858 0 1 2 + %876 = OpCompositeExtract %float %858 3 + %877 = OpCompositeConstruct %v3float %876 %876 %876 + %878 = OpFMul %v3float %875 %877 + %879 = OpExtInst %v3float %1 Pow %874 %878 + %880 = OpFMul %v3float %879 %194 + %881 = OpVectorShuffle %v3float %861 %861 0 1 2 + %882 = OpCompositeExtract %float %861 3 + %883 = OpCompositeConstruct %v3float %882 %882 %882 + %884 = OpFMul %v3float %881 %883 + %885 = OpFDiv %v3float %135 %884 + %886 = OpExtInst %v3float %1 Pow %880 %885 + %887 = OpVectorShuffle %v3float %864 %864 0 1 2 + %888 = OpCompositeExtract %float %864 3 + %889 = OpCompositeConstruct %v3float %888 %888 %888 + %890 = OpFMul %v3float %887 %889 + %891 = OpFMul %v3float %886 %890 + %892 = OpVectorShuffle %v3float %867 %867 0 1 2 + %893 = OpCompositeExtract %float %867 3 + %894 = OpCompositeConstruct %v3float %893 %893 %893 + %895 = OpFAdd %v3float %892 %894 + %896 = OpFAdd %v3float %891 %895 + %897 = OpFSub %float %804 %852 + %898 = OpCompositeConstruct %v3float %805 %805 %805 + %899 = OpFMul %v3float %801 %898 + %900 = OpCompositeConstruct %v3float %897 %897 %897 + %901 = OpFMul %v3float %896 %900 + %902 = OpFAdd %v3float %899 %901 + %903 = OpCompositeConstruct %v3float %852 %852 %852 + %904 = OpFMul %v3float %849 %903 + %905 = OpFAdd %v3float %902 %904 + %906 = OpVectorTimesMatrix %v3float %905 %549 + %907 = OpMatrixTimesMatrix %mat3v3float %551 %465 + %908 = OpMatrixTimesMatrix %mat3v3float %907 %550 + %909 = OpMatrixTimesMatrix %mat3v3float %551 %469 + %910 = OpMatrixTimesMatrix %mat3v3float %909 %550 + %911 = OpVectorTimesMatrix %v3float %905 %908 + %912 = OpAccessChain %_ptr_Uniform_float %_Globals %int_43 + %913 = OpLoad %float %912 + %914 = 
OpCompositeConstruct %v3float %913 %913 %913 + %915 = OpExtInst %v3float %1 FMix %905 %911 %914 + %916 = OpVectorTimesMatrix %v3float %915 %551 + %917 = OpCompositeExtract %float %916 0 + %918 = OpCompositeExtract %float %916 1 + %919 = OpExtInst %float %1 FMin %917 %918 + %920 = OpCompositeExtract %float %916 2 + %921 = OpExtInst %float %1 FMin %919 %920 + %922 = OpExtInst %float %1 FMax %917 %918 + %923 = OpExtInst %float %1 FMax %922 %920 + %924 = OpExtInst %float %1 FMax %923 %float_1_00000001en10 + %925 = OpExtInst %float %1 FMax %921 %float_1_00000001en10 + %926 = OpFSub %float %924 %925 + %927 = OpExtInst %float %1 FMax %923 %float_0_00999999978 + %928 = OpFDiv %float %926 %927 + %929 = OpFSub %float %920 %918 + %930 = OpFMul %float %920 %929 + %931 = OpFSub %float %918 %917 + %932 = OpFMul %float %918 %931 + %933 = OpFAdd %float %930 %932 + %934 = OpFSub %float %917 %920 + %935 = OpFMul %float %917 %934 + %936 = OpFAdd %float %933 %935 + %937 = OpExtInst %float %1 Sqrt %936 + %938 = OpFAdd %float %920 %918 + %939 = OpFAdd %float %938 %917 + %940 = OpFMul %float %float_1_75 %937 + %941 = OpFAdd %float %939 %940 + %942 = OpFMul %float %941 %float_0_333333343 + %943 = OpFSub %float %928 %float_0_400000006 + %944 = OpFMul %float %943 %float_5 + %945 = OpFMul %float %943 %float_2_5 + %946 = OpExtInst %float %1 FAbs %945 + %947 = OpFSub %float %float_1 %946 + %948 = OpExtInst %float %1 FMax %947 %float_0 + %949 = OpExtInst %float %1 FSign %944 + %950 = OpConvertFToS %int %949 + %951 = OpConvertSToF %float %950 + %952 = OpFMul %float %948 %948 + %953 = OpFSub %float %float_1 %952 + %954 = OpFMul %float %951 %953 + %955 = OpFAdd %float %float_1 %954 + %956 = OpFMul %float %955 %float_0_0250000004 + %957 = OpFOrdLessThanEqual %bool %942 %float_0_0533333346 + OpSelectionMerge %958 None + OpBranchConditional %957 %959 %960 + %960 = OpLabel + %961 = OpFOrdGreaterThanEqual %bool %942 %float_0_159999996 + OpSelectionMerge %962 None + OpBranchConditional %961 %963 %964 + 
%964 = OpLabel + %965 = OpFDiv %float %float_0_239999995 %941 + %966 = OpFSub %float %965 %float_0_5 + %967 = OpFMul %float %956 %966 + OpBranch %962 + %963 = OpLabel + OpBranch %962 + %962 = OpLabel + %968 = OpPhi %float %967 %964 %float_0 %963 + OpBranch %958 + %959 = OpLabel + OpBranch %958 + %958 = OpLabel + %969 = OpPhi %float %968 %962 %956 %959 + %970 = OpFAdd %float %float_1 %969 + %971 = OpCompositeConstruct %v3float %970 %970 %970 + %972 = OpFMul %v3float %916 %971 + %973 = OpCompositeExtract %float %972 0 + %974 = OpCompositeExtract %float %972 1 + %975 = OpFOrdEqual %bool %973 %974 + %976 = OpCompositeExtract %float %972 2 + %977 = OpFOrdEqual %bool %974 %976 + %978 = OpLogicalAnd %bool %975 %977 + OpSelectionMerge %979 None + OpBranchConditional %978 %980 %981 + %981 = OpLabel + %982 = OpExtInst %float %1 Sqrt %float_3 + %983 = OpFSub %float %974 %976 + %984 = OpFMul %float %982 %983 + %985 = OpFMul %float %float_2 %973 + %986 = OpFSub %float %985 %974 + %987 = OpFSub %float %986 %976 + %988 = OpExtInst %float %1 Atan2 %984 %987 + %989 = OpFMul %float %float_57_2957764 %988 + OpBranch %979 + %980 = OpLabel + OpBranch %979 + %979 = OpLabel + %990 = OpPhi %float %989 %981 %float_0 %980 + %991 = OpFOrdLessThan %bool %990 %float_0 + OpSelectionMerge %992 None + OpBranchConditional %991 %993 %992 + %993 = OpLabel + %994 = OpFAdd %float %990 %float_360 + OpBranch %992 + %992 = OpLabel + %995 = OpPhi %float %990 %979 %994 %993 + %996 = OpExtInst %float %1 FClamp %995 %float_0 %float_360 + %997 = OpFOrdGreaterThan %bool %996 %float_180 + OpSelectionMerge %998 None + OpBranchConditional %997 %999 %998 + %999 = OpLabel + %1000 = OpFSub %float %996 %float_360 + OpBranch %998 + %998 = OpLabel + %1001 = OpPhi %float %996 %992 %1000 %999 + %1002 = OpFMul %float %1001 %float_0_0148148146 + %1003 = OpExtInst %float %1 FAbs %1002 + %1004 = OpFSub %float %float_1 %1003 + %1005 = OpExtInst %float %1 SmoothStep %float_0 %float_1 %1004 + %1006 = OpFMul %float %1005 %1005 + 
%1007 = OpFMul %float %1006 %928 + %1008 = OpFSub %float %float_0_0299999993 %973 + %1009 = OpFMul %float %1007 %1008 + %1010 = OpFMul %float %1009 %float_0_180000007 + %1011 = OpFAdd %float %973 %1010 + %1012 = OpCompositeInsert %v3float %1011 %972 0 + %1013 = OpVectorTimesMatrix %v3float %1012 %410 + %1014 = OpExtInst %v3float %1 FMax %132 %1013 + %1015 = OpDot %float %1014 %67 + %1016 = OpCompositeConstruct %v3float %1015 %1015 %1015 + %1017 = OpExtInst %v3float %1 FMix %1016 %1014 %228 + %1018 = OpAccessChain %_ptr_Uniform_float %_Globals %int_13 + %1019 = OpLoad %float %1018 + %1020 = OpFAdd %float %float_1 %1019 + %1021 = OpAccessChain %_ptr_Uniform_float %_Globals %int_11 + %1022 = OpLoad %float %1021 + %1023 = OpFSub %float %1020 %1022 + %1024 = OpAccessChain %_ptr_Uniform_float %_Globals %int_14 + %1025 = OpLoad %float %1024 + %1026 = OpFAdd %float %float_1 %1025 + %1027 = OpAccessChain %_ptr_Uniform_float %_Globals %int_12 + %1028 = OpLoad %float %1027 + %1029 = OpFSub %float %1026 %1028 + %1030 = OpFOrdGreaterThan %bool %1022 %float_0_800000012 + OpSelectionMerge %1031 None + OpBranchConditional %1030 %1032 %1033 + %1033 = OpLabel + %1034 = OpFAdd %float %float_0_180000007 %1019 + %1035 = OpFDiv %float %1034 %1023 + %1036 = OpExtInst %float %1 Log %float_0_180000007 + %1037 = OpExtInst %float %1 Log %float_10 + %1038 = OpFDiv %float %1036 %1037 + %1039 = OpFSub %float %float_2 %1035 + %1040 = OpFDiv %float %1035 %1039 + %1041 = OpExtInst %float %1 Log %1040 + %1042 = OpFMul %float %float_0_5 %1041 + %1043 = OpAccessChain %_ptr_Uniform_float %_Globals %int_10 + %1044 = OpLoad %float %1043 + %1045 = OpFDiv %float %1023 %1044 + %1046 = OpFMul %float %1042 %1045 + %1047 = OpFSub %float %1038 %1046 + OpBranch %1031 + %1032 = OpLabel + %1048 = OpFSub %float %float_0_819999993 %1022 + %1049 = OpAccessChain %_ptr_Uniform_float %_Globals %int_10 + %1050 = OpLoad %float %1049 + %1051 = OpFDiv %float %1048 %1050 + %1052 = OpExtInst %float %1 Log %float_0_180000007 
+ %1053 = OpExtInst %float %1 Log %float_10 + %1054 = OpFDiv %float %1052 %1053 + %1055 = OpFAdd %float %1051 %1054 + OpBranch %1031 + %1031 = OpLabel + %1056 = OpPhi %float %1047 %1033 %1055 %1032 + %1057 = OpFSub %float %float_1 %1022 + %1058 = OpAccessChain %_ptr_Uniform_float %_Globals %int_10 + %1059 = OpLoad %float %1058 + %1060 = OpFDiv %float %1057 %1059 + %1061 = OpFSub %float %1060 %1056 + %1062 = OpFDiv %float %1028 %1059 + %1063 = OpFSub %float %1062 %1061 + %1064 = OpExtInst %v3float %1 Log %1017 + %1065 = OpExtInst %float %1 Log %float_10 + %1066 = OpCompositeConstruct %v3float %1065 %1065 %1065 + %1067 = OpFDiv %v3float %1064 %1066 + %1068 = OpCompositeConstruct %v3float %1059 %1059 %1059 + %1069 = OpCompositeConstruct %v3float %1061 %1061 %1061 + %1070 = OpFAdd %v3float %1067 %1069 + %1071 = OpFMul %v3float %1068 %1070 + %1072 = OpFNegate %float %1019 + %1073 = OpCompositeConstruct %v3float %1072 %1072 %1072 + %1074 = OpFMul %float %float_2 %1023 + %1075 = OpCompositeConstruct %v3float %1074 %1074 %1074 + %1076 = OpFMul %float %float_n2 %1059 + %1077 = OpFDiv %float %1076 %1023 + %1078 = OpCompositeConstruct %v3float %1077 %1077 %1077 + %1079 = OpCompositeConstruct %v3float %1056 %1056 %1056 + %1080 = OpFSub %v3float %1067 %1079 + %1081 = OpFMul %v3float %1078 %1080 + %1082 = OpExtInst %v3float %1 Exp %1081 + %1083 = OpFAdd %v3float %135 %1082 + %1084 = OpFDiv %v3float %1075 %1083 + %1085 = OpFAdd %v3float %1073 %1084 + %1086 = OpCompositeConstruct %v3float %1026 %1026 %1026 + %1087 = OpFMul %float %float_2 %1029 + %1088 = OpCompositeConstruct %v3float %1087 %1087 %1087 + %1089 = OpFMul %float %float_2 %1059 + %1090 = OpFDiv %float %1089 %1029 + %1091 = OpCompositeConstruct %v3float %1090 %1090 %1090 + %1092 = OpCompositeConstruct %v3float %1063 %1063 %1063 + %1093 = OpFSub %v3float %1067 %1092 + %1094 = OpFMul %v3float %1091 %1093 + %1095 = OpExtInst %v3float %1 Exp %1094 + %1096 = OpFAdd %v3float %135 %1095 + %1097 = OpFDiv %v3float %1088 %1096 + 
%1098 = OpFSub %v3float %1086 %1097 + %1099 = OpFOrdLessThan %v3bool %1067 %1079 + %1100 = OpSelect %v3float %1099 %1085 %1071 + %1101 = OpFOrdGreaterThan %v3bool %1067 %1092 + %1102 = OpSelect %v3float %1101 %1098 %1071 + %1103 = OpFSub %float %1063 %1056 + %1104 = OpCompositeConstruct %v3float %1103 %1103 %1103 + %1105 = OpFDiv %v3float %1080 %1104 + %1106 = OpExtInst %v3float %1 FClamp %1105 %132 %135 + %1107 = OpFOrdLessThan %bool %1063 %1056 + %1108 = OpFSub %v3float %135 %1106 + %1109 = OpCompositeConstruct %v3bool %1107 %1107 %1107 + %1110 = OpSelect %v3float %1109 %1108 %1106 + %1111 = OpFMul %v3float %239 %1110 + %1112 = OpFSub %v3float %238 %1111 + %1113 = OpFMul %v3float %1112 %1110 + %1114 = OpFMul %v3float %1113 %1110 + %1115 = OpExtInst %v3float %1 FMix %1100 %1102 %1114 + %1116 = OpDot %float %1115 %67 + %1117 = OpCompositeConstruct %v3float %1116 %1116 %1116 + %1118 = OpExtInst %v3float %1 FMix %1117 %1115 %241 + %1119 = OpExtInst %v3float %1 FMax %132 %1118 + %1120 = OpVectorTimesMatrix %v3float %1119 %910 + %1121 = OpExtInst %v3float %1 FMix %1119 %1120 %914 + %1122 = OpVectorTimesMatrix %v3float %1121 %549 + %1123 = OpExtInst %v3float %1 FMax %132 %1122 + %1124 = OpFOrdEqual %bool %720 %float_0 + OpSelectionMerge %1125 DontFlatten + OpBranchConditional %1124 %1126 %1125 + %1126 = OpLabel + %1127 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_2 + %1128 = OpLoad %v4float %1127 + %1129 = OpVectorShuffle %v3float %1128 %1128 0 1 2 + %1130 = OpDot %float %906 %1129 + %1131 = OpCompositeInsert %v3float %1130 %391 0 + %1132 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_3 + %1133 = OpLoad %v4float %1132 + %1134 = OpVectorShuffle %v3float %1133 %1133 0 1 2 + %1135 = OpDot %float %906 %1134 + %1136 = OpCompositeInsert %v3float %1135 %1131 1 + %1137 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_4 + %1138 = OpLoad %v4float %1137 + %1139 = OpVectorShuffle %v3float %1138 %1138 0 1 2 + %1140 = OpDot %float %906 %1139 + %1141 = 
OpCompositeInsert %v3float %1140 %1136 2 + %1142 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_8 + %1143 = OpLoad %v4float %1142 + %1144 = OpVectorShuffle %v3float %1143 %1143 0 1 2 + %1145 = OpLoad %v4float %718 + %1146 = OpVectorShuffle %v3float %1145 %1145 0 1 2 + %1147 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_7 + %1148 = OpLoad %v4float %1147 + %1149 = OpVectorShuffle %v3float %1148 %1148 0 1 2 + %1150 = OpDot %float %906 %1149 + %1151 = OpFAdd %float %1150 %float_1 + %1152 = OpFDiv %float %float_1 %1151 + %1153 = OpCompositeConstruct %v3float %1152 %1152 %1152 + %1154 = OpFMul %v3float %1146 %1153 + %1155 = OpFAdd %v3float %1144 %1154 + %1156 = OpFMul %v3float %1141 %1155 + %1157 = OpExtInst %v3float %1 FMax %132 %1156 + %1158 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_5 + %1159 = OpLoad %v4float %1158 + %1160 = OpVectorShuffle %v3float %1159 %1159 0 0 0 + %1161 = OpFSub %v3float %1160 %1157 + %1162 = OpExtInst %v3float %1 FMax %132 %1161 + %1163 = OpVectorShuffle %v3float %1159 %1159 2 2 2 + %1164 = OpExtInst %v3float %1 FMax %1157 %1163 + %1165 = OpExtInst %v3float %1 FClamp %1157 %1160 %1163 + %1166 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_6 + %1167 = OpLoad %v4float %1166 + %1168 = OpVectorShuffle %v3float %1167 %1167 0 0 0 + %1169 = OpFMul %v3float %1164 %1168 + %1170 = OpVectorShuffle %v3float %1167 %1167 1 1 1 + %1171 = OpFAdd %v3float %1169 %1170 + %1172 = OpVectorShuffle %v3float %1159 %1159 3 3 3 + %1173 = OpFAdd %v3float %1164 %1172 + %1174 = OpFDiv %v3float %135 %1173 + %1175 = OpFMul %v3float %1171 %1174 + %1176 = OpVectorShuffle %v3float %1138 %1138 3 3 3 + %1177 = OpFMul %v3float %1165 %1176 + %1178 = OpVectorShuffle %v3float %1128 %1128 3 3 3 + %1179 = OpFMul %v3float %1162 %1178 + %1180 = OpVectorShuffle %v3float %1159 %1159 1 1 1 + %1181 = OpFAdd %v3float %1162 %1180 + %1182 = OpFDiv %v3float %135 %1181 + %1183 = OpFMul %v3float %1179 %1182 + %1184 = OpVectorShuffle %v3float %1133 %1133 3 3 3 + %1185 = 
OpFAdd %v3float %1183 %1184 + %1186 = OpFAdd %v3float %1177 %1185 + %1187 = OpFAdd %v3float %1175 %1186 + %1188 = OpFSub %v3float %1187 %248 + OpBranch %1125 + %1125 = OpLabel + %1189 = OpPhi %v3float %1123 %1031 %1188 %1126 + %1190 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_0 + %1191 = OpLoad %float %1190 + %1192 = OpCompositeConstruct %v3float %1191 %1191 %1191 + %1193 = OpFMul %v3float %1189 %1189 + %1194 = OpFMul %v3float %1192 %1193 + %1195 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_1 + %1196 = OpLoad %float %1195 + %1197 = OpCompositeConstruct %v3float %1196 %1196 %1196 + %1198 = OpFMul %v3float %1197 %1189 + %1199 = OpFAdd %v3float %1194 %1198 + %1200 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_2 + %1201 = OpLoad %float %1200 + %1202 = OpCompositeConstruct %v3float %1201 %1201 %1201 + %1203 = OpFAdd %v3float %1199 %1202 + %1204 = OpAccessChain %_ptr_Uniform_v3float %_Globals %int_15 + %1205 = OpLoad %v3float %1204 + %1206 = OpFMul %v3float %1203 %1205 + %1207 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_16 + %1208 = OpLoad %v4float %1207 + %1209 = OpVectorShuffle %v3float %1208 %1208 0 1 2 + %1210 = OpAccessChain %_ptr_Uniform_float %_Globals %int_16 %int_3 + %1211 = OpLoad %float %1210 + %1212 = OpCompositeConstruct %v3float %1211 %1211 %1211 + %1213 = OpExtInst %v3float %1 FMix %1206 %1209 %1212 + %1214 = OpExtInst %v3float %1 FMax %132 %1213 + %1215 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 %int_1 + %1216 = OpLoad %float %1215 + %1217 = OpCompositeConstruct %v3float %1216 %1216 %1216 + %1218 = OpExtInst %v3float %1 Pow %1214 %1217 + %1219 = OpIEqual %bool %579 %uint_0 + OpSelectionMerge %1220 DontFlatten + OpBranchConditional %1219 %1221 %1222 + %1222 = OpLabel + %1223 = OpIEqual %bool %579 %uint_1 + OpSelectionMerge %1224 None + OpBranchConditional %1223 %1225 %1226 + %1226 = OpLabel + %1227 = OpIEqual %bool %579 %uint_3 + %1228 = OpIEqual %bool %579 %uint_5 + %1229 = OpLogicalOr %bool %1227 
%1228 + OpSelectionMerge %1230 None + OpBranchConditional %1229 %1231 %1232 + %1232 = OpLabel + %1233 = OpIEqual %bool %579 %uint_4 + %1234 = OpIEqual %bool %579 %uint_6 + %1235 = OpLogicalOr %bool %1233 %1234 + OpSelectionMerge %1236 None + OpBranchConditional %1235 %1237 %1238 + %1238 = OpLabel + %1239 = OpIEqual %bool %579 %uint_7 + OpSelectionMerge %1240 None + OpBranchConditional %1239 %1241 %1242 + %1242 = OpLabel + %1243 = OpVectorTimesMatrix %v3float %1218 %547 + %1244 = OpVectorTimesMatrix %v3float %1243 %576 + %1245 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 %int_2 + %1246 = OpLoad %float %1245 + %1247 = OpCompositeConstruct %v3float %1246 %1246 %1246 + %1248 = OpExtInst %v3float %1 Pow %1244 %1247 + OpBranch %1240 + %1241 = OpLabel + %1249 = OpVectorTimesMatrix %v3float %906 %547 + %1250 = OpVectorTimesMatrix %v3float %1249 %576 + %1251 = OpFMul %v3float %1250 %496 + %1252 = OpExtInst %v3float %1 Pow %1251 %263 + %1253 = OpFMul %v3float %184 %1252 + %1254 = OpFAdd %v3float %183 %1253 + %1255 = OpFMul %v3float %185 %1252 + %1256 = OpFAdd %v3float %135 %1255 + %1257 = OpFDiv %v3float %135 %1256 + %1258 = OpFMul %v3float %1254 %1257 + %1259 = OpExtInst %v3float %1 Pow %1258 %264 + OpBranch %1240 + %1240 = OpLabel + %1260 = OpPhi %v3float %1248 %1242 %1259 %1241 + OpBranch %1236 + %1237 = OpLabel + %1261 = OpMatrixTimesMatrix %mat3v3float %546 %399 + %1262 = OpFMul %v3float %906 %262 + %1263 = OpVectorTimesMatrix %v3float %1262 %1261 + %1264 = OpCompositeExtract %float %1263 0 + %1265 = OpCompositeExtract %float %1263 1 + %1266 = OpExtInst %float %1 FMin %1264 %1265 + %1267 = OpCompositeExtract %float %1263 2 + %1268 = OpExtInst %float %1 FMin %1266 %1267 + %1269 = OpExtInst %float %1 FMax %1264 %1265 + %1270 = OpExtInst %float %1 FMax %1269 %1267 + %1271 = OpExtInst %float %1 FMax %1270 %float_1_00000001en10 + %1272 = OpExtInst %float %1 FMax %1268 %float_1_00000001en10 + %1273 = OpFSub %float %1271 %1272 + %1274 = OpExtInst %float %1 FMax %1270 
%float_0_00999999978 + %1275 = OpFDiv %float %1273 %1274 + %1276 = OpFSub %float %1267 %1265 + %1277 = OpFMul %float %1267 %1276 + %1278 = OpFSub %float %1265 %1264 + %1279 = OpFMul %float %1265 %1278 + %1280 = OpFAdd %float %1277 %1279 + %1281 = OpFSub %float %1264 %1267 + %1282 = OpFMul %float %1264 %1281 + %1283 = OpFAdd %float %1280 %1282 + %1284 = OpExtInst %float %1 Sqrt %1283 + %1285 = OpFAdd %float %1267 %1265 + %1286 = OpFAdd %float %1285 %1264 + %1287 = OpFMul %float %float_1_75 %1284 + %1288 = OpFAdd %float %1286 %1287 + %1289 = OpFMul %float %1288 %float_0_333333343 + %1290 = OpFSub %float %1275 %float_0_400000006 + %1291 = OpFMul %float %1290 %float_5 + %1292 = OpFMul %float %1290 %float_2_5 + %1293 = OpExtInst %float %1 FAbs %1292 + %1294 = OpFSub %float %float_1 %1293 + %1295 = OpExtInst %float %1 FMax %1294 %float_0 + %1296 = OpExtInst %float %1 FSign %1291 + %1297 = OpConvertFToS %int %1296 + %1298 = OpConvertSToF %float %1297 + %1299 = OpFMul %float %1295 %1295 + %1300 = OpFSub %float %float_1 %1299 + %1301 = OpFMul %float %1298 %1300 + %1302 = OpFAdd %float %float_1 %1301 + %1303 = OpFMul %float %1302 %float_0_0250000004 + %1304 = OpFOrdLessThanEqual %bool %1289 %float_0_0533333346 + OpSelectionMerge %1305 None + OpBranchConditional %1304 %1306 %1307 + %1307 = OpLabel + %1308 = OpFOrdGreaterThanEqual %bool %1289 %float_0_159999996 + OpSelectionMerge %1309 None + OpBranchConditional %1308 %1310 %1311 + %1311 = OpLabel + %1312 = OpFDiv %float %float_0_239999995 %1288 + %1313 = OpFSub %float %1312 %float_0_5 + %1314 = OpFMul %float %1303 %1313 + OpBranch %1309 + %1310 = OpLabel + OpBranch %1309 + %1309 = OpLabel + %1315 = OpPhi %float %1314 %1311 %float_0 %1310 + OpBranch %1305 + %1306 = OpLabel + OpBranch %1305 + %1305 = OpLabel + %1316 = OpPhi %float %1315 %1309 %1303 %1306 + %1317 = OpFAdd %float %float_1 %1316 + %1318 = OpCompositeConstruct %v3float %1317 %1317 %1317 + %1319 = OpFMul %v3float %1263 %1318 + %1320 = OpCompositeExtract %float %1319 
0 + %1321 = OpCompositeExtract %float %1319 1 + %1322 = OpFOrdEqual %bool %1320 %1321 + %1323 = OpCompositeExtract %float %1319 2 + %1324 = OpFOrdEqual %bool %1321 %1323 + %1325 = OpLogicalAnd %bool %1322 %1324 + OpSelectionMerge %1326 None + OpBranchConditional %1325 %1327 %1328 + %1328 = OpLabel + %1329 = OpExtInst %float %1 Sqrt %float_3 + %1330 = OpFSub %float %1321 %1323 + %1331 = OpFMul %float %1329 %1330 + %1332 = OpFMul %float %float_2 %1320 + %1333 = OpFSub %float %1332 %1321 + %1334 = OpFSub %float %1333 %1323 + %1335 = OpExtInst %float %1 Atan2 %1331 %1334 + %1336 = OpFMul %float %float_57_2957764 %1335 + OpBranch %1326 + %1327 = OpLabel + OpBranch %1326 + %1326 = OpLabel + %1337 = OpPhi %float %1336 %1328 %float_0 %1327 + %1338 = OpFOrdLessThan %bool %1337 %float_0 + OpSelectionMerge %1339 None + OpBranchConditional %1338 %1340 %1339 + %1340 = OpLabel + %1341 = OpFAdd %float %1337 %float_360 + OpBranch %1339 + %1339 = OpLabel + %1342 = OpPhi %float %1337 %1326 %1341 %1340 + %1343 = OpExtInst %float %1 FClamp %1342 %float_0 %float_360 + %1344 = OpFOrdGreaterThan %bool %1343 %float_180 + OpSelectionMerge %1345 None + OpBranchConditional %1344 %1346 %1345 + %1346 = OpLabel + %1347 = OpFSub %float %1343 %float_360 + OpBranch %1345 + %1345 = OpLabel + %1348 = OpPhi %float %1343 %1339 %1347 %1346 + %1349 = OpFOrdGreaterThan %bool %1348 %float_n67_5 + %1350 = OpFOrdLessThan %bool %1348 %float_67_5 + %1351 = OpLogicalAnd %bool %1349 %1350 + OpSelectionMerge %1352 None + OpBranchConditional %1351 %1353 %1352 + %1353 = OpLabel + %1354 = OpFSub %float %1348 %float_n67_5 + %1355 = OpFMul %float %1354 %float_0_0296296291 + %1356 = OpConvertFToS %int %1355 + %1357 = OpConvertSToF %float %1356 + %1358 = OpFSub %float %1355 %1357 + %1359 = OpFMul %float %1358 %1358 + %1360 = OpFMul %float %1359 %1358 + %1361 = OpIEqual %bool %1356 %int_3 + OpSelectionMerge %1362 None + OpBranchConditional %1361 %1363 %1364 + %1364 = OpLabel + %1365 = OpIEqual %bool %1356 %int_2 + 
OpSelectionMerge %1366 None + OpBranchConditional %1365 %1367 %1368 + %1368 = OpLabel + %1369 = OpIEqual %bool %1356 %int_1 + OpSelectionMerge %1370 None + OpBranchConditional %1369 %1371 %1372 + %1372 = OpLabel + %1373 = OpIEqual %bool %1356 %int_0 + OpSelectionMerge %1374 None + OpBranchConditional %1373 %1375 %1376 + %1376 = OpLabel + OpBranch %1374 + %1375 = OpLabel + %1377 = OpFMul %float %1360 %float_0_166666672 + OpBranch %1374 + %1374 = OpLabel + %1378 = OpPhi %float %float_0 %1376 %1377 %1375 + OpBranch %1370 + %1371 = OpLabel + %1379 = OpFMul %float %1360 %float_n0_5 + %1380 = OpFMul %float %1359 %float_0_5 + %1381 = OpFAdd %float %1379 %1380 + %1382 = OpFMul %float %1358 %float_0_5 + %1383 = OpFAdd %float %1381 %1382 + %1384 = OpFAdd %float %1383 %float_0_166666672 + OpBranch %1370 + %1370 = OpLabel + %1385 = OpPhi %float %1378 %1374 %1384 %1371 + OpBranch %1366 + %1367 = OpLabel + %1386 = OpFMul %float %1360 %float_0_5 + %1387 = OpFMul %float %1359 %float_n1 + %1388 = OpFAdd %float %1386 %1387 + %1389 = OpFAdd %float %1388 %float_0_666666687 + OpBranch %1366 + %1366 = OpLabel + %1390 = OpPhi %float %1385 %1370 %1389 %1367 + OpBranch %1362 + %1363 = OpLabel + %1391 = OpFMul %float %1360 %float_n0_166666672 + %1392 = OpFMul %float %1359 %float_0_5 + %1393 = OpFAdd %float %1391 %1392 + %1394 = OpFMul %float %1358 %float_n0_5 + %1395 = OpFAdd %float %1393 %1394 + %1396 = OpFAdd %float %1395 %float_0_166666672 + OpBranch %1362 + %1362 = OpLabel + %1397 = OpPhi %float %1390 %1366 %1396 %1363 + OpBranch %1352 + %1352 = OpLabel + %1398 = OpPhi %float %float_0 %1345 %1397 %1362 + %1399 = OpFMul %float %1398 %float_1_5 + %1400 = OpFMul %float %1399 %1275 + %1401 = OpFSub %float %float_0_0299999993 %1320 + %1402 = OpFMul %float %1400 %1401 + %1403 = OpFMul %float %1402 %float_0_180000007 + %1404 = OpFAdd %float %1320 %1403 + %1405 = OpCompositeInsert %v3float %1404 %1319 0 + %1406 = OpExtInst %v3float %1 FClamp %1405 %132 %314 + %1407 = OpVectorTimesMatrix 
%v3float %1406 %410 + %1408 = OpExtInst %v3float %1 FClamp %1407 %132 %314 + %1409 = OpDot %float %1408 %67 + %1410 = OpCompositeConstruct %v3float %1409 %1409 %1409 + %1411 = OpExtInst %v3float %1 FMix %1410 %1408 %228 + %1412 = OpCompositeExtract %float %1411 0 + %1413 = OpExtInst %float %1 Exp2 %float_n15 + %1414 = OpFMul %float %float_0_179999992 %1413 + %1415 = OpExtInst %float %1 Exp2 %float_18 + %1416 = OpFMul %float %float_0_179999992 %1415 + OpStore %502 %475 + OpStore %501 %476 + %1417 = OpFOrdLessThanEqual %bool %1412 %float_0 + %1418 = OpExtInst %float %1 Exp2 %float_n14 + %1419 = OpSelect %float %1417 %1418 %1412 + %1420 = OpExtInst %float %1 Log %1419 + %1421 = OpFDiv %float %1420 %1065 + %1422 = OpExtInst %float %1 Log %1414 + %1423 = OpFDiv %float %1422 %1065 + %1424 = OpFOrdLessThanEqual %bool %1421 %1423 + OpSelectionMerge %1425 None + OpBranchConditional %1424 %1426 %1427 + %1427 = OpLabel + %1428 = OpFOrdGreaterThan %bool %1421 %1423 + %1429 = OpExtInst %float %1 Log %float_0_180000007 + %1430 = OpFDiv %float %1429 %1065 + %1431 = OpFOrdLessThan %bool %1421 %1430 + %1432 = OpLogicalAnd %bool %1428 %1431 + OpSelectionMerge %1433 None + OpBranchConditional %1432 %1434 %1435 + %1435 = OpLabel + %1436 = OpFOrdGreaterThanEqual %bool %1421 %1430 + %1437 = OpExtInst %float %1 Log %1416 + %1438 = OpFDiv %float %1437 %1065 + %1439 = OpFOrdLessThan %bool %1421 %1438 + %1440 = OpLogicalAnd %bool %1436 %1439 + OpSelectionMerge %1441 None + OpBranchConditional %1440 %1442 %1443 + %1443 = OpLabel + %1444 = OpExtInst %float %1 Log %float_10000 + %1445 = OpFDiv %float %1444 %1065 + OpBranch %1441 + %1442 = OpLabel + %1446 = OpFSub %float %1421 %1430 + %1447 = OpFMul %float %float_3 %1446 + %1448 = OpFSub %float %1438 %1430 + %1449 = OpFDiv %float %1447 %1448 + %1450 = OpConvertFToS %int %1449 + %1451 = OpConvertSToF %float %1450 + %1452 = OpFSub %float %1449 %1451 + %1453 = OpAccessChain %_ptr_Function_float %501 %1450 + %1454 = OpLoad %float %1453 + %1455 = 
OpIAdd %int %1450 %int_1 + %1456 = OpAccessChain %_ptr_Function_float %501 %1455 + %1457 = OpLoad %float %1456 + %1458 = OpIAdd %int %1450 %int_2 + %1459 = OpAccessChain %_ptr_Function_float %501 %1458 + %1460 = OpLoad %float %1459 + %1461 = OpCompositeConstruct %v3float %1454 %1457 %1460 + %1462 = OpFMul %float %1452 %1452 + %1463 = OpCompositeConstruct %v3float %1462 %1452 %float_1 + %1464 = OpMatrixTimesVector %v3float %442 %1461 + %1465 = OpDot %float %1463 %1464 + OpBranch %1441 + %1441 = OpLabel + %1466 = OpPhi %float %1445 %1443 %1465 %1442 + OpBranch %1433 + %1434 = OpLabel + %1467 = OpFSub %float %1421 %1423 + %1468 = OpFMul %float %float_3 %1467 + %1469 = OpFSub %float %1430 %1423 + %1470 = OpFDiv %float %1468 %1469 + %1471 = OpConvertFToS %int %1470 + %1472 = OpConvertSToF %float %1471 + %1473 = OpFSub %float %1470 %1472 + %1474 = OpAccessChain %_ptr_Function_float %502 %1471 + %1475 = OpLoad %float %1474 + %1476 = OpIAdd %int %1471 %int_1 + %1477 = OpAccessChain %_ptr_Function_float %502 %1476 + %1478 = OpLoad %float %1477 + %1479 = OpIAdd %int %1471 %int_2 + %1480 = OpAccessChain %_ptr_Function_float %502 %1479 + %1481 = OpLoad %float %1480 + %1482 = OpCompositeConstruct %v3float %1475 %1478 %1481 + %1483 = OpFMul %float %1473 %1473 + %1484 = OpCompositeConstruct %v3float %1483 %1473 %float_1 + %1485 = OpMatrixTimesVector %v3float %442 %1482 + %1486 = OpDot %float %1484 %1485 + OpBranch %1433 + %1433 = OpLabel + %1487 = OpPhi %float %1466 %1441 %1486 %1434 + OpBranch %1425 + %1426 = OpLabel + %1488 = OpExtInst %float %1 Log %float_9_99999975en05 + %1489 = OpFDiv %float %1488 %1065 + OpBranch %1425 + %1425 = OpLabel + %1490 = OpPhi %float %1487 %1433 %1489 %1426 + %1491 = OpExtInst %float %1 Pow %float_10 %1490 + %1492 = OpCompositeInsert %v3float %1491 %391 0 + %1493 = OpCompositeExtract %float %1411 1 + OpStore %504 %475 + OpStore %503 %476 + %1494 = OpFOrdLessThanEqual %bool %1493 %float_0 + %1495 = OpSelect %float %1494 %1418 %1493 + %1496 = 
OpExtInst %float %1 Log %1495 + %1497 = OpFDiv %float %1496 %1065 + %1498 = OpFOrdLessThanEqual %bool %1497 %1423 + OpSelectionMerge %1499 None + OpBranchConditional %1498 %1500 %1501 + %1501 = OpLabel + %1502 = OpFOrdGreaterThan %bool %1497 %1423 + %1503 = OpExtInst %float %1 Log %float_0_180000007 + %1504 = OpFDiv %float %1503 %1065 + %1505 = OpFOrdLessThan %bool %1497 %1504 + %1506 = OpLogicalAnd %bool %1502 %1505 + OpSelectionMerge %1507 None + OpBranchConditional %1506 %1508 %1509 + %1509 = OpLabel + %1510 = OpFOrdGreaterThanEqual %bool %1497 %1504 + %1511 = OpExtInst %float %1 Log %1416 + %1512 = OpFDiv %float %1511 %1065 + %1513 = OpFOrdLessThan %bool %1497 %1512 + %1514 = OpLogicalAnd %bool %1510 %1513 + OpSelectionMerge %1515 None + OpBranchConditional %1514 %1516 %1517 + %1517 = OpLabel + %1518 = OpExtInst %float %1 Log %float_10000 + %1519 = OpFDiv %float %1518 %1065 + OpBranch %1515 + %1516 = OpLabel + %1520 = OpFSub %float %1497 %1504 + %1521 = OpFMul %float %float_3 %1520 + %1522 = OpFSub %float %1512 %1504 + %1523 = OpFDiv %float %1521 %1522 + %1524 = OpConvertFToS %int %1523 + %1525 = OpConvertSToF %float %1524 + %1526 = OpFSub %float %1523 %1525 + %1527 = OpAccessChain %_ptr_Function_float %503 %1524 + %1528 = OpLoad %float %1527 + %1529 = OpIAdd %int %1524 %int_1 + %1530 = OpAccessChain %_ptr_Function_float %503 %1529 + %1531 = OpLoad %float %1530 + %1532 = OpIAdd %int %1524 %int_2 + %1533 = OpAccessChain %_ptr_Function_float %503 %1532 + %1534 = OpLoad %float %1533 + %1535 = OpCompositeConstruct %v3float %1528 %1531 %1534 + %1536 = OpFMul %float %1526 %1526 + %1537 = OpCompositeConstruct %v3float %1536 %1526 %float_1 + %1538 = OpMatrixTimesVector %v3float %442 %1535 + %1539 = OpDot %float %1537 %1538 + OpBranch %1515 + %1515 = OpLabel + %1540 = OpPhi %float %1519 %1517 %1539 %1516 + OpBranch %1507 + %1508 = OpLabel + %1541 = OpFSub %float %1497 %1423 + %1542 = OpFMul %float %float_3 %1541 + %1543 = OpFSub %float %1504 %1423 + %1544 = OpFDiv 
%float %1542 %1543 + %1545 = OpConvertFToS %int %1544 + %1546 = OpConvertSToF %float %1545 + %1547 = OpFSub %float %1544 %1546 + %1548 = OpAccessChain %_ptr_Function_float %504 %1545 + %1549 = OpLoad %float %1548 + %1550 = OpIAdd %int %1545 %int_1 + %1551 = OpAccessChain %_ptr_Function_float %504 %1550 + %1552 = OpLoad %float %1551 + %1553 = OpIAdd %int %1545 %int_2 + %1554 = OpAccessChain %_ptr_Function_float %504 %1553 + %1555 = OpLoad %float %1554 + %1556 = OpCompositeConstruct %v3float %1549 %1552 %1555 + %1557 = OpFMul %float %1547 %1547 + %1558 = OpCompositeConstruct %v3float %1557 %1547 %float_1 + %1559 = OpMatrixTimesVector %v3float %442 %1556 + %1560 = OpDot %float %1558 %1559 + OpBranch %1507 + %1507 = OpLabel + %1561 = OpPhi %float %1540 %1515 %1560 %1508 + OpBranch %1499 + %1500 = OpLabel + %1562 = OpExtInst %float %1 Log %float_9_99999975en05 + %1563 = OpFDiv %float %1562 %1065 + OpBranch %1499 + %1499 = OpLabel + %1564 = OpPhi %float %1561 %1507 %1563 %1500 + %1565 = OpExtInst %float %1 Pow %float_10 %1564 + %1566 = OpCompositeInsert %v3float %1565 %1492 1 + %1567 = OpCompositeExtract %float %1411 2 + OpStore %506 %475 + OpStore %505 %476 + %1568 = OpFOrdLessThanEqual %bool %1567 %float_0 + %1569 = OpSelect %float %1568 %1418 %1567 + %1570 = OpExtInst %float %1 Log %1569 + %1571 = OpFDiv %float %1570 %1065 + %1572 = OpFOrdLessThanEqual %bool %1571 %1423 + OpSelectionMerge %1573 None + OpBranchConditional %1572 %1574 %1575 + %1575 = OpLabel + %1576 = OpFOrdGreaterThan %bool %1571 %1423 + %1577 = OpExtInst %float %1 Log %float_0_180000007 + %1578 = OpFDiv %float %1577 %1065 + %1579 = OpFOrdLessThan %bool %1571 %1578 + %1580 = OpLogicalAnd %bool %1576 %1579 + OpSelectionMerge %1581 None + OpBranchConditional %1580 %1582 %1583 + %1583 = OpLabel + %1584 = OpFOrdGreaterThanEqual %bool %1571 %1578 + %1585 = OpExtInst %float %1 Log %1416 + %1586 = OpFDiv %float %1585 %1065 + %1587 = OpFOrdLessThan %bool %1571 %1586 + %1588 = OpLogicalAnd %bool %1584 %1587 + 
OpSelectionMerge %1589 None + OpBranchConditional %1588 %1590 %1591 + %1591 = OpLabel + %1592 = OpExtInst %float %1 Log %float_10000 + %1593 = OpFDiv %float %1592 %1065 + OpBranch %1589 + %1590 = OpLabel + %1594 = OpFSub %float %1571 %1578 + %1595 = OpFMul %float %float_3 %1594 + %1596 = OpFSub %float %1586 %1578 + %1597 = OpFDiv %float %1595 %1596 + %1598 = OpConvertFToS %int %1597 + %1599 = OpConvertSToF %float %1598 + %1600 = OpFSub %float %1597 %1599 + %1601 = OpAccessChain %_ptr_Function_float %505 %1598 + %1602 = OpLoad %float %1601 + %1603 = OpIAdd %int %1598 %int_1 + %1604 = OpAccessChain %_ptr_Function_float %505 %1603 + %1605 = OpLoad %float %1604 + %1606 = OpIAdd %int %1598 %int_2 + %1607 = OpAccessChain %_ptr_Function_float %505 %1606 + %1608 = OpLoad %float %1607 + %1609 = OpCompositeConstruct %v3float %1602 %1605 %1608 + %1610 = OpFMul %float %1600 %1600 + %1611 = OpCompositeConstruct %v3float %1610 %1600 %float_1 + %1612 = OpMatrixTimesVector %v3float %442 %1609 + %1613 = OpDot %float %1611 %1612 + OpBranch %1589 + %1589 = OpLabel + %1614 = OpPhi %float %1593 %1591 %1613 %1590 + OpBranch %1581 + %1582 = OpLabel + %1615 = OpFSub %float %1571 %1423 + %1616 = OpFMul %float %float_3 %1615 + %1617 = OpFSub %float %1578 %1423 + %1618 = OpFDiv %float %1616 %1617 + %1619 = OpConvertFToS %int %1618 + %1620 = OpConvertSToF %float %1619 + %1621 = OpFSub %float %1618 %1620 + %1622 = OpAccessChain %_ptr_Function_float %506 %1619 + %1623 = OpLoad %float %1622 + %1624 = OpIAdd %int %1619 %int_1 + %1625 = OpAccessChain %_ptr_Function_float %506 %1624 + %1626 = OpLoad %float %1625 + %1627 = OpIAdd %int %1619 %int_2 + %1628 = OpAccessChain %_ptr_Function_float %506 %1627 + %1629 = OpLoad %float %1628 + %1630 = OpCompositeConstruct %v3float %1623 %1626 %1629 + %1631 = OpFMul %float %1621 %1621 + %1632 = OpCompositeConstruct %v3float %1631 %1621 %float_1 + %1633 = OpMatrixTimesVector %v3float %442 %1630 + %1634 = OpDot %float %1632 %1633 + OpBranch %1581 + %1581 = 
OpLabel + %1635 = OpPhi %float %1614 %1589 %1634 %1582 + OpBranch %1573 + %1574 = OpLabel + %1636 = OpExtInst %float %1 Log %float_9_99999975en05 + %1637 = OpFDiv %float %1636 %1065 + OpBranch %1573 + %1573 = OpLabel + %1638 = OpPhi %float %1635 %1581 %1637 %1574 + %1639 = OpExtInst %float %1 Pow %float_10 %1638 + %1640 = OpCompositeInsert %v3float %1639 %1566 2 + %1641 = OpVectorTimesMatrix %v3float %1640 %414 + %1642 = OpVectorTimesMatrix %v3float %1641 %410 + %1643 = OpExtInst %float %1 Pow %float_2 %float_n12 + %1644 = OpFMul %float %float_0_179999992 %1643 + OpStore %514 %475 + OpStore %513 %476 + %1645 = OpFOrdLessThanEqual %bool %1644 %float_0 + %1646 = OpSelect %float %1645 %1418 %1644 + %1647 = OpExtInst %float %1 Log %1646 + %1648 = OpFDiv %float %1647 %1065 + %1649 = OpFOrdLessThanEqual %bool %1648 %1423 + OpSelectionMerge %1650 None + OpBranchConditional %1649 %1651 %1652 + %1652 = OpLabel + %1653 = OpFOrdGreaterThan %bool %1648 %1423 + %1654 = OpExtInst %float %1 Log %float_0_180000007 + %1655 = OpFDiv %float %1654 %1065 + %1656 = OpFOrdLessThan %bool %1648 %1655 + %1657 = OpLogicalAnd %bool %1653 %1656 + OpSelectionMerge %1658 None + OpBranchConditional %1657 %1659 %1660 + %1660 = OpLabel + %1661 = OpFOrdGreaterThanEqual %bool %1648 %1655 + %1662 = OpExtInst %float %1 Log %1416 + %1663 = OpFDiv %float %1662 %1065 + %1664 = OpFOrdLessThan %bool %1648 %1663 + %1665 = OpLogicalAnd %bool %1661 %1664 + OpSelectionMerge %1666 None + OpBranchConditional %1665 %1667 %1668 + %1668 = OpLabel + %1669 = OpExtInst %float %1 Log %float_10000 + %1670 = OpFDiv %float %1669 %1065 + OpBranch %1666 + %1667 = OpLabel + %1671 = OpFSub %float %1648 %1655 + %1672 = OpFMul %float %float_3 %1671 + %1673 = OpFSub %float %1663 %1655 + %1674 = OpFDiv %float %1672 %1673 + %1675 = OpConvertFToS %int %1674 + %1676 = OpConvertSToF %float %1675 + %1677 = OpFSub %float %1674 %1676 + %1678 = OpAccessChain %_ptr_Function_float %513 %1675 + %1679 = OpLoad %float %1678 + %1680 = OpIAdd 
%int %1675 %int_1 + %1681 = OpAccessChain %_ptr_Function_float %513 %1680 + %1682 = OpLoad %float %1681 + %1683 = OpIAdd %int %1675 %int_2 + %1684 = OpAccessChain %_ptr_Function_float %513 %1683 + %1685 = OpLoad %float %1684 + %1686 = OpCompositeConstruct %v3float %1679 %1682 %1685 + %1687 = OpFMul %float %1677 %1677 + %1688 = OpCompositeConstruct %v3float %1687 %1677 %float_1 + %1689 = OpMatrixTimesVector %v3float %442 %1686 + %1690 = OpDot %float %1688 %1689 + OpBranch %1666 + %1666 = OpLabel + %1691 = OpPhi %float %1670 %1668 %1690 %1667 + OpBranch %1658 + %1659 = OpLabel + %1692 = OpFSub %float %1648 %1423 + %1693 = OpFMul %float %float_3 %1692 + %1694 = OpFSub %float %1655 %1423 + %1695 = OpFDiv %float %1693 %1694 + %1696 = OpConvertFToS %int %1695 + %1697 = OpConvertSToF %float %1696 + %1698 = OpFSub %float %1695 %1697 + %1699 = OpAccessChain %_ptr_Function_float %514 %1696 + %1700 = OpLoad %float %1699 + %1701 = OpIAdd %int %1696 %int_1 + %1702 = OpAccessChain %_ptr_Function_float %514 %1701 + %1703 = OpLoad %float %1702 + %1704 = OpIAdd %int %1696 %int_2 + %1705 = OpAccessChain %_ptr_Function_float %514 %1704 + %1706 = OpLoad %float %1705 + %1707 = OpCompositeConstruct %v3float %1700 %1703 %1706 + %1708 = OpFMul %float %1698 %1698 + %1709 = OpCompositeConstruct %v3float %1708 %1698 %float_1 + %1710 = OpMatrixTimesVector %v3float %442 %1707 + %1711 = OpDot %float %1709 %1710 + OpBranch %1658 + %1658 = OpLabel + %1712 = OpPhi %float %1691 %1666 %1711 %1659 + OpBranch %1650 + %1651 = OpLabel + %1713 = OpExtInst %float %1 Log %float_9_99999975en05 + %1714 = OpFDiv %float %1713 %1065 + OpBranch %1650 + %1650 = OpLabel + %1715 = OpPhi %float %1712 %1658 %1714 %1651 + %1716 = OpExtInst %float %1 Pow %float_10 %1715 + OpStore %516 %475 + OpStore %515 %476 + %1717 = OpExtInst %float %1 Log %float_0_180000007 + %1718 = OpFDiv %float %1717 %1065 + %1719 = OpFOrdLessThanEqual %bool %1718 %1423 + OpSelectionMerge %1720 None + OpBranchConditional %1719 %1721 %1722 + 
%1722 = OpLabel + %1723 = OpFOrdGreaterThan %bool %1718 %1423 + %1724 = OpFOrdLessThan %bool %1718 %1718 + %1725 = OpLogicalAnd %bool %1723 %1724 + OpSelectionMerge %1726 None + OpBranchConditional %1725 %1727 %1728 + %1728 = OpLabel + %1729 = OpFOrdGreaterThanEqual %bool %1718 %1718 + %1730 = OpExtInst %float %1 Log %1416 + %1731 = OpFDiv %float %1730 %1065 + %1732 = OpFOrdLessThan %bool %1718 %1731 + %1733 = OpLogicalAnd %bool %1729 %1732 + OpSelectionMerge %1734 None + OpBranchConditional %1733 %1735 %1736 + %1736 = OpLabel + %1737 = OpExtInst %float %1 Log %float_10000 + %1738 = OpFDiv %float %1737 %1065 + OpBranch %1734 + %1735 = OpLabel + %1739 = OpFSub %float %1718 %1718 + %1740 = OpFMul %float %float_3 %1739 + %1741 = OpFSub %float %1731 %1718 + %1742 = OpFDiv %float %1740 %1741 + %1743 = OpConvertFToS %int %1742 + %1744 = OpConvertSToF %float %1743 + %1745 = OpFSub %float %1742 %1744 + %1746 = OpAccessChain %_ptr_Function_float %515 %1743 + %1747 = OpLoad %float %1746 + %1748 = OpIAdd %int %1743 %int_1 + %1749 = OpAccessChain %_ptr_Function_float %515 %1748 + %1750 = OpLoad %float %1749 + %1751 = OpIAdd %int %1743 %int_2 + %1752 = OpAccessChain %_ptr_Function_float %515 %1751 + %1753 = OpLoad %float %1752 + %1754 = OpCompositeConstruct %v3float %1747 %1750 %1753 + %1755 = OpFMul %float %1745 %1745 + %1756 = OpCompositeConstruct %v3float %1755 %1745 %float_1 + %1757 = OpMatrixTimesVector %v3float %442 %1754 + %1758 = OpDot %float %1756 %1757 + OpBranch %1734 + %1734 = OpLabel + %1759 = OpPhi %float %1738 %1736 %1758 %1735 + OpBranch %1726 + %1727 = OpLabel + %1760 = OpFSub %float %1718 %1423 + %1761 = OpFMul %float %float_3 %1760 + %1762 = OpAccessChain %_ptr_Function_float %516 %int_3 + %1763 = OpLoad %float %1762 + %1764 = OpAccessChain %_ptr_Function_float %516 %int_4 + %1765 = OpLoad %float %1764 + %1766 = OpAccessChain %_ptr_Function_float %516 %int_5 + %1767 = OpLoad %float %1766 + %1768 = OpCompositeConstruct %v3float %1763 %1765 %1767 + %1769 = 
OpMatrixTimesVector %v3float %442 %1768 + %1770 = OpCompositeExtract %float %1769 2 + OpBranch %1726 + %1726 = OpLabel + %1771 = OpPhi %float %1759 %1734 %1770 %1727 + OpBranch %1720 + %1721 = OpLabel + %1772 = OpExtInst %float %1 Log %float_9_99999975en05 + %1773 = OpFDiv %float %1772 %1065 + OpBranch %1720 + %1720 = OpLabel + %1774 = OpPhi %float %1771 %1726 %1773 %1721 + %1775 = OpExtInst %float %1 Pow %float_10 %1774 + %1776 = OpExtInst %float %1 Pow %float_2 %float_11 + %1777 = OpFMul %float %float_0_179999992 %1776 + OpStore %518 %475 + OpStore %517 %476 + %1778 = OpFOrdLessThanEqual %bool %1777 %float_0 + %1779 = OpSelect %float %1778 %1418 %1777 + %1780 = OpExtInst %float %1 Log %1779 + %1781 = OpFDiv %float %1780 %1065 + %1782 = OpFOrdLessThanEqual %bool %1781 %1423 + OpSelectionMerge %1783 None + OpBranchConditional %1782 %1784 %1785 + %1785 = OpLabel + %1786 = OpFOrdGreaterThan %bool %1781 %1423 + %1787 = OpFOrdLessThan %bool %1781 %1718 + %1788 = OpLogicalAnd %bool %1786 %1787 + OpSelectionMerge %1789 None + OpBranchConditional %1788 %1790 %1791 + %1791 = OpLabel + %1792 = OpFOrdGreaterThanEqual %bool %1781 %1718 + %1793 = OpExtInst %float %1 Log %1416 + %1794 = OpFDiv %float %1793 %1065 + %1795 = OpFOrdLessThan %bool %1781 %1794 + %1796 = OpLogicalAnd %bool %1792 %1795 + OpSelectionMerge %1797 None + OpBranchConditional %1796 %1798 %1799 + %1799 = OpLabel + %1800 = OpExtInst %float %1 Log %float_10000 + %1801 = OpFDiv %float %1800 %1065 + OpBranch %1797 + %1798 = OpLabel + %1802 = OpFSub %float %1781 %1718 + %1803 = OpFMul %float %float_3 %1802 + %1804 = OpFSub %float %1794 %1718 + %1805 = OpFDiv %float %1803 %1804 + %1806 = OpConvertFToS %int %1805 + %1807 = OpConvertSToF %float %1806 + %1808 = OpFSub %float %1805 %1807 + %1809 = OpAccessChain %_ptr_Function_float %517 %1806 + %1810 = OpLoad %float %1809 + %1811 = OpIAdd %int %1806 %int_1 + %1812 = OpAccessChain %_ptr_Function_float %517 %1811 + %1813 = OpLoad %float %1812 + %1814 = OpIAdd %int %1806 
%int_2 + %1815 = OpAccessChain %_ptr_Function_float %517 %1814 + %1816 = OpLoad %float %1815 + %1817 = OpCompositeConstruct %v3float %1810 %1813 %1816 + %1818 = OpFMul %float %1808 %1808 + %1819 = OpCompositeConstruct %v3float %1818 %1808 %float_1 + %1820 = OpMatrixTimesVector %v3float %442 %1817 + %1821 = OpDot %float %1819 %1820 + OpBranch %1797 + %1797 = OpLabel + %1822 = OpPhi %float %1801 %1799 %1821 %1798 + OpBranch %1789 + %1790 = OpLabel + %1823 = OpFSub %float %1781 %1423 + %1824 = OpFMul %float %float_3 %1823 + %1825 = OpFSub %float %1718 %1423 + %1826 = OpFDiv %float %1824 %1825 + %1827 = OpConvertFToS %int %1826 + %1828 = OpConvertSToF %float %1827 + %1829 = OpFSub %float %1826 %1828 + %1830 = OpAccessChain %_ptr_Function_float %518 %1827 + %1831 = OpLoad %float %1830 + %1832 = OpIAdd %int %1827 %int_1 + %1833 = OpAccessChain %_ptr_Function_float %518 %1832 + %1834 = OpLoad %float %1833 + %1835 = OpIAdd %int %1827 %int_2 + %1836 = OpAccessChain %_ptr_Function_float %518 %1835 + %1837 = OpLoad %float %1836 + %1838 = OpCompositeConstruct %v3float %1831 %1834 %1837 + %1839 = OpFMul %float %1829 %1829 + %1840 = OpCompositeConstruct %v3float %1839 %1829 %float_1 + %1841 = OpMatrixTimesVector %v3float %442 %1838 + %1842 = OpDot %float %1840 %1841 + OpBranch %1789 + %1789 = OpLabel + %1843 = OpPhi %float %1822 %1797 %1842 %1790 + OpBranch %1783 + %1784 = OpLabel + %1844 = OpExtInst %float %1 Log %float_9_99999975en05 + %1845 = OpFDiv %float %1844 %1065 + OpBranch %1783 + %1783 = OpLabel + %1846 = OpPhi %float %1843 %1789 %1845 %1784 + %1847 = OpExtInst %float %1 Pow %float_10 %1846 + %1848 = OpCompositeExtract %float %1642 0 + OpStore %512 %482 + OpStore %511 %483 + %1849 = OpFOrdLessThanEqual %bool %1848 %float_0 + %1850 = OpSelect %float %1849 %float_9_99999975en05 %1848 + %1851 = OpExtInst %float %1 Log %1850 + %1852 = OpFDiv %float %1851 %1065 + %1853 = OpExtInst %float %1 Log %1716 + %1854 = OpFDiv %float %1853 %1065 + %1855 = OpFOrdLessThanEqual %bool 
%1852 %1854 + OpSelectionMerge %1856 None + OpBranchConditional %1855 %1857 %1858 + %1858 = OpLabel + %1859 = OpFOrdGreaterThan %bool %1852 %1854 + %1860 = OpExtInst %float %1 Log %1775 + %1861 = OpFDiv %float %1860 %1065 + %1862 = OpFOrdLessThan %bool %1852 %1861 + %1863 = OpLogicalAnd %bool %1859 %1862 + OpSelectionMerge %1864 None + OpBranchConditional %1863 %1865 %1866 + %1866 = OpLabel + %1867 = OpFOrdGreaterThanEqual %bool %1852 %1861 + %1868 = OpExtInst %float %1 Log %1847 + %1869 = OpFDiv %float %1868 %1065 + %1870 = OpFOrdLessThan %bool %1852 %1869 + %1871 = OpLogicalAnd %bool %1867 %1870 + OpSelectionMerge %1872 None + OpBranchConditional %1871 %1873 %1874 + %1874 = OpLabel + %1875 = OpFMul %float %1852 %float_0_119999997 + %1876 = OpExtInst %float %1 Log %float_2000 + %1877 = OpFDiv %float %1876 %1065 + %1878 = OpFMul %float %float_0_119999997 %1868 + %1879 = OpFDiv %float %1878 %1065 + %1880 = OpFSub %float %1877 %1879 + %1881 = OpFAdd %float %1875 %1880 + OpBranch %1872 + %1873 = OpLabel + %1882 = OpFSub %float %1852 %1861 + %1883 = OpFMul %float %float_7 %1882 + %1884 = OpFSub %float %1869 %1861 + %1885 = OpFDiv %float %1883 %1884 + %1886 = OpConvertFToS %int %1885 + %1887 = OpConvertSToF %float %1886 + %1888 = OpFSub %float %1885 %1887 + %1889 = OpAccessChain %_ptr_Function_float %511 %1886 + %1890 = OpLoad %float %1889 + %1891 = OpIAdd %int %1886 %int_1 + %1892 = OpAccessChain %_ptr_Function_float %511 %1891 + %1893 = OpLoad %float %1892 + %1894 = OpIAdd %int %1886 %int_2 + %1895 = OpAccessChain %_ptr_Function_float %511 %1894 + %1896 = OpLoad %float %1895 + %1897 = OpCompositeConstruct %v3float %1890 %1893 %1896 + %1898 = OpFMul %float %1888 %1888 + %1899 = OpCompositeConstruct %v3float %1898 %1888 %float_1 + %1900 = OpMatrixTimesVector %v3float %442 %1897 + %1901 = OpDot %float %1899 %1900 + OpBranch %1872 + %1872 = OpLabel + %1902 = OpPhi %float %1881 %1874 %1901 %1873 + OpBranch %1864 + %1865 = OpLabel + %1903 = OpFSub %float %1852 %1854 + %1904 
= OpFMul %float %float_7 %1903 + %1905 = OpFSub %float %1861 %1854 + %1906 = OpFDiv %float %1904 %1905 + %1907 = OpConvertFToS %int %1906 + %1908 = OpConvertSToF %float %1907 + %1909 = OpFSub %float %1906 %1908 + %1910 = OpAccessChain %_ptr_Function_float %512 %1907 + %1911 = OpLoad %float %1910 + %1912 = OpIAdd %int %1907 %int_1 + %1913 = OpAccessChain %_ptr_Function_float %512 %1912 + %1914 = OpLoad %float %1913 + %1915 = OpIAdd %int %1907 %int_2 + %1916 = OpAccessChain %_ptr_Function_float %512 %1915 + %1917 = OpLoad %float %1916 + %1918 = OpCompositeConstruct %v3float %1911 %1914 %1917 + %1919 = OpFMul %float %1909 %1909 + %1920 = OpCompositeConstruct %v3float %1919 %1909 %float_1 + %1921 = OpMatrixTimesVector %v3float %442 %1918 + %1922 = OpDot %float %1920 %1921 + OpBranch %1864 + %1864 = OpLabel + %1923 = OpPhi %float %1902 %1872 %1922 %1865 + OpBranch %1856 + %1857 = OpLabel + %1924 = OpExtInst %float %1 Log %float_0_00499999989 + %1925 = OpFDiv %float %1924 %1065 + OpBranch %1856 + %1856 = OpLabel + %1926 = OpPhi %float %1923 %1864 %1925 %1857 + %1927 = OpExtInst %float %1 Pow %float_10 %1926 + %1928 = OpCompositeInsert %v3float %1927 %391 0 + %1929 = OpCompositeExtract %float %1642 1 + OpStore %510 %482 + OpStore %509 %483 + %1930 = OpFOrdLessThanEqual %bool %1929 %float_0 + %1931 = OpSelect %float %1930 %float_9_99999975en05 %1929 + %1932 = OpExtInst %float %1 Log %1931 + %1933 = OpFDiv %float %1932 %1065 + %1934 = OpFOrdLessThanEqual %bool %1933 %1854 + OpSelectionMerge %1935 None + OpBranchConditional %1934 %1936 %1937 + %1937 = OpLabel + %1938 = OpFOrdGreaterThan %bool %1933 %1854 + %1939 = OpExtInst %float %1 Log %1775 + %1940 = OpFDiv %float %1939 %1065 + %1941 = OpFOrdLessThan %bool %1933 %1940 + %1942 = OpLogicalAnd %bool %1938 %1941 + OpSelectionMerge %1943 None + OpBranchConditional %1942 %1944 %1945 + %1945 = OpLabel + %1946 = OpFOrdGreaterThanEqual %bool %1933 %1940 + %1947 = OpExtInst %float %1 Log %1847 + %1948 = OpFDiv %float %1947 %1065 + 
%1949 = OpFOrdLessThan %bool %1933 %1948 + %1950 = OpLogicalAnd %bool %1946 %1949 + OpSelectionMerge %1951 None + OpBranchConditional %1950 %1952 %1953 + %1953 = OpLabel + %1954 = OpFMul %float %1933 %float_0_119999997 + %1955 = OpExtInst %float %1 Log %float_2000 + %1956 = OpFDiv %float %1955 %1065 + %1957 = OpFMul %float %float_0_119999997 %1947 + %1958 = OpFDiv %float %1957 %1065 + %1959 = OpFSub %float %1956 %1958 + %1960 = OpFAdd %float %1954 %1959 + OpBranch %1951 + %1952 = OpLabel + %1961 = OpFSub %float %1933 %1940 + %1962 = OpFMul %float %float_7 %1961 + %1963 = OpFSub %float %1948 %1940 + %1964 = OpFDiv %float %1962 %1963 + %1965 = OpConvertFToS %int %1964 + %1966 = OpConvertSToF %float %1965 + %1967 = OpFSub %float %1964 %1966 + %1968 = OpAccessChain %_ptr_Function_float %509 %1965 + %1969 = OpLoad %float %1968 + %1970 = OpIAdd %int %1965 %int_1 + %1971 = OpAccessChain %_ptr_Function_float %509 %1970 + %1972 = OpLoad %float %1971 + %1973 = OpIAdd %int %1965 %int_2 + %1974 = OpAccessChain %_ptr_Function_float %509 %1973 + %1975 = OpLoad %float %1974 + %1976 = OpCompositeConstruct %v3float %1969 %1972 %1975 + %1977 = OpFMul %float %1967 %1967 + %1978 = OpCompositeConstruct %v3float %1977 %1967 %float_1 + %1979 = OpMatrixTimesVector %v3float %442 %1976 + %1980 = OpDot %float %1978 %1979 + OpBranch %1951 + %1951 = OpLabel + %1981 = OpPhi %float %1960 %1953 %1980 %1952 + OpBranch %1943 + %1944 = OpLabel + %1982 = OpFSub %float %1933 %1854 + %1983 = OpFMul %float %float_7 %1982 + %1984 = OpFSub %float %1940 %1854 + %1985 = OpFDiv %float %1983 %1984 + %1986 = OpConvertFToS %int %1985 + %1987 = OpConvertSToF %float %1986 + %1988 = OpFSub %float %1985 %1987 + %1989 = OpAccessChain %_ptr_Function_float %510 %1986 + %1990 = OpLoad %float %1989 + %1991 = OpIAdd %int %1986 %int_1 + %1992 = OpAccessChain %_ptr_Function_float %510 %1991 + %1993 = OpLoad %float %1992 + %1994 = OpIAdd %int %1986 %int_2 + %1995 = OpAccessChain %_ptr_Function_float %510 %1994 + %1996 = 
OpLoad %float %1995 + %1997 = OpCompositeConstruct %v3float %1990 %1993 %1996 + %1998 = OpFMul %float %1988 %1988 + %1999 = OpCompositeConstruct %v3float %1998 %1988 %float_1 + %2000 = OpMatrixTimesVector %v3float %442 %1997 + %2001 = OpDot %float %1999 %2000 + OpBranch %1943 + %1943 = OpLabel + %2002 = OpPhi %float %1981 %1951 %2001 %1944 + OpBranch %1935 + %1936 = OpLabel + %2003 = OpExtInst %float %1 Log %float_0_00499999989 + %2004 = OpFDiv %float %2003 %1065 + OpBranch %1935 + %1935 = OpLabel + %2005 = OpPhi %float %2002 %1943 %2004 %1936 + %2006 = OpExtInst %float %1 Pow %float_10 %2005 + %2007 = OpCompositeInsert %v3float %2006 %1928 1 + %2008 = OpCompositeExtract %float %1642 2 + OpStore %508 %482 + OpStore %507 %483 + %2009 = OpFOrdLessThanEqual %bool %2008 %float_0 + %2010 = OpSelect %float %2009 %float_9_99999975en05 %2008 + %2011 = OpExtInst %float %1 Log %2010 + %2012 = OpFDiv %float %2011 %1065 + %2013 = OpFOrdLessThanEqual %bool %2012 %1854 + OpSelectionMerge %2014 None + OpBranchConditional %2013 %2015 %2016 + %2016 = OpLabel + %2017 = OpFOrdGreaterThan %bool %2012 %1854 + %2018 = OpExtInst %float %1 Log %1775 + %2019 = OpFDiv %float %2018 %1065 + %2020 = OpFOrdLessThan %bool %2012 %2019 + %2021 = OpLogicalAnd %bool %2017 %2020 + OpSelectionMerge %2022 None + OpBranchConditional %2021 %2023 %2024 + %2024 = OpLabel + %2025 = OpFOrdGreaterThanEqual %bool %2012 %2019 + %2026 = OpExtInst %float %1 Log %1847 + %2027 = OpFDiv %float %2026 %1065 + %2028 = OpFOrdLessThan %bool %2012 %2027 + %2029 = OpLogicalAnd %bool %2025 %2028 + OpSelectionMerge %2030 None + OpBranchConditional %2029 %2031 %2032 + %2032 = OpLabel + %2033 = OpFMul %float %2012 %float_0_119999997 + %2034 = OpExtInst %float %1 Log %float_2000 + %2035 = OpFDiv %float %2034 %1065 + %2036 = OpFMul %float %float_0_119999997 %2026 + %2037 = OpFDiv %float %2036 %1065 + %2038 = OpFSub %float %2035 %2037 + %2039 = OpFAdd %float %2033 %2038 + OpBranch %2030 + %2031 = OpLabel + %2040 = OpFSub %float 
%2012 %2019 + %2041 = OpFMul %float %float_7 %2040 + %2042 = OpFSub %float %2027 %2019 + %2043 = OpFDiv %float %2041 %2042 + %2044 = OpConvertFToS %int %2043 + %2045 = OpConvertSToF %float %2044 + %2046 = OpFSub %float %2043 %2045 + %2047 = OpAccessChain %_ptr_Function_float %507 %2044 + %2048 = OpLoad %float %2047 + %2049 = OpIAdd %int %2044 %int_1 + %2050 = OpAccessChain %_ptr_Function_float %507 %2049 + %2051 = OpLoad %float %2050 + %2052 = OpIAdd %int %2044 %int_2 + %2053 = OpAccessChain %_ptr_Function_float %507 %2052 + %2054 = OpLoad %float %2053 + %2055 = OpCompositeConstruct %v3float %2048 %2051 %2054 + %2056 = OpFMul %float %2046 %2046 + %2057 = OpCompositeConstruct %v3float %2056 %2046 %float_1 + %2058 = OpMatrixTimesVector %v3float %442 %2055 + %2059 = OpDot %float %2057 %2058 + OpBranch %2030 + %2030 = OpLabel + %2060 = OpPhi %float %2039 %2032 %2059 %2031 + OpBranch %2022 + %2023 = OpLabel + %2061 = OpFSub %float %2012 %1854 + %2062 = OpFMul %float %float_7 %2061 + %2063 = OpFSub %float %2019 %1854 + %2064 = OpFDiv %float %2062 %2063 + %2065 = OpConvertFToS %int %2064 + %2066 = OpConvertSToF %float %2065 + %2067 = OpFSub %float %2064 %2066 + %2068 = OpAccessChain %_ptr_Function_float %508 %2065 + %2069 = OpLoad %float %2068 + %2070 = OpIAdd %int %2065 %int_1 + %2071 = OpAccessChain %_ptr_Function_float %508 %2070 + %2072 = OpLoad %float %2071 + %2073 = OpIAdd %int %2065 %int_2 + %2074 = OpAccessChain %_ptr_Function_float %508 %2073 + %2075 = OpLoad %float %2074 + %2076 = OpCompositeConstruct %v3float %2069 %2072 %2075 + %2077 = OpFMul %float %2067 %2067 + %2078 = OpCompositeConstruct %v3float %2077 %2067 %float_1 + %2079 = OpMatrixTimesVector %v3float %442 %2076 + %2080 = OpDot %float %2078 %2079 + OpBranch %2022 + %2022 = OpLabel + %2081 = OpPhi %float %2060 %2030 %2080 %2023 + OpBranch %2014 + %2015 = OpLabel + %2082 = OpExtInst %float %1 Log %float_0_00499999989 + %2083 = OpFDiv %float %2082 %1065 + OpBranch %2014 + %2014 = OpLabel + %2084 = OpPhi 
%float %2081 %2022 %2083 %2015 + %2085 = OpExtInst %float %1 Pow %float_10 %2084 + %2086 = OpCompositeInsert %v3float %2085 %2007 2 + %2087 = OpVectorTimesMatrix %v3float %2086 %576 + %2088 = OpFMul %v3float %2087 %496 + %2089 = OpExtInst %v3float %1 Pow %2088 %263 + %2090 = OpFMul %v3float %184 %2089 + %2091 = OpFAdd %v3float %183 %2090 + %2092 = OpFMul %v3float %185 %2089 + %2093 = OpFAdd %v3float %135 %2092 + %2094 = OpFDiv %v3float %135 %2093 + %2095 = OpFMul %v3float %2091 %2094 + %2096 = OpExtInst %v3float %1 Pow %2095 %264 + OpBranch %1236 + %1236 = OpLabel + %2097 = OpPhi %v3float %1260 %1240 %2096 %2014 + OpBranch %1230 + %1231 = OpLabel + %2098 = OpMatrixTimesMatrix %mat3v3float %546 %399 + %2099 = OpFMul %v3float %906 %262 + %2100 = OpVectorTimesMatrix %v3float %2099 %2098 + %2101 = OpCompositeExtract %float %2100 0 + %2102 = OpCompositeExtract %float %2100 1 + %2103 = OpExtInst %float %1 FMin %2101 %2102 + %2104 = OpCompositeExtract %float %2100 2 + %2105 = OpExtInst %float %1 FMin %2103 %2104 + %2106 = OpExtInst %float %1 FMax %2101 %2102 + %2107 = OpExtInst %float %1 FMax %2106 %2104 + %2108 = OpExtInst %float %1 FMax %2107 %float_1_00000001en10 + %2109 = OpExtInst %float %1 FMax %2105 %float_1_00000001en10 + %2110 = OpFSub %float %2108 %2109 + %2111 = OpExtInst %float %1 FMax %2107 %float_0_00999999978 + %2112 = OpFDiv %float %2110 %2111 + %2113 = OpFSub %float %2104 %2102 + %2114 = OpFMul %float %2104 %2113 + %2115 = OpFSub %float %2102 %2101 + %2116 = OpFMul %float %2102 %2115 + %2117 = OpFAdd %float %2114 %2116 + %2118 = OpFSub %float %2101 %2104 + %2119 = OpFMul %float %2101 %2118 + %2120 = OpFAdd %float %2117 %2119 + %2121 = OpExtInst %float %1 Sqrt %2120 + %2122 = OpFAdd %float %2104 %2102 + %2123 = OpFAdd %float %2122 %2101 + %2124 = OpFMul %float %float_1_75 %2121 + %2125 = OpFAdd %float %2123 %2124 + %2126 = OpFMul %float %2125 %float_0_333333343 + %2127 = OpFSub %float %2112 %float_0_400000006 + %2128 = OpFMul %float %2127 %float_5 + %2129 
= OpFMul %float %2127 %float_2_5 + %2130 = OpExtInst %float %1 FAbs %2129 + %2131 = OpFSub %float %float_1 %2130 + %2132 = OpExtInst %float %1 FMax %2131 %float_0 + %2133 = OpExtInst %float %1 FSign %2128 + %2134 = OpConvertFToS %int %2133 + %2135 = OpConvertSToF %float %2134 + %2136 = OpFMul %float %2132 %2132 + %2137 = OpFSub %float %float_1 %2136 + %2138 = OpFMul %float %2135 %2137 + %2139 = OpFAdd %float %float_1 %2138 + %2140 = OpFMul %float %2139 %float_0_0250000004 + %2141 = OpFOrdLessThanEqual %bool %2126 %float_0_0533333346 + OpSelectionMerge %2142 None + OpBranchConditional %2141 %2143 %2144 + %2144 = OpLabel + %2145 = OpFOrdGreaterThanEqual %bool %2126 %float_0_159999996 + OpSelectionMerge %2146 None + OpBranchConditional %2145 %2147 %2148 + %2148 = OpLabel + %2149 = OpFDiv %float %float_0_239999995 %2125 + %2150 = OpFSub %float %2149 %float_0_5 + %2151 = OpFMul %float %2140 %2150 + OpBranch %2146 + %2147 = OpLabel + OpBranch %2146 + %2146 = OpLabel + %2152 = OpPhi %float %2151 %2148 %float_0 %2147 + OpBranch %2142 + %2143 = OpLabel + OpBranch %2142 + %2142 = OpLabel + %2153 = OpPhi %float %2152 %2146 %2140 %2143 + %2154 = OpFAdd %float %float_1 %2153 + %2155 = OpCompositeConstruct %v3float %2154 %2154 %2154 + %2156 = OpFMul %v3float %2100 %2155 + %2157 = OpCompositeExtract %float %2156 0 + %2158 = OpCompositeExtract %float %2156 1 + %2159 = OpFOrdEqual %bool %2157 %2158 + %2160 = OpCompositeExtract %float %2156 2 + %2161 = OpFOrdEqual %bool %2158 %2160 + %2162 = OpLogicalAnd %bool %2159 %2161 + OpSelectionMerge %2163 None + OpBranchConditional %2162 %2164 %2165 + %2165 = OpLabel + %2166 = OpExtInst %float %1 Sqrt %float_3 + %2167 = OpFSub %float %2158 %2160 + %2168 = OpFMul %float %2166 %2167 + %2169 = OpFMul %float %float_2 %2157 + %2170 = OpFSub %float %2169 %2158 + %2171 = OpFSub %float %2170 %2160 + %2172 = OpExtInst %float %1 Atan2 %2168 %2171 + %2173 = OpFMul %float %float_57_2957764 %2172 + OpBranch %2163 + %2164 = OpLabel + OpBranch %2163 + 
%2163 = OpLabel + %2174 = OpPhi %float %2173 %2165 %float_0 %2164 + %2175 = OpFOrdLessThan %bool %2174 %float_0 + OpSelectionMerge %2176 None + OpBranchConditional %2175 %2177 %2176 + %2177 = OpLabel + %2178 = OpFAdd %float %2174 %float_360 + OpBranch %2176 + %2176 = OpLabel + %2179 = OpPhi %float %2174 %2163 %2178 %2177 + %2180 = OpExtInst %float %1 FClamp %2179 %float_0 %float_360 + %2181 = OpFOrdGreaterThan %bool %2180 %float_180 + OpSelectionMerge %2182 None + OpBranchConditional %2181 %2183 %2182 + %2183 = OpLabel + %2184 = OpFSub %float %2180 %float_360 + OpBranch %2182 + %2182 = OpLabel + %2185 = OpPhi %float %2180 %2176 %2184 %2183 + %2186 = OpFOrdGreaterThan %bool %2185 %float_n67_5 + %2187 = OpFOrdLessThan %bool %2185 %float_67_5 + %2188 = OpLogicalAnd %bool %2186 %2187 + OpSelectionMerge %2189 None + OpBranchConditional %2188 %2190 %2189 + %2190 = OpLabel + %2191 = OpFSub %float %2185 %float_n67_5 + %2192 = OpFMul %float %2191 %float_0_0296296291 + %2193 = OpConvertFToS %int %2192 + %2194 = OpConvertSToF %float %2193 + %2195 = OpFSub %float %2192 %2194 + %2196 = OpFMul %float %2195 %2195 + %2197 = OpFMul %float %2196 %2195 + %2198 = OpIEqual %bool %2193 %int_3 + OpSelectionMerge %2199 None + OpBranchConditional %2198 %2200 %2201 + %2201 = OpLabel + %2202 = OpIEqual %bool %2193 %int_2 + OpSelectionMerge %2203 None + OpBranchConditional %2202 %2204 %2205 + %2205 = OpLabel + %2206 = OpIEqual %bool %2193 %int_1 + OpSelectionMerge %2207 None + OpBranchConditional %2206 %2208 %2209 + %2209 = OpLabel + %2210 = OpIEqual %bool %2193 %int_0 + OpSelectionMerge %2211 None + OpBranchConditional %2210 %2212 %2213 + %2213 = OpLabel + OpBranch %2211 + %2212 = OpLabel + %2214 = OpFMul %float %2197 %float_0_166666672 + OpBranch %2211 + %2211 = OpLabel + %2215 = OpPhi %float %float_0 %2213 %2214 %2212 + OpBranch %2207 + %2208 = OpLabel + %2216 = OpFMul %float %2197 %float_n0_5 + %2217 = OpFMul %float %2196 %float_0_5 + %2218 = OpFAdd %float %2216 %2217 + %2219 = OpFMul 
%float %2195 %float_0_5 + %2220 = OpFAdd %float %2218 %2219 + %2221 = OpFAdd %float %2220 %float_0_166666672 + OpBranch %2207 + %2207 = OpLabel + %2222 = OpPhi %float %2215 %2211 %2221 %2208 + OpBranch %2203 + %2204 = OpLabel + %2223 = OpFMul %float %2197 %float_0_5 + %2224 = OpFMul %float %2196 %float_n1 + %2225 = OpFAdd %float %2223 %2224 + %2226 = OpFAdd %float %2225 %float_0_666666687 + OpBranch %2203 + %2203 = OpLabel + %2227 = OpPhi %float %2222 %2207 %2226 %2204 + OpBranch %2199 + %2200 = OpLabel + %2228 = OpFMul %float %2197 %float_n0_166666672 + %2229 = OpFMul %float %2196 %float_0_5 + %2230 = OpFAdd %float %2228 %2229 + %2231 = OpFMul %float %2195 %float_n0_5 + %2232 = OpFAdd %float %2230 %2231 + %2233 = OpFAdd %float %2232 %float_0_166666672 + OpBranch %2199 + %2199 = OpLabel + %2234 = OpPhi %float %2227 %2203 %2233 %2200 + OpBranch %2189 + %2189 = OpLabel + %2235 = OpPhi %float %float_0 %2182 %2234 %2199 + %2236 = OpFMul %float %2235 %float_1_5 + %2237 = OpFMul %float %2236 %2112 + %2238 = OpFSub %float %float_0_0299999993 %2157 + %2239 = OpFMul %float %2237 %2238 + %2240 = OpFMul %float %2239 %float_0_180000007 + %2241 = OpFAdd %float %2157 %2240 + %2242 = OpCompositeInsert %v3float %2241 %2156 0 + %2243 = OpExtInst %v3float %1 FClamp %2242 %132 %314 + %2244 = OpVectorTimesMatrix %v3float %2243 %410 + %2245 = OpExtInst %v3float %1 FClamp %2244 %132 %314 + %2246 = OpDot %float %2245 %67 + %2247 = OpCompositeConstruct %v3float %2246 %2246 %2246 + %2248 = OpExtInst %v3float %1 FMix %2247 %2245 %228 + %2249 = OpCompositeExtract %float %2248 0 + %2250 = OpExtInst %float %1 Exp2 %float_n15 + %2251 = OpFMul %float %float_0_179999992 %2250 + %2252 = OpExtInst %float %1 Exp2 %float_18 + %2253 = OpFMul %float %float_0_179999992 %2252 + OpStore %520 %475 + OpStore %519 %476 + %2254 = OpFOrdLessThanEqual %bool %2249 %float_0 + %2255 = OpExtInst %float %1 Exp2 %float_n14 + %2256 = OpSelect %float %2254 %2255 %2249 + %2257 = OpExtInst %float %1 Log %2256 + %2258 = 
OpFDiv %float %2257 %1065 + %2259 = OpExtInst %float %1 Log %2251 + %2260 = OpFDiv %float %2259 %1065 + %2261 = OpFOrdLessThanEqual %bool %2258 %2260 + OpSelectionMerge %2262 None + OpBranchConditional %2261 %2263 %2264 + %2264 = OpLabel + %2265 = OpFOrdGreaterThan %bool %2258 %2260 + %2266 = OpExtInst %float %1 Log %float_0_180000007 + %2267 = OpFDiv %float %2266 %1065 + %2268 = OpFOrdLessThan %bool %2258 %2267 + %2269 = OpLogicalAnd %bool %2265 %2268 + OpSelectionMerge %2270 None + OpBranchConditional %2269 %2271 %2272 + %2272 = OpLabel + %2273 = OpFOrdGreaterThanEqual %bool %2258 %2267 + %2274 = OpExtInst %float %1 Log %2253 + %2275 = OpFDiv %float %2274 %1065 + %2276 = OpFOrdLessThan %bool %2258 %2275 + %2277 = OpLogicalAnd %bool %2273 %2276 + OpSelectionMerge %2278 None + OpBranchConditional %2277 %2279 %2280 + %2280 = OpLabel + %2281 = OpExtInst %float %1 Log %float_10000 + %2282 = OpFDiv %float %2281 %1065 + OpBranch %2278 + %2279 = OpLabel + %2283 = OpFSub %float %2258 %2267 + %2284 = OpFMul %float %float_3 %2283 + %2285 = OpFSub %float %2275 %2267 + %2286 = OpFDiv %float %2284 %2285 + %2287 = OpConvertFToS %int %2286 + %2288 = OpConvertSToF %float %2287 + %2289 = OpFSub %float %2286 %2288 + %2290 = OpAccessChain %_ptr_Function_float %519 %2287 + %2291 = OpLoad %float %2290 + %2292 = OpIAdd %int %2287 %int_1 + %2293 = OpAccessChain %_ptr_Function_float %519 %2292 + %2294 = OpLoad %float %2293 + %2295 = OpIAdd %int %2287 %int_2 + %2296 = OpAccessChain %_ptr_Function_float %519 %2295 + %2297 = OpLoad %float %2296 + %2298 = OpCompositeConstruct %v3float %2291 %2294 %2297 + %2299 = OpFMul %float %2289 %2289 + %2300 = OpCompositeConstruct %v3float %2299 %2289 %float_1 + %2301 = OpMatrixTimesVector %v3float %442 %2298 + %2302 = OpDot %float %2300 %2301 + OpBranch %2278 + %2278 = OpLabel + %2303 = OpPhi %float %2282 %2280 %2302 %2279 + OpBranch %2270 + %2271 = OpLabel + %2304 = OpFSub %float %2258 %2260 + %2305 = OpFMul %float %float_3 %2304 + %2306 = OpFSub 
%float %2267 %2260 + %2307 = OpFDiv %float %2305 %2306 + %2308 = OpConvertFToS %int %2307 + %2309 = OpConvertSToF %float %2308 + %2310 = OpFSub %float %2307 %2309 + %2311 = OpAccessChain %_ptr_Function_float %520 %2308 + %2312 = OpLoad %float %2311 + %2313 = OpIAdd %int %2308 %int_1 + %2314 = OpAccessChain %_ptr_Function_float %520 %2313 + %2315 = OpLoad %float %2314 + %2316 = OpIAdd %int %2308 %int_2 + %2317 = OpAccessChain %_ptr_Function_float %520 %2316 + %2318 = OpLoad %float %2317 + %2319 = OpCompositeConstruct %v3float %2312 %2315 %2318 + %2320 = OpFMul %float %2310 %2310 + %2321 = OpCompositeConstruct %v3float %2320 %2310 %float_1 + %2322 = OpMatrixTimesVector %v3float %442 %2319 + %2323 = OpDot %float %2321 %2322 + OpBranch %2270 + %2270 = OpLabel + %2324 = OpPhi %float %2303 %2278 %2323 %2271 + OpBranch %2262 + %2263 = OpLabel + %2325 = OpExtInst %float %1 Log %float_9_99999975en05 + %2326 = OpFDiv %float %2325 %1065 + OpBranch %2262 + %2262 = OpLabel + %2327 = OpPhi %float %2324 %2270 %2326 %2263 + %2328 = OpExtInst %float %1 Pow %float_10 %2327 + %2329 = OpCompositeInsert %v3float %2328 %391 0 + %2330 = OpCompositeExtract %float %2248 1 + OpStore %522 %475 + OpStore %521 %476 + %2331 = OpFOrdLessThanEqual %bool %2330 %float_0 + %2332 = OpSelect %float %2331 %2255 %2330 + %2333 = OpExtInst %float %1 Log %2332 + %2334 = OpFDiv %float %2333 %1065 + %2335 = OpFOrdLessThanEqual %bool %2334 %2260 + OpSelectionMerge %2336 None + OpBranchConditional %2335 %2337 %2338 + %2338 = OpLabel + %2339 = OpFOrdGreaterThan %bool %2334 %2260 + %2340 = OpExtInst %float %1 Log %float_0_180000007 + %2341 = OpFDiv %float %2340 %1065 + %2342 = OpFOrdLessThan %bool %2334 %2341 + %2343 = OpLogicalAnd %bool %2339 %2342 + OpSelectionMerge %2344 None + OpBranchConditional %2343 %2345 %2346 + %2346 = OpLabel + %2347 = OpFOrdGreaterThanEqual %bool %2334 %2341 + %2348 = OpExtInst %float %1 Log %2253 + %2349 = OpFDiv %float %2348 %1065 + %2350 = OpFOrdLessThan %bool %2334 %2349 + %2351 = 
OpLogicalAnd %bool %2347 %2350 + OpSelectionMerge %2352 None + OpBranchConditional %2351 %2353 %2354 + %2354 = OpLabel + %2355 = OpExtInst %float %1 Log %float_10000 + %2356 = OpFDiv %float %2355 %1065 + OpBranch %2352 + %2353 = OpLabel + %2357 = OpFSub %float %2334 %2341 + %2358 = OpFMul %float %float_3 %2357 + %2359 = OpFSub %float %2349 %2341 + %2360 = OpFDiv %float %2358 %2359 + %2361 = OpConvertFToS %int %2360 + %2362 = OpConvertSToF %float %2361 + %2363 = OpFSub %float %2360 %2362 + %2364 = OpAccessChain %_ptr_Function_float %521 %2361 + %2365 = OpLoad %float %2364 + %2366 = OpIAdd %int %2361 %int_1 + %2367 = OpAccessChain %_ptr_Function_float %521 %2366 + %2368 = OpLoad %float %2367 + %2369 = OpIAdd %int %2361 %int_2 + %2370 = OpAccessChain %_ptr_Function_float %521 %2369 + %2371 = OpLoad %float %2370 + %2372 = OpCompositeConstruct %v3float %2365 %2368 %2371 + %2373 = OpFMul %float %2363 %2363 + %2374 = OpCompositeConstruct %v3float %2373 %2363 %float_1 + %2375 = OpMatrixTimesVector %v3float %442 %2372 + %2376 = OpDot %float %2374 %2375 + OpBranch %2352 + %2352 = OpLabel + %2377 = OpPhi %float %2356 %2354 %2376 %2353 + OpBranch %2344 + %2345 = OpLabel + %2378 = OpFSub %float %2334 %2260 + %2379 = OpFMul %float %float_3 %2378 + %2380 = OpFSub %float %2341 %2260 + %2381 = OpFDiv %float %2379 %2380 + %2382 = OpConvertFToS %int %2381 + %2383 = OpConvertSToF %float %2382 + %2384 = OpFSub %float %2381 %2383 + %2385 = OpAccessChain %_ptr_Function_float %522 %2382 + %2386 = OpLoad %float %2385 + %2387 = OpIAdd %int %2382 %int_1 + %2388 = OpAccessChain %_ptr_Function_float %522 %2387 + %2389 = OpLoad %float %2388 + %2390 = OpIAdd %int %2382 %int_2 + %2391 = OpAccessChain %_ptr_Function_float %522 %2390 + %2392 = OpLoad %float %2391 + %2393 = OpCompositeConstruct %v3float %2386 %2389 %2392 + %2394 = OpFMul %float %2384 %2384 + %2395 = OpCompositeConstruct %v3float %2394 %2384 %float_1 + %2396 = OpMatrixTimesVector %v3float %442 %2393 + %2397 = OpDot %float %2395 %2396 
+ OpBranch %2344 + %2344 = OpLabel + %2398 = OpPhi %float %2377 %2352 %2397 %2345 + OpBranch %2336 + %2337 = OpLabel + %2399 = OpExtInst %float %1 Log %float_9_99999975en05 + %2400 = OpFDiv %float %2399 %1065 + OpBranch %2336 + %2336 = OpLabel + %2401 = OpPhi %float %2398 %2344 %2400 %2337 + %2402 = OpExtInst %float %1 Pow %float_10 %2401 + %2403 = OpCompositeInsert %v3float %2402 %2329 1 + %2404 = OpCompositeExtract %float %2248 2 + OpStore %524 %475 + OpStore %523 %476 + %2405 = OpFOrdLessThanEqual %bool %2404 %float_0 + %2406 = OpSelect %float %2405 %2255 %2404 + %2407 = OpExtInst %float %1 Log %2406 + %2408 = OpFDiv %float %2407 %1065 + %2409 = OpFOrdLessThanEqual %bool %2408 %2260 + OpSelectionMerge %2410 None + OpBranchConditional %2409 %2411 %2412 + %2412 = OpLabel + %2413 = OpFOrdGreaterThan %bool %2408 %2260 + %2414 = OpExtInst %float %1 Log %float_0_180000007 + %2415 = OpFDiv %float %2414 %1065 + %2416 = OpFOrdLessThan %bool %2408 %2415 + %2417 = OpLogicalAnd %bool %2413 %2416 + OpSelectionMerge %2418 None + OpBranchConditional %2417 %2419 %2420 + %2420 = OpLabel + %2421 = OpFOrdGreaterThanEqual %bool %2408 %2415 + %2422 = OpExtInst %float %1 Log %2253 + %2423 = OpFDiv %float %2422 %1065 + %2424 = OpFOrdLessThan %bool %2408 %2423 + %2425 = OpLogicalAnd %bool %2421 %2424 + OpSelectionMerge %2426 None + OpBranchConditional %2425 %2427 %2428 + %2428 = OpLabel + %2429 = OpExtInst %float %1 Log %float_10000 + %2430 = OpFDiv %float %2429 %1065 + OpBranch %2426 + %2427 = OpLabel + %2431 = OpFSub %float %2408 %2415 + %2432 = OpFMul %float %float_3 %2431 + %2433 = OpFSub %float %2423 %2415 + %2434 = OpFDiv %float %2432 %2433 + %2435 = OpConvertFToS %int %2434 + %2436 = OpConvertSToF %float %2435 + %2437 = OpFSub %float %2434 %2436 + %2438 = OpAccessChain %_ptr_Function_float %523 %2435 + %2439 = OpLoad %float %2438 + %2440 = OpIAdd %int %2435 %int_1 + %2441 = OpAccessChain %_ptr_Function_float %523 %2440 + %2442 = OpLoad %float %2441 + %2443 = OpIAdd %int %2435 
%int_2 + %2444 = OpAccessChain %_ptr_Function_float %523 %2443 + %2445 = OpLoad %float %2444 + %2446 = OpCompositeConstruct %v3float %2439 %2442 %2445 + %2447 = OpFMul %float %2437 %2437 + %2448 = OpCompositeConstruct %v3float %2447 %2437 %float_1 + %2449 = OpMatrixTimesVector %v3float %442 %2446 + %2450 = OpDot %float %2448 %2449 + OpBranch %2426 + %2426 = OpLabel + %2451 = OpPhi %float %2430 %2428 %2450 %2427 + OpBranch %2418 + %2419 = OpLabel + %2452 = OpFSub %float %2408 %2260 + %2453 = OpFMul %float %float_3 %2452 + %2454 = OpFSub %float %2415 %2260 + %2455 = OpFDiv %float %2453 %2454 + %2456 = OpConvertFToS %int %2455 + %2457 = OpConvertSToF %float %2456 + %2458 = OpFSub %float %2455 %2457 + %2459 = OpAccessChain %_ptr_Function_float %524 %2456 + %2460 = OpLoad %float %2459 + %2461 = OpIAdd %int %2456 %int_1 + %2462 = OpAccessChain %_ptr_Function_float %524 %2461 + %2463 = OpLoad %float %2462 + %2464 = OpIAdd %int %2456 %int_2 + %2465 = OpAccessChain %_ptr_Function_float %524 %2464 + %2466 = OpLoad %float %2465 + %2467 = OpCompositeConstruct %v3float %2460 %2463 %2466 + %2468 = OpFMul %float %2458 %2458 + %2469 = OpCompositeConstruct %v3float %2468 %2458 %float_1 + %2470 = OpMatrixTimesVector %v3float %442 %2467 + %2471 = OpDot %float %2469 %2470 + OpBranch %2418 + %2418 = OpLabel + %2472 = OpPhi %float %2451 %2426 %2471 %2419 + OpBranch %2410 + %2411 = OpLabel + %2473 = OpExtInst %float %1 Log %float_9_99999975en05 + %2474 = OpFDiv %float %2473 %1065 + OpBranch %2410 + %2410 = OpLabel + %2475 = OpPhi %float %2472 %2418 %2474 %2411 + %2476 = OpExtInst %float %1 Pow %float_10 %2475 + %2477 = OpCompositeInsert %v3float %2476 %2403 2 + %2478 = OpVectorTimesMatrix %v3float %2477 %414 + %2479 = OpVectorTimesMatrix %v3float %2478 %410 + %2480 = OpExtInst %float %1 Pow %float_2 %float_n12 + %2481 = OpFMul %float %float_0_179999992 %2480 + OpStore %532 %475 + OpStore %531 %476 + %2482 = OpFOrdLessThanEqual %bool %2481 %float_0 + %2483 = OpSelect %float %2482 %2255 
%2481 + %2484 = OpExtInst %float %1 Log %2483 + %2485 = OpFDiv %float %2484 %1065 + %2486 = OpFOrdLessThanEqual %bool %2485 %2260 + OpSelectionMerge %2487 None + OpBranchConditional %2486 %2488 %2489 + %2489 = OpLabel + %2490 = OpFOrdGreaterThan %bool %2485 %2260 + %2491 = OpExtInst %float %1 Log %float_0_180000007 + %2492 = OpFDiv %float %2491 %1065 + %2493 = OpFOrdLessThan %bool %2485 %2492 + %2494 = OpLogicalAnd %bool %2490 %2493 + OpSelectionMerge %2495 None + OpBranchConditional %2494 %2496 %2497 + %2497 = OpLabel + %2498 = OpFOrdGreaterThanEqual %bool %2485 %2492 + %2499 = OpExtInst %float %1 Log %2253 + %2500 = OpFDiv %float %2499 %1065 + %2501 = OpFOrdLessThan %bool %2485 %2500 + %2502 = OpLogicalAnd %bool %2498 %2501 + OpSelectionMerge %2503 None + OpBranchConditional %2502 %2504 %2505 + %2505 = OpLabel + %2506 = OpExtInst %float %1 Log %float_10000 + %2507 = OpFDiv %float %2506 %1065 + OpBranch %2503 + %2504 = OpLabel + %2508 = OpFSub %float %2485 %2492 + %2509 = OpFMul %float %float_3 %2508 + %2510 = OpFSub %float %2500 %2492 + %2511 = OpFDiv %float %2509 %2510 + %2512 = OpConvertFToS %int %2511 + %2513 = OpConvertSToF %float %2512 + %2514 = OpFSub %float %2511 %2513 + %2515 = OpAccessChain %_ptr_Function_float %531 %2512 + %2516 = OpLoad %float %2515 + %2517 = OpIAdd %int %2512 %int_1 + %2518 = OpAccessChain %_ptr_Function_float %531 %2517 + %2519 = OpLoad %float %2518 + %2520 = OpIAdd %int %2512 %int_2 + %2521 = OpAccessChain %_ptr_Function_float %531 %2520 + %2522 = OpLoad %float %2521 + %2523 = OpCompositeConstruct %v3float %2516 %2519 %2522 + %2524 = OpFMul %float %2514 %2514 + %2525 = OpCompositeConstruct %v3float %2524 %2514 %float_1 + %2526 = OpMatrixTimesVector %v3float %442 %2523 + %2527 = OpDot %float %2525 %2526 + OpBranch %2503 + %2503 = OpLabel + %2528 = OpPhi %float %2507 %2505 %2527 %2504 + OpBranch %2495 + %2496 = OpLabel + %2529 = OpFSub %float %2485 %2260 + %2530 = OpFMul %float %float_3 %2529 + %2531 = OpFSub %float %2492 %2260 + 
%2532 = OpFDiv %float %2530 %2531 + %2533 = OpConvertFToS %int %2532 + %2534 = OpConvertSToF %float %2533 + %2535 = OpFSub %float %2532 %2534 + %2536 = OpAccessChain %_ptr_Function_float %532 %2533 + %2537 = OpLoad %float %2536 + %2538 = OpIAdd %int %2533 %int_1 + %2539 = OpAccessChain %_ptr_Function_float %532 %2538 + %2540 = OpLoad %float %2539 + %2541 = OpIAdd %int %2533 %int_2 + %2542 = OpAccessChain %_ptr_Function_float %532 %2541 + %2543 = OpLoad %float %2542 + %2544 = OpCompositeConstruct %v3float %2537 %2540 %2543 + %2545 = OpFMul %float %2535 %2535 + %2546 = OpCompositeConstruct %v3float %2545 %2535 %float_1 + %2547 = OpMatrixTimesVector %v3float %442 %2544 + %2548 = OpDot %float %2546 %2547 + OpBranch %2495 + %2495 = OpLabel + %2549 = OpPhi %float %2528 %2503 %2548 %2496 + OpBranch %2487 + %2488 = OpLabel + %2550 = OpExtInst %float %1 Log %float_9_99999975en05 + %2551 = OpFDiv %float %2550 %1065 + OpBranch %2487 + %2487 = OpLabel + %2552 = OpPhi %float %2549 %2495 %2551 %2488 + %2553 = OpExtInst %float %1 Pow %float_10 %2552 + OpStore %534 %475 + OpStore %533 %476 + %2554 = OpExtInst %float %1 Log %float_0_180000007 + %2555 = OpFDiv %float %2554 %1065 + %2556 = OpFOrdLessThanEqual %bool %2555 %2260 + OpSelectionMerge %2557 None + OpBranchConditional %2556 %2558 %2559 + %2559 = OpLabel + %2560 = OpFOrdGreaterThan %bool %2555 %2260 + %2561 = OpFOrdLessThan %bool %2555 %2555 + %2562 = OpLogicalAnd %bool %2560 %2561 + OpSelectionMerge %2563 None + OpBranchConditional %2562 %2564 %2565 + %2565 = OpLabel + %2566 = OpFOrdGreaterThanEqual %bool %2555 %2555 + %2567 = OpExtInst %float %1 Log %2253 + %2568 = OpFDiv %float %2567 %1065 + %2569 = OpFOrdLessThan %bool %2555 %2568 + %2570 = OpLogicalAnd %bool %2566 %2569 + OpSelectionMerge %2571 None + OpBranchConditional %2570 %2572 %2573 + %2573 = OpLabel + %2574 = OpExtInst %float %1 Log %float_10000 + %2575 = OpFDiv %float %2574 %1065 + OpBranch %2571 + %2572 = OpLabel + %2576 = OpFSub %float %2555 %2555 + %2577 = 
OpFMul %float %float_3 %2576 + %2578 = OpFSub %float %2568 %2555 + %2579 = OpFDiv %float %2577 %2578 + %2580 = OpConvertFToS %int %2579 + %2581 = OpConvertSToF %float %2580 + %2582 = OpFSub %float %2579 %2581 + %2583 = OpAccessChain %_ptr_Function_float %533 %2580 + %2584 = OpLoad %float %2583 + %2585 = OpIAdd %int %2580 %int_1 + %2586 = OpAccessChain %_ptr_Function_float %533 %2585 + %2587 = OpLoad %float %2586 + %2588 = OpIAdd %int %2580 %int_2 + %2589 = OpAccessChain %_ptr_Function_float %533 %2588 + %2590 = OpLoad %float %2589 + %2591 = OpCompositeConstruct %v3float %2584 %2587 %2590 + %2592 = OpFMul %float %2582 %2582 + %2593 = OpCompositeConstruct %v3float %2592 %2582 %float_1 + %2594 = OpMatrixTimesVector %v3float %442 %2591 + %2595 = OpDot %float %2593 %2594 + OpBranch %2571 + %2571 = OpLabel + %2596 = OpPhi %float %2575 %2573 %2595 %2572 + OpBranch %2563 + %2564 = OpLabel + %2597 = OpFSub %float %2555 %2260 + %2598 = OpFMul %float %float_3 %2597 + %2599 = OpAccessChain %_ptr_Function_float %534 %int_3 + %2600 = OpLoad %float %2599 + %2601 = OpAccessChain %_ptr_Function_float %534 %int_4 + %2602 = OpLoad %float %2601 + %2603 = OpAccessChain %_ptr_Function_float %534 %int_5 + %2604 = OpLoad %float %2603 + %2605 = OpCompositeConstruct %v3float %2600 %2602 %2604 + %2606 = OpMatrixTimesVector %v3float %442 %2605 + %2607 = OpCompositeExtract %float %2606 2 + OpBranch %2563 + %2563 = OpLabel + %2608 = OpPhi %float %2596 %2571 %2607 %2564 + OpBranch %2557 + %2558 = OpLabel + %2609 = OpExtInst %float %1 Log %float_9_99999975en05 + %2610 = OpFDiv %float %2609 %1065 + OpBranch %2557 + %2557 = OpLabel + %2611 = OpPhi %float %2608 %2563 %2610 %2558 + %2612 = OpExtInst %float %1 Pow %float_10 %2611 + %2613 = OpExtInst %float %1 Pow %float_2 %float_10 + %2614 = OpFMul %float %float_0_179999992 %2613 + OpStore %536 %475 + OpStore %535 %476 + %2615 = OpFOrdLessThanEqual %bool %2614 %float_0 + %2616 = OpSelect %float %2615 %2255 %2614 + %2617 = OpExtInst %float %1 Log %2616 
+ %2618 = OpFDiv %float %2617 %1065 + %2619 = OpFOrdLessThanEqual %bool %2618 %2260 + OpSelectionMerge %2620 None + OpBranchConditional %2619 %2621 %2622 + %2622 = OpLabel + %2623 = OpFOrdGreaterThan %bool %2618 %2260 + %2624 = OpFOrdLessThan %bool %2618 %2555 + %2625 = OpLogicalAnd %bool %2623 %2624 + OpSelectionMerge %2626 None + OpBranchConditional %2625 %2627 %2628 + %2628 = OpLabel + %2629 = OpFOrdGreaterThanEqual %bool %2618 %2555 + %2630 = OpExtInst %float %1 Log %2253 + %2631 = OpFDiv %float %2630 %1065 + %2632 = OpFOrdLessThan %bool %2618 %2631 + %2633 = OpLogicalAnd %bool %2629 %2632 + OpSelectionMerge %2634 None + OpBranchConditional %2633 %2635 %2636 + %2636 = OpLabel + %2637 = OpExtInst %float %1 Log %float_10000 + %2638 = OpFDiv %float %2637 %1065 + OpBranch %2634 + %2635 = OpLabel + %2639 = OpFSub %float %2618 %2555 + %2640 = OpFMul %float %float_3 %2639 + %2641 = OpFSub %float %2631 %2555 + %2642 = OpFDiv %float %2640 %2641 + %2643 = OpConvertFToS %int %2642 + %2644 = OpConvertSToF %float %2643 + %2645 = OpFSub %float %2642 %2644 + %2646 = OpAccessChain %_ptr_Function_float %535 %2643 + %2647 = OpLoad %float %2646 + %2648 = OpIAdd %int %2643 %int_1 + %2649 = OpAccessChain %_ptr_Function_float %535 %2648 + %2650 = OpLoad %float %2649 + %2651 = OpIAdd %int %2643 %int_2 + %2652 = OpAccessChain %_ptr_Function_float %535 %2651 + %2653 = OpLoad %float %2652 + %2654 = OpCompositeConstruct %v3float %2647 %2650 %2653 + %2655 = OpFMul %float %2645 %2645 + %2656 = OpCompositeConstruct %v3float %2655 %2645 %float_1 + %2657 = OpMatrixTimesVector %v3float %442 %2654 + %2658 = OpDot %float %2656 %2657 + OpBranch %2634 + %2634 = OpLabel + %2659 = OpPhi %float %2638 %2636 %2658 %2635 + OpBranch %2626 + %2627 = OpLabel + %2660 = OpFSub %float %2618 %2260 + %2661 = OpFMul %float %float_3 %2660 + %2662 = OpFSub %float %2555 %2260 + %2663 = OpFDiv %float %2661 %2662 + %2664 = OpConvertFToS %int %2663 + %2665 = OpConvertSToF %float %2664 + %2666 = OpFSub %float %2663 
%2665 + %2667 = OpAccessChain %_ptr_Function_float %536 %2664 + %2668 = OpLoad %float %2667 + %2669 = OpIAdd %int %2664 %int_1 + %2670 = OpAccessChain %_ptr_Function_float %536 %2669 + %2671 = OpLoad %float %2670 + %2672 = OpIAdd %int %2664 %int_2 + %2673 = OpAccessChain %_ptr_Function_float %536 %2672 + %2674 = OpLoad %float %2673 + %2675 = OpCompositeConstruct %v3float %2668 %2671 %2674 + %2676 = OpFMul %float %2666 %2666 + %2677 = OpCompositeConstruct %v3float %2676 %2666 %float_1 + %2678 = OpMatrixTimesVector %v3float %442 %2675 + %2679 = OpDot %float %2677 %2678 + OpBranch %2626 + %2626 = OpLabel + %2680 = OpPhi %float %2659 %2634 %2679 %2627 + OpBranch %2620 + %2621 = OpLabel + %2681 = OpExtInst %float %1 Log %float_9_99999975en05 + %2682 = OpFDiv %float %2681 %1065 + OpBranch %2620 + %2620 = OpLabel + %2683 = OpPhi %float %2680 %2626 %2682 %2621 + %2684 = OpExtInst %float %1 Pow %float_10 %2683 + %2685 = OpCompositeExtract %float %2479 0 + OpStore %530 %479 + OpStore %529 %480 + %2686 = OpFOrdLessThanEqual %bool %2685 %float_0 + %2687 = OpSelect %float %2686 %float_9_99999975en05 %2685 + %2688 = OpExtInst %float %1 Log %2687 + %2689 = OpFDiv %float %2688 %1065 + %2690 = OpExtInst %float %1 Log %2553 + %2691 = OpFDiv %float %2690 %1065 + %2692 = OpFOrdLessThanEqual %bool %2689 %2691 + OpSelectionMerge %2693 None + OpBranchConditional %2692 %2694 %2695 + %2695 = OpLabel + %2696 = OpFOrdGreaterThan %bool %2689 %2691 + %2697 = OpExtInst %float %1 Log %2612 + %2698 = OpFDiv %float %2697 %1065 + %2699 = OpFOrdLessThan %bool %2689 %2698 + %2700 = OpLogicalAnd %bool %2696 %2699 + OpSelectionMerge %2701 None + OpBranchConditional %2700 %2702 %2703 + %2703 = OpLabel + %2704 = OpFOrdGreaterThanEqual %bool %2689 %2698 + %2705 = OpExtInst %float %1 Log %2684 + %2706 = OpFDiv %float %2705 %1065 + %2707 = OpFOrdLessThan %bool %2689 %2706 + %2708 = OpLogicalAnd %bool %2704 %2707 + OpSelectionMerge %2709 None + OpBranchConditional %2708 %2710 %2711 + %2711 = OpLabel + %2712 
= OpFMul %float %2689 %float_0_0599999987 + %2713 = OpExtInst %float %1 Log %float_1000 + %2714 = OpFDiv %float %2713 %1065 + %2715 = OpFMul %float %float_0_0599999987 %2705 + %2716 = OpFDiv %float %2715 %1065 + %2717 = OpFSub %float %2714 %2716 + %2718 = OpFAdd %float %2712 %2717 + OpBranch %2709 + %2710 = OpLabel + %2719 = OpFSub %float %2689 %2698 + %2720 = OpFMul %float %float_7 %2719 + %2721 = OpFSub %float %2706 %2698 + %2722 = OpFDiv %float %2720 %2721 + %2723 = OpConvertFToS %int %2722 + %2724 = OpConvertSToF %float %2723 + %2725 = OpFSub %float %2722 %2724 + %2726 = OpAccessChain %_ptr_Function_float %529 %2723 + %2727 = OpLoad %float %2726 + %2728 = OpIAdd %int %2723 %int_1 + %2729 = OpAccessChain %_ptr_Function_float %529 %2728 + %2730 = OpLoad %float %2729 + %2731 = OpIAdd %int %2723 %int_2 + %2732 = OpAccessChain %_ptr_Function_float %529 %2731 + %2733 = OpLoad %float %2732 + %2734 = OpCompositeConstruct %v3float %2727 %2730 %2733 + %2735 = OpFMul %float %2725 %2725 + %2736 = OpCompositeConstruct %v3float %2735 %2725 %float_1 + %2737 = OpMatrixTimesVector %v3float %442 %2734 + %2738 = OpDot %float %2736 %2737 + OpBranch %2709 + %2709 = OpLabel + %2739 = OpPhi %float %2718 %2711 %2738 %2710 + OpBranch %2701 + %2702 = OpLabel + %2740 = OpFSub %float %2689 %2691 + %2741 = OpFMul %float %float_7 %2740 + %2742 = OpFSub %float %2698 %2691 + %2743 = OpFDiv %float %2741 %2742 + %2744 = OpConvertFToS %int %2743 + %2745 = OpConvertSToF %float %2744 + %2746 = OpFSub %float %2743 %2745 + %2747 = OpAccessChain %_ptr_Function_float %530 %2744 + %2748 = OpLoad %float %2747 + %2749 = OpIAdd %int %2744 %int_1 + %2750 = OpAccessChain %_ptr_Function_float %530 %2749 + %2751 = OpLoad %float %2750 + %2752 = OpIAdd %int %2744 %int_2 + %2753 = OpAccessChain %_ptr_Function_float %530 %2752 + %2754 = OpLoad %float %2753 + %2755 = OpCompositeConstruct %v3float %2748 %2751 %2754 + %2756 = OpFMul %float %2746 %2746 + %2757 = OpCompositeConstruct %v3float %2756 %2746 %float_1 + 
%2758 = OpMatrixTimesVector %v3float %442 %2755 + %2759 = OpDot %float %2757 %2758 + OpBranch %2701 + %2701 = OpLabel + %2760 = OpPhi %float %2739 %2709 %2759 %2702 + OpBranch %2693 + %2694 = OpLabel + %2761 = OpFMul %float %2689 %float_3 + %2762 = OpExtInst %float %1 Log %float_9_99999975en05 + %2763 = OpFDiv %float %2762 %1065 + %2764 = OpFMul %float %float_3 %2690 + %2765 = OpFDiv %float %2764 %1065 + %2766 = OpFSub %float %2763 %2765 + %2767 = OpFAdd %float %2761 %2766 + OpBranch %2693 + %2693 = OpLabel + %2768 = OpPhi %float %2760 %2701 %2767 %2694 + %2769 = OpExtInst %float %1 Pow %float_10 %2768 + %2770 = OpCompositeInsert %v3float %2769 %391 0 + %2771 = OpCompositeExtract %float %2479 1 + OpStore %528 %479 + OpStore %527 %480 + %2772 = OpFOrdLessThanEqual %bool %2771 %float_0 + %2773 = OpSelect %float %2772 %float_9_99999975en05 %2771 + %2774 = OpExtInst %float %1 Log %2773 + %2775 = OpFDiv %float %2774 %1065 + %2776 = OpFOrdLessThanEqual %bool %2775 %2691 + OpSelectionMerge %2777 None + OpBranchConditional %2776 %2778 %2779 + %2779 = OpLabel + %2780 = OpFOrdGreaterThan %bool %2775 %2691 + %2781 = OpExtInst %float %1 Log %2612 + %2782 = OpFDiv %float %2781 %1065 + %2783 = OpFOrdLessThan %bool %2775 %2782 + %2784 = OpLogicalAnd %bool %2780 %2783 + OpSelectionMerge %2785 None + OpBranchConditional %2784 %2786 %2787 + %2787 = OpLabel + %2788 = OpFOrdGreaterThanEqual %bool %2775 %2782 + %2789 = OpExtInst %float %1 Log %2684 + %2790 = OpFDiv %float %2789 %1065 + %2791 = OpFOrdLessThan %bool %2775 %2790 + %2792 = OpLogicalAnd %bool %2788 %2791 + OpSelectionMerge %2793 None + OpBranchConditional %2792 %2794 %2795 + %2795 = OpLabel + %2796 = OpFMul %float %2775 %float_0_0599999987 + %2797 = OpExtInst %float %1 Log %float_1000 + %2798 = OpFDiv %float %2797 %1065 + %2799 = OpFMul %float %float_0_0599999987 %2789 + %2800 = OpFDiv %float %2799 %1065 + %2801 = OpFSub %float %2798 %2800 + %2802 = OpFAdd %float %2796 %2801 + OpBranch %2793 + %2794 = OpLabel + %2803 = 
OpFSub %float %2775 %2782 + %2804 = OpFMul %float %float_7 %2803 + %2805 = OpFSub %float %2790 %2782 + %2806 = OpFDiv %float %2804 %2805 + %2807 = OpConvertFToS %int %2806 + %2808 = OpConvertSToF %float %2807 + %2809 = OpFSub %float %2806 %2808 + %2810 = OpAccessChain %_ptr_Function_float %527 %2807 + %2811 = OpLoad %float %2810 + %2812 = OpIAdd %int %2807 %int_1 + %2813 = OpAccessChain %_ptr_Function_float %527 %2812 + %2814 = OpLoad %float %2813 + %2815 = OpIAdd %int %2807 %int_2 + %2816 = OpAccessChain %_ptr_Function_float %527 %2815 + %2817 = OpLoad %float %2816 + %2818 = OpCompositeConstruct %v3float %2811 %2814 %2817 + %2819 = OpFMul %float %2809 %2809 + %2820 = OpCompositeConstruct %v3float %2819 %2809 %float_1 + %2821 = OpMatrixTimesVector %v3float %442 %2818 + %2822 = OpDot %float %2820 %2821 + OpBranch %2793 + %2793 = OpLabel + %2823 = OpPhi %float %2802 %2795 %2822 %2794 + OpBranch %2785 + %2786 = OpLabel + %2824 = OpFSub %float %2775 %2691 + %2825 = OpFMul %float %float_7 %2824 + %2826 = OpFSub %float %2782 %2691 + %2827 = OpFDiv %float %2825 %2826 + %2828 = OpConvertFToS %int %2827 + %2829 = OpConvertSToF %float %2828 + %2830 = OpFSub %float %2827 %2829 + %2831 = OpAccessChain %_ptr_Function_float %528 %2828 + %2832 = OpLoad %float %2831 + %2833 = OpIAdd %int %2828 %int_1 + %2834 = OpAccessChain %_ptr_Function_float %528 %2833 + %2835 = OpLoad %float %2834 + %2836 = OpIAdd %int %2828 %int_2 + %2837 = OpAccessChain %_ptr_Function_float %528 %2836 + %2838 = OpLoad %float %2837 + %2839 = OpCompositeConstruct %v3float %2832 %2835 %2838 + %2840 = OpFMul %float %2830 %2830 + %2841 = OpCompositeConstruct %v3float %2840 %2830 %float_1 + %2842 = OpMatrixTimesVector %v3float %442 %2839 + %2843 = OpDot %float %2841 %2842 + OpBranch %2785 + %2785 = OpLabel + %2844 = OpPhi %float %2823 %2793 %2843 %2786 + OpBranch %2777 + %2778 = OpLabel + %2845 = OpFMul %float %2775 %float_3 + %2846 = OpExtInst %float %1 Log %float_9_99999975en05 + %2847 = OpFDiv %float %2846 
%1065 + %2848 = OpFMul %float %float_3 %2690 + %2849 = OpFDiv %float %2848 %1065 + %2850 = OpFSub %float %2847 %2849 + %2851 = OpFAdd %float %2845 %2850 + OpBranch %2777 + %2777 = OpLabel + %2852 = OpPhi %float %2844 %2785 %2851 %2778 + %2853 = OpExtInst %float %1 Pow %float_10 %2852 + %2854 = OpCompositeInsert %v3float %2853 %2770 1 + %2855 = OpCompositeExtract %float %2479 2 + OpStore %526 %479 + OpStore %525 %480 + %2856 = OpFOrdLessThanEqual %bool %2855 %float_0 + %2857 = OpSelect %float %2856 %float_9_99999975en05 %2855 + %2858 = OpExtInst %float %1 Log %2857 + %2859 = OpFDiv %float %2858 %1065 + %2860 = OpFOrdLessThanEqual %bool %2859 %2691 + OpSelectionMerge %2861 None + OpBranchConditional %2860 %2862 %2863 + %2863 = OpLabel + %2864 = OpFOrdGreaterThan %bool %2859 %2691 + %2865 = OpExtInst %float %1 Log %2612 + %2866 = OpFDiv %float %2865 %1065 + %2867 = OpFOrdLessThan %bool %2859 %2866 + %2868 = OpLogicalAnd %bool %2864 %2867 + OpSelectionMerge %2869 None + OpBranchConditional %2868 %2870 %2871 + %2871 = OpLabel + %2872 = OpFOrdGreaterThanEqual %bool %2859 %2866 + %2873 = OpExtInst %float %1 Log %2684 + %2874 = OpFDiv %float %2873 %1065 + %2875 = OpFOrdLessThan %bool %2859 %2874 + %2876 = OpLogicalAnd %bool %2872 %2875 + OpSelectionMerge %2877 None + OpBranchConditional %2876 %2878 %2879 + %2879 = OpLabel + %2880 = OpFMul %float %2859 %float_0_0599999987 + %2881 = OpExtInst %float %1 Log %float_1000 + %2882 = OpFDiv %float %2881 %1065 + %2883 = OpFMul %float %float_0_0599999987 %2873 + %2884 = OpFDiv %float %2883 %1065 + %2885 = OpFSub %float %2882 %2884 + %2886 = OpFAdd %float %2880 %2885 + OpBranch %2877 + %2878 = OpLabel + %2887 = OpFSub %float %2859 %2866 + %2888 = OpFMul %float %float_7 %2887 + %2889 = OpFSub %float %2874 %2866 + %2890 = OpFDiv %float %2888 %2889 + %2891 = OpConvertFToS %int %2890 + %2892 = OpConvertSToF %float %2891 + %2893 = OpFSub %float %2890 %2892 + %2894 = OpAccessChain %_ptr_Function_float %525 %2891 + %2895 = OpLoad %float 
%2894 + %2896 = OpIAdd %int %2891 %int_1 + %2897 = OpAccessChain %_ptr_Function_float %525 %2896 + %2898 = OpLoad %float %2897 + %2899 = OpIAdd %int %2891 %int_2 + %2900 = OpAccessChain %_ptr_Function_float %525 %2899 + %2901 = OpLoad %float %2900 + %2902 = OpCompositeConstruct %v3float %2895 %2898 %2901 + %2903 = OpFMul %float %2893 %2893 + %2904 = OpCompositeConstruct %v3float %2903 %2893 %float_1 + %2905 = OpMatrixTimesVector %v3float %442 %2902 + %2906 = OpDot %float %2904 %2905 + OpBranch %2877 + %2877 = OpLabel + %2907 = OpPhi %float %2886 %2879 %2906 %2878 + OpBranch %2869 + %2870 = OpLabel + %2908 = OpFSub %float %2859 %2691 + %2909 = OpFMul %float %float_7 %2908 + %2910 = OpFSub %float %2866 %2691 + %2911 = OpFDiv %float %2909 %2910 + %2912 = OpConvertFToS %int %2911 + %2913 = OpConvertSToF %float %2912 + %2914 = OpFSub %float %2911 %2913 + %2915 = OpAccessChain %_ptr_Function_float %526 %2912 + %2916 = OpLoad %float %2915 + %2917 = OpIAdd %int %2912 %int_1 + %2918 = OpAccessChain %_ptr_Function_float %526 %2917 + %2919 = OpLoad %float %2918 + %2920 = OpIAdd %int %2912 %int_2 + %2921 = OpAccessChain %_ptr_Function_float %526 %2920 + %2922 = OpLoad %float %2921 + %2923 = OpCompositeConstruct %v3float %2916 %2919 %2922 + %2924 = OpFMul %float %2914 %2914 + %2925 = OpCompositeConstruct %v3float %2924 %2914 %float_1 + %2926 = OpMatrixTimesVector %v3float %442 %2923 + %2927 = OpDot %float %2925 %2926 + OpBranch %2869 + %2869 = OpLabel + %2928 = OpPhi %float %2907 %2877 %2927 %2870 + OpBranch %2861 + %2862 = OpLabel + %2929 = OpFMul %float %2859 %float_3 + %2930 = OpExtInst %float %1 Log %float_9_99999975en05 + %2931 = OpFDiv %float %2930 %1065 + %2932 = OpFMul %float %float_3 %2690 + %2933 = OpFDiv %float %2932 %1065 + %2934 = OpFSub %float %2931 %2933 + %2935 = OpFAdd %float %2929 %2934 + OpBranch %2861 + %2861 = OpLabel + %2936 = OpPhi %float %2928 %2869 %2935 %2862 + %2937 = OpExtInst %float %1 Pow %float_10 %2936 + %2938 = OpCompositeInsert %v3float %2937 
%2854 2 + %2939 = OpFSub %v3float %2938 %338 + %2940 = OpVectorTimesMatrix %v3float %2939 %576 + %2941 = OpFMul %v3float %2940 %496 + %2942 = OpExtInst %v3float %1 Pow %2941 %263 + %2943 = OpFMul %v3float %184 %2942 + %2944 = OpFAdd %v3float %183 %2943 + %2945 = OpFMul %v3float %185 %2942 + %2946 = OpFAdd %v3float %135 %2945 + %2947 = OpFDiv %v3float %135 %2946 + %2948 = OpFMul %v3float %2944 %2947 + %2949 = OpExtInst %v3float %1 Pow %2948 %264 + OpBranch %1230 + %1230 = OpLabel + %2950 = OpPhi %v3float %2097 %1236 %2949 %2861 + OpBranch %1224 + %1225 = OpLabel + %2951 = OpVectorTimesMatrix %v3float %1218 %547 + %2952 = OpVectorTimesMatrix %v3float %2951 %576 + %2953 = OpExtInst %v3float %1 FMax %250 %2952 + %2954 = OpFMul %v3float %2953 %252 + %2955 = OpExtInst %v3float %1 FMax %2953 %254 + %2956 = OpExtInst %v3float %1 Pow %2955 %256 + %2957 = OpFMul %v3float %2956 %258 + %2958 = OpFSub %v3float %2957 %260 + %2959 = OpExtInst %v3float %1 FMin %2954 %2958 + OpBranch %1224 + %1224 = OpLabel + %2960 = OpPhi %v3float %2950 %1230 %2959 %1225 + OpBranch %1220 + %1221 = OpLabel + %2961 = OpCompositeExtract %float %1218 0 + OpBranch %2962 + %2962 = OpLabel + OpLoopMerge %2963 %2964 None + OpBranch %2965 + %2965 = OpLabel + %2966 = OpFOrdLessThan %bool %2961 %float_0_00313066994 + OpSelectionMerge %2967 None + OpBranchConditional %2966 %2968 %2967 + %2968 = OpLabel + %2969 = OpFMul %float %2961 %float_12_9200001 + OpBranch %2963 + %2967 = OpLabel + %2970 = OpExtInst %float %1 Pow %2961 %float_0_416666657 + %2971 = OpFMul %float %2970 %float_1_05499995 + %2972 = OpFSub %float %2971 %float_0_0549999997 + OpBranch %2963 + %2964 = OpLabel + OpBranch %2962 + %2963 = OpLabel + %2973 = OpPhi %float %2969 %2968 %2972 %2967 + %2974 = OpCompositeExtract %float %1218 1 + OpBranch %2975 + %2975 = OpLabel + OpLoopMerge %2976 %2977 None + OpBranch %2978 + %2978 = OpLabel + %2979 = OpFOrdLessThan %bool %2974 %float_0_00313066994 + OpSelectionMerge %2980 None + OpBranchConditional %2979 
%2981 %2980 + %2981 = OpLabel + %2982 = OpFMul %float %2974 %float_12_9200001 + OpBranch %2976 + %2980 = OpLabel + %2983 = OpExtInst %float %1 Pow %2974 %float_0_416666657 + %2984 = OpFMul %float %2983 %float_1_05499995 + %2985 = OpFSub %float %2984 %float_0_0549999997 + OpBranch %2976 + %2977 = OpLabel + OpBranch %2975 + %2976 = OpLabel + %2986 = OpPhi %float %2982 %2981 %2985 %2980 + %2987 = OpCompositeExtract %float %1218 2 + OpBranch %2988 + %2988 = OpLabel + OpLoopMerge %2989 %2990 None + OpBranch %2991 + %2991 = OpLabel + %2992 = OpFOrdLessThan %bool %2987 %float_0_00313066994 + OpSelectionMerge %2993 None + OpBranchConditional %2992 %2994 %2993 + %2994 = OpLabel + %2995 = OpFMul %float %2987 %float_12_9200001 + OpBranch %2989 + %2993 = OpLabel + %2996 = OpExtInst %float %1 Pow %2987 %float_0_416666657 + %2997 = OpFMul %float %2996 %float_1_05499995 + %2998 = OpFSub %float %2997 %float_0_0549999997 + OpBranch %2989 + %2990 = OpLabel + OpBranch %2988 + %2989 = OpLabel + %2999 = OpPhi %float %2995 %2994 %2998 %2993 + %3000 = OpCompositeConstruct %v3float %2973 %2986 %2999 + OpBranch %1220 + %1220 = OpLabel + %3001 = OpPhi %v3float %2960 %1224 %3000 %2989 + %3002 = OpFMul %v3float %3001 %499 + %3003 = OpVectorShuffle %v4float %129 %3002 4 5 6 3 + %3004 = OpCompositeInsert %v4float %float_0 %3003 3 + OpStore %out_var_SV_Target0 %3004 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag b/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag new file mode 100644 index 00000000000..e0359bfdd3e --- /dev/null +++ b/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag @@ -0,0 +1,3694 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 3107 +; Schema: 0 + OpCapability Shader + OpCapability Geometry + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %MainPS "main" %in_var_TEXCOORD0 %gl_FragCoord 
%gl_Layer %out_var_SV_Target0 + OpExecutionMode %MainPS OriginUpperLeft + OpSource HLSL 600 + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "MappingPolynomial" + OpMemberName %type__Globals 1 "InverseGamma" + OpMemberName %type__Globals 2 "ColorMatrixR_ColorCurveCd1" + OpMemberName %type__Globals 3 "ColorMatrixG_ColorCurveCd3Cm3" + OpMemberName %type__Globals 4 "ColorMatrixB_ColorCurveCm2" + OpMemberName %type__Globals 5 "ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3" + OpMemberName %type__Globals 6 "ColorCurve_Ch1_Ch2" + OpMemberName %type__Globals 7 "ColorShadow_Luma" + OpMemberName %type__Globals 8 "ColorShadow_Tint1" + OpMemberName %type__Globals 9 "ColorShadow_Tint2" + OpMemberName %type__Globals 10 "FilmSlope" + OpMemberName %type__Globals 11 "FilmToe" + OpMemberName %type__Globals 12 "FilmShoulder" + OpMemberName %type__Globals 13 "FilmBlackClip" + OpMemberName %type__Globals 14 "FilmWhiteClip" + OpMemberName %type__Globals 15 "LUTWeights" + OpMemberName %type__Globals 16 "ColorScale" + OpMemberName %type__Globals 17 "OverlayColor" + OpMemberName %type__Globals 18 "WhiteTemp" + OpMemberName %type__Globals 19 "WhiteTint" + OpMemberName %type__Globals 20 "ColorSaturation" + OpMemberName %type__Globals 21 "ColorContrast" + OpMemberName %type__Globals 22 "ColorGamma" + OpMemberName %type__Globals 23 "ColorGain" + OpMemberName %type__Globals 24 "ColorOffset" + OpMemberName %type__Globals 25 "ColorSaturationShadows" + OpMemberName %type__Globals 26 "ColorContrastShadows" + OpMemberName %type__Globals 27 "ColorGammaShadows" + OpMemberName %type__Globals 28 "ColorGainShadows" + OpMemberName %type__Globals 29 "ColorOffsetShadows" + OpMemberName %type__Globals 30 "ColorSaturationMidtones" + OpMemberName %type__Globals 31 "ColorContrastMidtones" + OpMemberName %type__Globals 32 "ColorGammaMidtones" + OpMemberName %type__Globals 33 "ColorGainMidtones" + OpMemberName %type__Globals 34 "ColorOffsetMidtones" + OpMemberName %type__Globals 35 
"ColorSaturationHighlights" + OpMemberName %type__Globals 36 "ColorContrastHighlights" + OpMemberName %type__Globals 37 "ColorGammaHighlights" + OpMemberName %type__Globals 38 "ColorGainHighlights" + OpMemberName %type__Globals 39 "ColorOffsetHighlights" + OpMemberName %type__Globals 40 "ColorCorrectionShadowsMax" + OpMemberName %type__Globals 41 "ColorCorrectionHighlightsMin" + OpMemberName %type__Globals 42 "OutputDevice" + OpMemberName %type__Globals 43 "OutputGamut" + OpMemberName %type__Globals 44 "BlueCorrection" + OpMemberName %type__Globals 45 "ExpandGamut" + OpName %_Globals "$Globals" + OpName %type_2d_image "type.2d.image" + OpName %Texture1 "Texture1" + OpName %type_sampler "type.sampler" + OpName %Texture1Sampler "Texture1Sampler" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %MainPS "MainPS" + OpName %type_sampled_image "type.sampled.image" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorate %in_var_TEXCOORD0 NoPerspective + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorate %gl_Layer BuiltIn Layer + OpDecorateString %gl_Layer UserSemantic "SV_RenderTargetArrayIndex" + OpDecorate %gl_Layer Flat + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 0 + OpDecorate %Texture1 DescriptorSet 0 + OpDecorate %Texture1 Binding 0 + OpDecorate %Texture1Sampler DescriptorSet 0 + OpDecorate %Texture1Sampler Binding 0 + OpDecorate %_arr_float_uint_5 ArrayStride 16 + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 32 + OpMemberDecorate %type__Globals 3 Offset 48 + OpMemberDecorate %type__Globals 4 Offset 64 + OpMemberDecorate %type__Globals 5 Offset 80 + OpMemberDecorate 
%type__Globals 6 Offset 96 + OpMemberDecorate %type__Globals 7 Offset 112 + OpMemberDecorate %type__Globals 8 Offset 128 + OpMemberDecorate %type__Globals 9 Offset 144 + OpMemberDecorate %type__Globals 10 Offset 160 + OpMemberDecorate %type__Globals 11 Offset 164 + OpMemberDecorate %type__Globals 12 Offset 168 + OpMemberDecorate %type__Globals 13 Offset 172 + OpMemberDecorate %type__Globals 14 Offset 176 + OpMemberDecorate %type__Globals 15 Offset 192 + OpMemberDecorate %type__Globals 16 Offset 272 + OpMemberDecorate %type__Globals 17 Offset 288 + OpMemberDecorate %type__Globals 18 Offset 304 + OpMemberDecorate %type__Globals 19 Offset 308 + OpMemberDecorate %type__Globals 20 Offset 320 + OpMemberDecorate %type__Globals 21 Offset 336 + OpMemberDecorate %type__Globals 22 Offset 352 + OpMemberDecorate %type__Globals 23 Offset 368 + OpMemberDecorate %type__Globals 24 Offset 384 + OpMemberDecorate %type__Globals 25 Offset 400 + OpMemberDecorate %type__Globals 26 Offset 416 + OpMemberDecorate %type__Globals 27 Offset 432 + OpMemberDecorate %type__Globals 28 Offset 448 + OpMemberDecorate %type__Globals 29 Offset 464 + OpMemberDecorate %type__Globals 30 Offset 480 + OpMemberDecorate %type__Globals 31 Offset 496 + OpMemberDecorate %type__Globals 32 Offset 512 + OpMemberDecorate %type__Globals 33 Offset 528 + OpMemberDecorate %type__Globals 34 Offset 544 + OpMemberDecorate %type__Globals 35 Offset 560 + OpMemberDecorate %type__Globals 36 Offset 576 + OpMemberDecorate %type__Globals 37 Offset 592 + OpMemberDecorate %type__Globals 38 Offset 608 + OpMemberDecorate %type__Globals 39 Offset 624 + OpMemberDecorate %type__Globals 40 Offset 640 + OpMemberDecorate %type__Globals 41 Offset 644 + OpMemberDecorate %type__Globals 42 Offset 648 + OpMemberDecorate %type__Globals 43 Offset 652 + OpMemberDecorate %type__Globals 44 Offset 656 + OpMemberDecorate %type__Globals 45 Offset 660 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + 
%v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 +%float_0_952552378 = OpConstant %float 0.952552378 + %float_0 = OpConstant %float 0 + +; HACK: Needed to hack this constant since MSVC and GNU libc are off by 1 ULP when converting to string (it probably still works fine though in a roundtrip ...) +%float_9_36786018en05 = OpConstant %float 9.25 + +%float_0_343966454 = OpConstant %float 0.343966454 +%float_0_728166103 = OpConstant %float 0.728166103 +%float_n0_0721325427 = OpConstant %float -0.0721325427 +%float_1_00882518 = OpConstant %float 1.00882518 +%float_1_04981101 = OpConstant %float 1.04981101 +%float_n9_74845025en05 = OpConstant %float -9.74845025e-05 +%float_n0_495903015 = OpConstant %float -0.495903015 +%float_1_37331307 = OpConstant %float 1.37331307 +%float_0_0982400328 = OpConstant %float 0.0982400328 +%float_0_991252005 = OpConstant %float 0.991252005 +%float_0_662454188 = OpConstant %float 0.662454188 +%float_0_134004205 = OpConstant %float 0.134004205 +%float_0_156187683 = OpConstant %float 0.156187683 +%float_0_272228718 = OpConstant %float 0.272228718 +%float_0_674081743 = OpConstant %float 0.674081743 +%float_0_0536895171 = OpConstant %float 0.0536895171 +%float_n0_00557464967 = OpConstant %float -0.00557464967 +%float_0_0040607336 = OpConstant %float 0.0040607336 +%float_1_01033914 = OpConstant %float 1.01033914 +%float_1_6410234 = OpConstant %float 1.6410234 +%float_n0_324803293 = OpConstant %float -0.324803293 +%float_n0_236424699 = OpConstant %float -0.236424699 +%float_n0_663662851 = OpConstant %float -0.663662851 +%float_1_61533165 = OpConstant %float 1.61533165 +%float_0_0167563483 = OpConstant %float 0.0167563483 +%float_0_0117218941 = OpConstant %float 0.0117218941 +%float_n0_00828444213 = OpConstant %float -0.00828444213 +%float_0_988394856 = OpConstant %float 0.988394856 
+%float_1_45143926 = OpConstant %float 1.45143926 +%float_n0_236510754 = OpConstant %float -0.236510754 +%float_n0_214928567 = OpConstant %float -0.214928567 +%float_n0_0765537769 = OpConstant %float -0.0765537769 +%float_1_17622972 = OpConstant %float 1.17622972 +%float_n0_0996759236 = OpConstant %float -0.0996759236 +%float_0_00831614807 = OpConstant %float 0.00831614807 +%float_n0_00603244966 = OpConstant %float -0.00603244966 +%float_0_997716308 = OpConstant %float 0.997716308 +%float_0_695452213 = OpConstant %float 0.695452213 +%float_0_140678704 = OpConstant %float 0.140678704 +%float_0_163869068 = OpConstant %float 0.163869068 +%float_0_0447945632 = OpConstant %float 0.0447945632 +%float_0_859671116 = OpConstant %float 0.859671116 +%float_0_0955343172 = OpConstant %float 0.0955343172 +%float_n0_00552588282 = OpConstant %float -0.00552588282 +%float_0_00402521016 = OpConstant %float 0.00402521016 +%float_1_00150073 = OpConstant %float 1.00150073 + %73 = OpConstantComposite %v3float %float_0_272228718 %float_0_674081743 %float_0_0536895171 +%float_3_2409699 = OpConstant %float 3.2409699 +%float_n1_5373832 = OpConstant %float -1.5373832 +%float_n0_498610765 = OpConstant %float -0.498610765 +%float_n0_969243646 = OpConstant %float -0.969243646 +%float_1_8759675 = OpConstant %float 1.8759675 +%float_0_0415550582 = OpConstant %float 0.0415550582 +%float_0_0556300804 = OpConstant %float 0.0556300804 +%float_n0_203976959 = OpConstant %float -0.203976959 +%float_1_05697155 = OpConstant %float 1.05697155 +%float_0_412456393 = OpConstant %float 0.412456393 +%float_0_357576102 = OpConstant %float 0.357576102 +%float_0_180437505 = OpConstant %float 0.180437505 +%float_0_212672904 = OpConstant %float 0.212672904 +%float_0_715152204 = OpConstant %float 0.715152204 +%float_0_0721750036 = OpConstant %float 0.0721750036 +%float_0_0193339009 = OpConstant %float 0.0193339009 +%float_0_119191997 = OpConstant %float 0.119191997 +%float_0_950304091 = OpConstant %float 0.950304091 
+%float_1_71660841 = OpConstant %float 1.71660841 +%float_n0_355662107 = OpConstant %float -0.355662107 +%float_n0_253360093 = OpConstant %float -0.253360093 +%float_n0_666682899 = OpConstant %float -0.666682899 +%float_1_61647761 = OpConstant %float 1.61647761 +%float_0_0157685 = OpConstant %float 0.0157685 +%float_0_0176422 = OpConstant %float 0.0176422 +%float_n0_0427763015 = OpConstant %float -0.0427763015 +%float_0_942228675 = OpConstant %float 0.942228675 +%float_2_49339628 = OpConstant %float 2.49339628 +%float_n0_93134588 = OpConstant %float -0.93134588 +%float_n0_402694494 = OpConstant %float -0.402694494 +%float_n0_829486787 = OpConstant %float -0.829486787 +%float_1_76265967 = OpConstant %float 1.76265967 +%float_0_0236246008 = OpConstant %float 0.0236246008 +%float_0_0358507 = OpConstant %float 0.0358507 +%float_n0_0761827007 = OpConstant %float -0.0761827007 +%float_0_957014024 = OpConstant %float 0.957014024 +%float_1_01303005 = OpConstant %float 1.01303005 +%float_0_00610530982 = OpConstant %float 0.00610530982 +%float_n0_0149710001 = OpConstant %float -0.0149710001 +%float_0_00769822998 = OpConstant %float 0.00769822998 +%float_0_998165011 = OpConstant %float 0.998165011 +%float_n0_00503202993 = OpConstant %float -0.00503202993 +%float_n0_00284131011 = OpConstant %float -0.00284131011 +%float_0_00468515977 = OpConstant %float 0.00468515977 +%float_0_924507022 = OpConstant %float 0.924507022 +%float_0_987223983 = OpConstant %float 0.987223983 +%float_n0_00611326983 = OpConstant %float -0.00611326983 +%float_0_0159533005 = OpConstant %float 0.0159533005 +%float_n0_00759836007 = OpConstant %float -0.00759836007 +%float_1_00186002 = OpConstant %float 1.00186002 +%float_0_0053300201 = OpConstant %float 0.0053300201 +%float_0_00307257008 = OpConstant %float 0.00307257008 +%float_n0_00509594986 = OpConstant %float -0.00509594986 +%float_1_08168006 = OpConstant %float 1.08168006 + %float_0_5 = OpConstant %float 0.5 + %float_n1 = OpConstant %float -1 + 
%float_1 = OpConstant %float 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%float_0_015625 = OpConstant %float 0.015625 + %134 = OpConstantComposite %v2float %float_0_015625 %float_0_015625 + %135 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int_43 = OpConstant %int 43 + %uint_3 = OpConstant %uint 3 + %138 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %int_9 = OpConstant %int 9 + %int_3 = OpConstant %int 3 + %141 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %float_n4 = OpConstant %float -4 + %int_45 = OpConstant %int 45 +%float_0_544169128 = OpConstant %float 0.544169128 +%float_0_239592597 = OpConstant %float 0.239592597 +%float_0_166694298 = OpConstant %float 0.166694298 +%float_0_239465594 = OpConstant %float 0.239465594 +%float_0_702153027 = OpConstant %float 0.702153027 +%float_0_058381401 = OpConstant %float 0.058381401 +%float_n0_00234390004 = OpConstant %float -0.00234390004 +%float_0_0361833982 = OpConstant %float 0.0361833982 +%float_1_05521834 = OpConstant %float 1.05521834 +%float_0_940437257 = OpConstant %float 0.940437257 +%float_n0_0183068793 = OpConstant %float -0.0183068793 +%float_0_077869609 = OpConstant %float 0.077869609 +%float_0_00837869663 = OpConstant %float 0.00837869663 +%float_0_828660011 = OpConstant %float 0.828660011 +%float_0_162961304 = OpConstant %float 0.162961304 +%float_0_00054712611 = OpConstant %float 0.00054712611 +%float_n0_000883374596 = OpConstant %float -0.000883374596 +%float_1_00033629 = OpConstant %float 1.00033629 +%float_1_06317997 = OpConstant %float 1.06317997 +%float_0_0233955998 = OpConstant %float 0.0233955998 +%float_n0_0865726024 = OpConstant %float -0.0865726024 +%float_n0_0106336996 = OpConstant %float -0.0106336996 +%float_1_20632005 = OpConstant %float 1.20632005 +%float_n0_195690006 = OpConstant %float -0.195690006 +%float_n0_000590886979 = OpConstant %float -0.000590886979 +%float_0_00105247996 = OpConstant %float 0.00105247996 
+%float_0_999538004 = OpConstant %float 0.999538004 + %int_44 = OpConstant %int 44 +%float_0_9375 = OpConstant %float 0.9375 + %173 = OpConstantComposite %v3float %float_0_9375 %float_0_9375 %float_0_9375 +%float_0_03125 = OpConstant %float 0.03125 + %175 = OpConstantComposite %v3float %float_0_03125 %float_0_03125 %float_0_03125 + %int_15 = OpConstant %int 15 + %float_16 = OpConstant %float 16 + %int_16 = OpConstant %int 16 + %int_17 = OpConstant %int 17 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_5 = OpConstant %uint 5 + %uint_6 = OpConstant %uint 6 + %int_2 = OpConstant %int 2 +%mat3v3float = OpTypeMatrix %v3float 3 + %int_42 = OpConstant %int 42 +%float_0_159301758 = OpConstant %float 0.159301758 +%float_78_84375 = OpConstant %float 78.84375 +%float_0_8359375 = OpConstant %float 0.8359375 +%float_18_8515625 = OpConstant %float 18.8515625 +%float_18_6875 = OpConstant %float 18.6875 +%float_10000 = OpConstant %float 10000 +%float_0_0126833133 = OpConstant %float 0.0126833133 + %194 = OpConstantComposite %v3float %float_0_0126833133 %float_0_0126833133 %float_0_0126833133 + %195 = OpConstantComposite %v3float %float_0_8359375 %float_0_8359375 %float_0_8359375 + %196 = OpConstantComposite %v3float %float_18_8515625 %float_18_8515625 %float_18_8515625 + %197 = OpConstantComposite %v3float %float_18_6875 %float_18_6875 %float_18_6875 +%float_6_27739477 = OpConstant %float 6.27739477 + %199 = OpConstantComposite %v3float %float_6_27739477 %float_6_27739477 %float_6_27739477 + %200 = OpConstantComposite %v3float %float_10000 %float_10000 %float_10000 + %float_14 = OpConstant %float 14 +%float_0_180000007 = OpConstant %float 0.180000007 +%float_0_434017599 = OpConstant %float 0.434017599 + %204 = OpConstantComposite %v3float %float_0_434017599 %float_0_434017599 %float_0_434017599 + %205 = OpConstantComposite %v3float %float_14 %float_14 %float_14 + %206 = OpConstantComposite %v3float %float_0_180000007 %float_0_180000007 %float_0_180000007 + 
%int_18 = OpConstant %int 18 + %float_4000 = OpConstant %float 4000 +%float_0_312700003 = OpConstant %float 0.312700003 +%float_0_328999996 = OpConstant %float 0.328999996 + %int_19 = OpConstant %int 19 + %int_25 = OpConstant %int 25 + %int_20 = OpConstant %int 20 + %int_26 = OpConstant %int 26 + %int_21 = OpConstant %int 21 + %int_27 = OpConstant %int 27 + %int_22 = OpConstant %int 22 + %int_28 = OpConstant %int 28 + %int_23 = OpConstant %int 23 + %int_29 = OpConstant %int 29 + %int_24 = OpConstant %int 24 + %int_40 = OpConstant %int 40 + %int_35 = OpConstant %int 35 + %int_36 = OpConstant %int 36 + %int_37 = OpConstant %int 37 + %int_38 = OpConstant %int 38 + %int_39 = OpConstant %int 39 + %int_41 = OpConstant %int 41 + %int_30 = OpConstant %int 30 + %int_31 = OpConstant %int 31 + %int_32 = OpConstant %int 32 + %int_33 = OpConstant %int 33 + %int_34 = OpConstant %int 34 +%float_0_0500000007 = OpConstant %float 0.0500000007 + %float_1_75 = OpConstant %float 1.75 +%float_0_400000006 = OpConstant %float 0.400000006 +%float_0_819999993 = OpConstant %float 0.819999993 +%float_0_0299999993 = OpConstant %float 0.0299999993 + %float_2 = OpConstant %float 2 +%float_0_959999979 = OpConstant %float 0.959999979 + %241 = OpConstantComposite %v3float %float_0_959999979 %float_0_959999979 %float_0_959999979 + %int_13 = OpConstant %int 13 + %int_11 = OpConstant %int 11 + %int_14 = OpConstant %int 14 + %int_12 = OpConstant %int 12 +%float_0_800000012 = OpConstant %float 0.800000012 + %int_10 = OpConstant %int 10 + %float_10 = OpConstant %float 10 + %float_n2 = OpConstant %float -2 + %float_3 = OpConstant %float 3 + %251 = OpConstantComposite %v3float %float_3 %float_3 %float_3 + %252 = OpConstantComposite %v3float %float_2 %float_2 %float_2 +%float_0_930000007 = OpConstant %float 0.930000007 + %254 = OpConstantComposite %v3float %float_0_930000007 %float_0_930000007 %float_0_930000007 + %int_4 = OpConstant %int 4 + %int_8 = OpConstant %int 8 + %int_7 = OpConstant %int 7 + %int_5 
= OpConstant %int 5 + %int_6 = OpConstant %int 6 +%float_0_00200000009 = OpConstant %float 0.00200000009 + %261 = OpConstantComposite %v3float %float_0_00200000009 %float_0_00200000009 %float_0_00200000009 +%float_6_10351999en05 = OpConstant %float 6.10351999e-05 + %263 = OpConstantComposite %v3float %float_6_10351999en05 %float_6_10351999en05 %float_6_10351999en05 +%float_0_0404499993 = OpConstant %float 0.0404499993 + %265 = OpConstantComposite %v3float %float_0_0404499993 %float_0_0404499993 %float_0_0404499993 +%float_0_947867274 = OpConstant %float 0.947867274 + %267 = OpConstantComposite %v3float %float_0_947867274 %float_0_947867274 %float_0_947867274 +%float_0_0521326996 = OpConstant %float 0.0521326996 + %269 = OpConstantComposite %v3float %float_0_0521326996 %float_0_0521326996 %float_0_0521326996 +%float_2_4000001 = OpConstant %float 2.4000001 + %271 = OpConstantComposite %v3float %float_2_4000001 %float_2_4000001 %float_2_4000001 +%float_0_0773993805 = OpConstant %float 0.0773993805 + %273 = OpConstantComposite %v3float %float_0_0773993805 %float_0_0773993805 %float_0_0773993805 + %float_4_5 = OpConstant %float 4.5 + %275 = OpConstantComposite %v3float %float_4_5 %float_4_5 %float_4_5 +%float_0_0179999992 = OpConstant %float 0.0179999992 + %277 = OpConstantComposite %v3float %float_0_0179999992 %float_0_0179999992 %float_0_0179999992 +%float_0_449999988 = OpConstant %float 0.449999988 + %279 = OpConstantComposite %v3float %float_0_449999988 %float_0_449999988 %float_0_449999988 +%float_1_09899998 = OpConstant %float 1.09899998 + %281 = OpConstantComposite %v3float %float_1_09899998 %float_1_09899998 %float_1_09899998 +%float_0_0989999995 = OpConstant %float 0.0989999995 + %283 = OpConstantComposite %v3float %float_0_0989999995 %float_0_0989999995 %float_0_0989999995 + %float_1_5 = OpConstant %float 1.5 + %285 = OpConstantComposite %v3float %float_1_5 %float_1_5 %float_1_5 + %286 = OpConstantComposite %v3float %float_0_159301758 %float_0_159301758 
%float_0_159301758 + %287 = OpConstantComposite %v3float %float_78_84375 %float_78_84375 %float_78_84375 +%float_1_00055635 = OpConstant %float 1.00055635 + %float_7000 = OpConstant %float 7000 +%float_0_244063005 = OpConstant %float 0.244063005 +%float_99_1100006 = OpConstant %float 99.1100006 +%float_2967800 = OpConstant %float 2967800 +%float_0_237039998 = OpConstant %float 0.237039998 +%float_247_479996 = OpConstant %float 247.479996 +%float_1901800 = OpConstant %float 1901800 + %float_n3 = OpConstant %float -3 +%float_2_86999989 = OpConstant %float 2.86999989 +%float_0_275000006 = OpConstant %float 0.275000006 +%float_0_860117733 = OpConstant %float 0.860117733 +%float_0_000154118257 = OpConstant %float 0.000154118257 +%float_1_28641219en07 = OpConstant %float 1.28641219e-07 +%float_0_00084242021 = OpConstant %float 0.00084242021 +%float_7_08145137en07 = OpConstant %float 7.08145137e-07 +%float_0_317398727 = OpConstant %float 0.317398727 + +; HACK: Needed to hack this constant since MSVC and GNU libc are off by 1 ULP when converting to string (it probably still works fine though in a roundtrip ...) 
+%float_4_22806261en05 = OpConstant %float 4.25 + +%float_4_20481676en08 = OpConstant %float 4.20481676e-08 +%float_2_8974182en05 = OpConstant %float 2.8974182e-05 +%float_1_61456057en07 = OpConstant %float 1.61456057e-07 + %float_8 = OpConstant %float 8 + %float_4 = OpConstant %float 4 +%float_0_895099998 = OpConstant %float 0.895099998 +%float_0_266400009 = OpConstant %float 0.266400009 +%float_n0_161400005 = OpConstant %float -0.161400005 +%float_n0_750199974 = OpConstant %float -0.750199974 +%float_1_71350002 = OpConstant %float 1.71350002 +%float_0_0366999991 = OpConstant %float 0.0366999991 +%float_0_0388999991 = OpConstant %float 0.0388999991 +%float_n0_0684999973 = OpConstant %float -0.0684999973 +%float_1_02960002 = OpConstant %float 1.02960002 +%float_0_986992896 = OpConstant %float 0.986992896 +%float_n0_1470543 = OpConstant %float -0.1470543 +%float_0_159962699 = OpConstant %float 0.159962699 +%float_0_432305306 = OpConstant %float 0.432305306 +%float_0_518360317 = OpConstant %float 0.518360317 +%float_0_0492912009 = OpConstant %float 0.0492912009 +%float_n0_0085287001 = OpConstant %float -0.0085287001 +%float_0_040042799 = OpConstant %float 0.040042799 +%float_0_968486726 = OpConstant %float 0.968486726 +%float_5_55555534 = OpConstant %float 5.55555534 + %330 = OpConstantComposite %v3float %float_5_55555534 %float_5_55555534 %float_5_55555534 +%float_1_00000001en10 = OpConstant %float 1.00000001e-10 +%float_0_00999999978 = OpConstant %float 0.00999999978 +%float_0_666666687 = OpConstant %float 0.666666687 + %float_180 = OpConstant %float 180 + %float_360 = OpConstant %float 360 +%float_65535 = OpConstant %float 65535 + %337 = OpConstantComposite %v3float %float_65535 %float_65535 %float_65535 +%float_n4_97062206 = OpConstant %float -4.97062206 +%float_n3_02937818 = OpConstant %float -3.02937818 +%float_n2_12619996 = OpConstant %float -2.12619996 +%float_n1_51049995 = OpConstant %float -1.51049995 +%float_n1_05780005 = OpConstant %float -1.05780005 
+%float_n0_466800004 = OpConstant %float -0.466800004 +%float_0_119379997 = OpConstant %float 0.119379997 +%float_0_708813429 = OpConstant %float 0.708813429 +%float_1_29118657 = OpConstant %float 1.29118657 +%float_0_808913231 = OpConstant %float 0.808913231 +%float_1_19108677 = OpConstant %float 1.19108677 +%float_1_56830001 = OpConstant %float 1.56830001 +%float_1_9483 = OpConstant %float 1.9483 +%float_2_30830002 = OpConstant %float 2.30830002 +%float_2_63840008 = OpConstant %float 2.63840008 +%float_2_85949993 = OpConstant %float 2.85949993 +%float_2_98726082 = OpConstant %float 2.98726082 +%float_3_01273918 = OpConstant %float 3.01273918 +%float_0_179999992 = OpConstant %float 0.179999992 +%float_9_99999975en05 = OpConstant %float 9.99999975e-05 + %float_1000 = OpConstant %float 1000 +%float_0_0599999987 = OpConstant %float 0.0599999987 +%float_3_50738446en05 = OpConstant %float 3.50738446e-05 + %361 = OpConstantComposite %v3float %float_3_50738446en05 %float_3_50738446en05 %float_3_50738446en05 +%float_n2_30102992 = OpConstant %float -2.30102992 +%float_n1_93120003 = OpConstant %float -1.93120003 +%float_n1_52049994 = OpConstant %float -1.52049994 +%float_0_801995218 = OpConstant %float 0.801995218 +%float_1_19800484 = OpConstant %float 1.19800484 +%float_1_59430003 = OpConstant %float 1.59430003 +%float_1_99730003 = OpConstant %float 1.99730003 +%float_2_37829995 = OpConstant %float 2.37829995 +%float_2_76839995 = OpConstant %float 2.76839995 +%float_3_05150008 = OpConstant %float 3.05150008 +%float_3_27462935 = OpConstant %float 3.27462935 +%float_3_32743073 = OpConstant %float 3.32743073 +%float_0_00499999989 = OpConstant %float 0.00499999989 + %float_11 = OpConstant %float 11 + %float_2000 = OpConstant %float 2000 +%float_0_119999997 = OpConstant %float 0.119999997 +%float_0_00313066994 = OpConstant %float 0.00313066994 +%float_12_9200001 = OpConstant %float 12.9200001 +%float_0_416666657 = OpConstant %float 0.416666657 +%float_1_05499995 = OpConstant 
%float 1.05499995 +%float_0_0549999997 = OpConstant %float 0.0549999997 +%float_n0_166666672 = OpConstant %float -0.166666672 + %float_n0_5 = OpConstant %float -0.5 +%float_0_166666672 = OpConstant %float 0.166666672 +%float_n3_15737653 = OpConstant %float -3.15737653 +%float_n0_485249996 = OpConstant %float -0.485249996 +%float_1_84773242 = OpConstant %float 1.84773242 +%float_n0_718548238 = OpConstant %float -0.718548238 +%float_2_08103061 = OpConstant %float 2.08103061 +%float_3_6681242 = OpConstant %float 3.6681242 + %float_18 = OpConstant %float 18 + %float_7 = OpConstant %float 7 +%_arr_float_uint_5 = OpTypeArray %float %uint_5 +%type__Globals = OpTypeStruct %v4float %v3float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %float %float %float %float %float %_arr_float_uint_5 %v3float %v4float %float %float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %float %float %uint %uint %float %float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %402 = OpTypeFunction %void +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %v2bool = OpTypeVector %bool 2 + %v3bool = OpTypeVector %bool 3 
+%type_sampled_image = OpTypeSampledImage %type_2d_image + %uint_10 = OpConstant %uint 10 +%_arr_float_uint_10 = OpTypeArray %float %uint_10 +%_arr_float_uint_6 = OpTypeArray %float %uint_6 + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform + %Texture1 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Texture1Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_v2float Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %gl_Layer = OpVariable %_ptr_Input_uint Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output +%_ptr_Function__arr_float_uint_6 = OpTypePointer Function %_arr_float_uint_6 +%_ptr_Function__arr_float_uint_10 = OpTypePointer Function %_arr_float_uint_10 + %416 = OpConstantComposite %v3float %float_0_952552378 %float_0 %float_9_36786018en05 + %417 = OpConstantComposite %v3float %float_0_343966454 %float_0_728166103 %float_n0_0721325427 + %418 = OpConstantComposite %v3float %float_0 %float_0 %float_1_00882518 + %419 = OpConstantComposite %mat3v3float %416 %417 %418 + %420 = OpConstantComposite %v3float %float_1_04981101 %float_0 %float_n9_74845025en05 + %421 = OpConstantComposite %v3float %float_n0_495903015 %float_1_37331307 %float_0_0982400328 + %422 = OpConstantComposite %v3float %float_0 %float_0 %float_0_991252005 + %423 = OpConstantComposite %mat3v3float %420 %421 %422 + %424 = OpConstantComposite %v3float %float_0_662454188 %float_0_134004205 %float_0_156187683 + %425 = OpConstantComposite %v3float %float_n0_00557464967 %float_0_0040607336 %float_1_01033914 + %426 = OpConstantComposite %mat3v3float %424 %73 %425 + %427 = OpConstantComposite %v3float %float_1_6410234 %float_n0_324803293 %float_n0_236424699 + %428 = OpConstantComposite %v3float %float_n0_663662851 %float_1_61533165 %float_0_0167563483 + %429 = OpConstantComposite %v3float %float_0_0117218941 %float_n0_00828444213 %float_0_988394856 + %430 = OpConstantComposite 
%mat3v3float %427 %428 %429 + %431 = OpConstantComposite %v3float %float_1_45143926 %float_n0_236510754 %float_n0_214928567 + %432 = OpConstantComposite %v3float %float_n0_0765537769 %float_1_17622972 %float_n0_0996759236 + %433 = OpConstantComposite %v3float %float_0_00831614807 %float_n0_00603244966 %float_0_997716308 + %434 = OpConstantComposite %mat3v3float %431 %432 %433 + %435 = OpConstantComposite %v3float %float_0_695452213 %float_0_140678704 %float_0_163869068 + %436 = OpConstantComposite %v3float %float_0_0447945632 %float_0_859671116 %float_0_0955343172 + %437 = OpConstantComposite %v3float %float_n0_00552588282 %float_0_00402521016 %float_1_00150073 + %438 = OpConstantComposite %mat3v3float %435 %436 %437 + %439 = OpConstantComposite %v3float %float_3_2409699 %float_n1_5373832 %float_n0_498610765 + %440 = OpConstantComposite %v3float %float_n0_969243646 %float_1_8759675 %float_0_0415550582 + %441 = OpConstantComposite %v3float %float_0_0556300804 %float_n0_203976959 %float_1_05697155 + %442 = OpConstantComposite %mat3v3float %439 %440 %441 + %443 = OpConstantComposite %v3float %float_0_412456393 %float_0_357576102 %float_0_180437505 + %444 = OpConstantComposite %v3float %float_0_212672904 %float_0_715152204 %float_0_0721750036 + %445 = OpConstantComposite %v3float %float_0_0193339009 %float_0_119191997 %float_0_950304091 + %446 = OpConstantComposite %mat3v3float %443 %444 %445 + %447 = OpConstantComposite %v3float %float_1_71660841 %float_n0_355662107 %float_n0_253360093 + %448 = OpConstantComposite %v3float %float_n0_666682899 %float_1_61647761 %float_0_0157685 + %449 = OpConstantComposite %v3float %float_0_0176422 %float_n0_0427763015 %float_0_942228675 + %450 = OpConstantComposite %mat3v3float %447 %448 %449 + %451 = OpConstantComposite %v3float %float_2_49339628 %float_n0_93134588 %float_n0_402694494 + %452 = OpConstantComposite %v3float %float_n0_829486787 %float_1_76265967 %float_0_0236246008 + %453 = OpConstantComposite %v3float %float_0_0358507 
%float_n0_0761827007 %float_0_957014024 + %454 = OpConstantComposite %mat3v3float %451 %452 %453 + %455 = OpConstantComposite %v3float %float_1_01303005 %float_0_00610530982 %float_n0_0149710001 + %456 = OpConstantComposite %v3float %float_0_00769822998 %float_0_998165011 %float_n0_00503202993 + %457 = OpConstantComposite %v3float %float_n0_00284131011 %float_0_00468515977 %float_0_924507022 + %458 = OpConstantComposite %mat3v3float %455 %456 %457 + %459 = OpConstantComposite %v3float %float_0_987223983 %float_n0_00611326983 %float_0_0159533005 + %460 = OpConstantComposite %v3float %float_n0_00759836007 %float_1_00186002 %float_0_0053300201 + %461 = OpConstantComposite %v3float %float_0_00307257008 %float_n0_00509594986 %float_1_08168006 + %462 = OpConstantComposite %mat3v3float %459 %460 %461 + %463 = OpConstantComposite %v3float %float_0_5 %float_n1 %float_0_5 + %464 = OpConstantComposite %v3float %float_n1 %float_1 %float_0_5 + %465 = OpConstantComposite %v3float %float_0_5 %float_0 %float_0 + %466 = OpConstantComposite %mat3v3float %463 %464 %465 + %467 = OpConstantComposite %v3float %float_1 %float_0 %float_0 + %468 = OpConstantComposite %v3float %float_0 %float_1 %float_0 + %469 = OpConstantComposite %v3float %float_0 %float_0 %float_1 + %470 = OpConstantComposite %mat3v3float %467 %468 %469 +%float_n6_07624626 = OpConstant %float -6.07624626 + %472 = OpConstantComposite %v3float %float_n6_07624626 %float_n6_07624626 %float_n6_07624626 + %473 = OpConstantComposite %v3float %float_0_895099998 %float_0_266400009 %float_n0_161400005 + %474 = OpConstantComposite %v3float %float_n0_750199974 %float_1_71350002 %float_0_0366999991 + %475 = OpConstantComposite %v3float %float_0_0388999991 %float_n0_0684999973 %float_1_02960002 + %476 = OpConstantComposite %mat3v3float %473 %474 %475 + %477 = OpConstantComposite %v3float %float_0_986992896 %float_n0_1470543 %float_0_159962699 + %478 = OpConstantComposite %v3float %float_0_432305306 %float_0_518360317 
%float_0_0492912009 + %479 = OpConstantComposite %v3float %float_n0_0085287001 %float_0_040042799 %float_0_968486726 + %480 = OpConstantComposite %mat3v3float %477 %478 %479 + %481 = OpConstantComposite %v3float %float_0_544169128 %float_0_239592597 %float_0_166694298 + %482 = OpConstantComposite %v3float %float_0_239465594 %float_0_702153027 %float_0_058381401 + %483 = OpConstantComposite %v3float %float_n0_00234390004 %float_0_0361833982 %float_1_05521834 + %484 = OpConstantComposite %mat3v3float %481 %482 %483 + %485 = OpConstantComposite %v3float %float_0_940437257 %float_n0_0183068793 %float_0_077869609 + %486 = OpConstantComposite %v3float %float_0_00837869663 %float_0_828660011 %float_0_162961304 + %487 = OpConstantComposite %v3float %float_0_00054712611 %float_n0_000883374596 %float_1_00033629 + %488 = OpConstantComposite %mat3v3float %485 %486 %487 + %489 = OpConstantComposite %v3float %float_1_06317997 %float_0_0233955998 %float_n0_0865726024 + %490 = OpConstantComposite %v3float %float_n0_0106336996 %float_1_20632005 %float_n0_195690006 + %491 = OpConstantComposite %v3float %float_n0_000590886979 %float_0_00105247996 %float_0_999538004 + %492 = OpConstantComposite %mat3v3float %489 %490 %491 +%float_0_0533333346 = OpConstant %float 0.0533333346 +%float_0_159999996 = OpConstant %float 0.159999996 +%float_57_2957764 = OpConstant %float 57.2957764 +%float_0_0625 = OpConstant %float 0.0625 +%float_n67_5 = OpConstant %float -67.5 + %float_67_5 = OpConstant %float 67.5 + %499 = OpConstantComposite %_arr_float_uint_6 %float_n4 %float_n4 %float_n3_15737653 %float_n0_485249996 %float_1_84773242 %float_1_84773242 + %500 = OpConstantComposite %_arr_float_uint_6 %float_n0_718548238 %float_2_08103061 %float_3_6681242 %float_4 %float_4 %float_4 + %float_n15 = OpConstant %float -15 + %float_n14 = OpConstant %float -14 + %503 = OpConstantComposite %_arr_float_uint_10 %float_n4_97062206 %float_n3_02937818 %float_n2_12619996 %float_n1_51049995 %float_n1_05780005 
%float_n0_466800004 %float_0_119379997 %float_0_708813429 %float_1_29118657 %float_1_29118657 + %504 = OpConstantComposite %_arr_float_uint_10 %float_0_808913231 %float_1_19108677 %float_1_56830001 %float_1_9483 %float_2_30830002 %float_2_63840008 %float_2_85949993 %float_2_98726082 %float_3_01273918 %float_3_01273918 + %float_n12 = OpConstant %float -12 + %506 = OpConstantComposite %_arr_float_uint_10 %float_n2_30102992 %float_n2_30102992 %float_n1_93120003 %float_n1_52049994 %float_n1_05780005 %float_n0_466800004 %float_0_119379997 %float_0_708813429 %float_1_29118657 %float_1_29118657 + %507 = OpConstantComposite %_arr_float_uint_10 %float_0_801995218 %float_1_19800484 %float_1_59430003 %float_1_99730003 %float_2_37829995 %float_2_76839995 %float_3_05150008 %float_3_27462935 %float_3_32743073 %float_3_32743073 +%float_0_0322580636 = OpConstant %float 0.0322580636 +%float_1_03225803 = OpConstant %float 1.03225803 + %510 = OpConstantComposite %v2float %float_1_03225803 %float_1_03225803 +%float_4_60443853e_09 = OpConstant %float 4.60443853e+09 +%float_2_00528435e_09 = OpConstant %float 2.00528435e+09 +%float_0_333333343 = OpConstant %float 0.333333343 + %float_5 = OpConstant %float 5 + %float_2_5 = OpConstant %float 2.5 +%float_0_0250000004 = OpConstant %float 0.0250000004 +%float_0_239999995 = OpConstant %float 0.239999995 +%float_0_0148148146 = OpConstant %float 0.0148148146 + %519 = OpConstantComposite %v3float %float_9_99999975en05 %float_9_99999975en05 %float_9_99999975en05 +%float_0_0296296291 = OpConstant %float 0.0296296291 +%float_0_952381015 = OpConstant %float 0.952381015 + %522 = OpConstantComposite %v3float %float_0_952381015 %float_0_952381015 %float_0_952381015 + %523 = OpUndef %v3float +%float_0_358299971 = OpConstant %float 0.358299971 + %525 = OpUndef %v3float + %MainPS = OpFunction %void None %402 + %526 = OpLabel + %527 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %528 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %529 = 
OpVariable %_ptr_Function__arr_float_uint_6 Function + %530 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %531 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %532 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %533 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %534 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %535 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %536 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %537 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %538 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %539 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %540 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %541 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %542 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %543 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %544 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %545 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %546 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %547 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %548 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %549 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %550 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %551 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %552 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %553 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %554 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %555 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %556 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %557 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %558 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %559 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %560 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %561 = 
OpVariable %_ptr_Function__arr_float_uint_6 Function + %562 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %563 = OpLoad %v2float %in_var_TEXCOORD0 + %564 = OpLoad %uint %gl_Layer + %565 = OpFSub %v2float %563 %134 + %566 = OpFMul %v2float %565 %510 + %567 = OpCompositeExtract %float %566 0 + %568 = OpCompositeExtract %float %566 1 + %569 = OpConvertUToF %float %564 + %570 = OpFMul %float %569 %float_0_0322580636 + %571 = OpCompositeConstruct %v4float %567 %568 %570 %float_0 + %572 = OpMatrixTimesMatrix %mat3v3float %446 %458 + %573 = OpMatrixTimesMatrix %mat3v3float %572 %430 + %574 = OpMatrixTimesMatrix %mat3v3float %426 %462 + %575 = OpMatrixTimesMatrix %mat3v3float %574 %442 + %576 = OpMatrixTimesMatrix %mat3v3float %419 %430 + %577 = OpMatrixTimesMatrix %mat3v3float %426 %423 + %578 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_43 + %579 = OpLoad %uint %578 + OpBranch %580 + %580 = OpLabel + OpLoopMerge %581 %582 None + OpBranch %583 + %583 = OpLabel + %584 = OpMatrixTimesMatrix %mat3v3float %574 %454 + %585 = OpMatrixTimesMatrix %mat3v3float %574 %450 + %586 = OpIEqual %bool %579 %uint_1 + OpSelectionMerge %587 None + OpBranchConditional %586 %588 %589 + %589 = OpLabel + %590 = OpIEqual %bool %579 %uint_2 + OpSelectionMerge %591 None + OpBranchConditional %590 %592 %593 + %593 = OpLabel + %594 = OpIEqual %bool %579 %uint_3 + OpSelectionMerge %595 None + OpBranchConditional %594 %596 %597 + %597 = OpLabel + %598 = OpIEqual %bool %579 %uint_4 + OpSelectionMerge %599 None + OpBranchConditional %598 %600 %601 + %601 = OpLabel + OpBranch %581 + %600 = OpLabel + OpBranch %581 + %599 = OpLabel + OpUnreachable + %596 = OpLabel + OpBranch %581 + %595 = OpLabel + OpUnreachable + %592 = OpLabel + OpBranch %581 + %591 = OpLabel + OpUnreachable + %588 = OpLabel + OpBranch %581 + %587 = OpLabel + OpUnreachable + %582 = OpLabel + OpBranch %580 + %581 = OpLabel + %602 = OpPhi %mat3v3float %575 %601 %470 %600 %438 %596 %585 %592 %584 %588 + %603 = 
OpVectorShuffle %v3float %571 %571 0 1 2 + %604 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_42 + %605 = OpLoad %uint %604 + %606 = OpUGreaterThanEqual %bool %605 %uint_3 + OpSelectionMerge %607 None + OpBranchConditional %606 %608 %609 + %609 = OpLabel + %610 = OpFSub %v3float %603 %204 + %611 = OpFMul %v3float %610 %205 + %612 = OpExtInst %v3float %1 Exp2 %611 + %613 = OpFMul %v3float %612 %206 + %614 = OpExtInst %v3float %1 Exp2 %472 + %615 = OpFMul %v3float %614 %206 + %616 = OpFSub %v3float %613 %615 + OpBranch %607 + %608 = OpLabel + %617 = OpExtInst %v3float %1 Pow %603 %194 + %618 = OpFSub %v3float %617 %195 + %619 = OpExtInst %v3float %1 FMax %138 %618 + %620 = OpFMul %v3float %197 %617 + %621 = OpFSub %v3float %196 %620 + %622 = OpFDiv %v3float %619 %621 + %623 = OpExtInst %v3float %1 Pow %622 %199 + %624 = OpFMul %v3float %623 %200 + OpBranch %607 + %607 = OpLabel + %625 = OpPhi %v3float %616 %609 %624 %608 + %626 = OpAccessChain %_ptr_Uniform_float %_Globals %int_18 + %627 = OpLoad %float %626 + %628 = OpFMul %float %627 %float_1_00055635 + %629 = OpFOrdLessThanEqual %bool %628 %float_7000 + %630 = OpFDiv %float %float_4_60443853e_09 %627 + %631 = OpFSub %float %float_2967800 %630 + %632 = OpFDiv %float %631 %628 + %633 = OpFAdd %float %float_99_1100006 %632 + %634 = OpFDiv %float %633 %628 + %635 = OpFAdd %float %float_0_244063005 %634 + %636 = OpFDiv %float %float_2_00528435e_09 %627 + %637 = OpFSub %float %float_1901800 %636 + %638 = OpFDiv %float %637 %628 + %639 = OpFAdd %float %float_247_479996 %638 + %640 = OpFDiv %float %639 %628 + %641 = OpFAdd %float %float_0_237039998 %640 + %642 = OpSelect %float %629 %635 %641 + %643 = OpFMul %float %float_n3 %642 + %644 = OpFMul %float %643 %642 + %645 = OpFMul %float %float_2_86999989 %642 + %646 = OpFAdd %float %644 %645 + %647 = OpFSub %float %646 %float_0_275000006 + %648 = OpCompositeConstruct %v2float %642 %647 + %649 = OpFMul %float %float_0_000154118257 %627 + %650 = OpFAdd %float 
%float_0_860117733 %649 + %651 = OpFMul %float %float_1_28641219en07 %627 + %652 = OpFMul %float %651 %627 + %653 = OpFAdd %float %650 %652 + %654 = OpFMul %float %float_0_00084242021 %627 + %655 = OpFAdd %float %float_1 %654 + %656 = OpFMul %float %float_7_08145137en07 %627 + %657 = OpFMul %float %656 %627 + %658 = OpFAdd %float %655 %657 + %659 = OpFDiv %float %653 %658 + %660 = OpFMul %float %float_4_22806261en05 %627 + %661 = OpFAdd %float %float_0_317398727 %660 + %662 = OpFMul %float %float_4_20481676en08 %627 + %663 = OpFMul %float %662 %627 + %664 = OpFAdd %float %661 %663 + %665 = OpFMul %float %float_2_8974182en05 %627 + %666 = OpFSub %float %float_1 %665 + %667 = OpFMul %float %float_1_61456057en07 %627 + %668 = OpFMul %float %667 %627 + %669 = OpFAdd %float %666 %668 + %670 = OpFDiv %float %664 %669 + %671 = OpFMul %float %float_3 %659 + %672 = OpFMul %float %float_2 %659 + %673 = OpFMul %float %float_8 %670 + %674 = OpFSub %float %672 %673 + %675 = OpFAdd %float %674 %float_4 + %676 = OpFDiv %float %671 %675 + %677 = OpFMul %float %float_2 %670 + %678 = OpFDiv %float %677 %675 + %679 = OpCompositeConstruct %v2float %676 %678 + %680 = OpFOrdLessThan %bool %627 %float_4000 + %681 = OpCompositeConstruct %v2bool %680 %680 + %682 = OpSelect %v2float %681 %679 %648 + %683 = OpAccessChain %_ptr_Uniform_float %_Globals %int_19 + %684 = OpLoad %float %683 + %685 = OpCompositeConstruct %v2float %659 %670 + %686 = OpExtInst %v2float %1 Normalize %685 + %687 = OpCompositeExtract %float %686 1 + %688 = OpFNegate %float %687 + %689 = OpFMul %float %688 %684 + %690 = OpFMul %float %689 %float_0_0500000007 + %691 = OpFAdd %float %659 %690 + %692 = OpCompositeExtract %float %686 0 + %693 = OpFMul %float %692 %684 + %694 = OpFMul %float %693 %float_0_0500000007 + %695 = OpFAdd %float %670 %694 + %696 = OpFMul %float %float_3 %691 + %697 = OpFMul %float %float_2 %691 + %698 = OpFMul %float %float_8 %695 + %699 = OpFSub %float %697 %698 + %700 = OpFAdd %float %699 
%float_4 + %701 = OpFDiv %float %696 %700 + %702 = OpFMul %float %float_2 %695 + %703 = OpFDiv %float %702 %700 + %704 = OpCompositeConstruct %v2float %701 %703 + %705 = OpFSub %v2float %704 %679 + %706 = OpFAdd %v2float %682 %705 + %707 = OpCompositeExtract %float %706 0 + %708 = OpCompositeExtract %float %706 1 + %709 = OpExtInst %float %1 FMax %708 %float_1_00000001en10 + %710 = OpFDiv %float %707 %709 + %711 = OpCompositeInsert %v3float %710 %523 0 + %712 = OpCompositeInsert %v3float %float_1 %711 1 + %713 = OpFSub %float %float_1 %707 + %714 = OpFSub %float %713 %708 + %715 = OpFDiv %float %714 %709 + %716 = OpCompositeInsert %v3float %715 %712 2 + %717 = OpExtInst %float %1 FMax %float_0_328999996 %float_1_00000001en10 + %718 = OpFDiv %float %float_0_312700003 %717 + %719 = OpCompositeInsert %v3float %718 %523 0 + %720 = OpCompositeInsert %v3float %float_1 %719 1 + %721 = OpFDiv %float %float_0_358299971 %717 + %722 = OpCompositeInsert %v3float %721 %720 2 + %723 = OpVectorTimesMatrix %v3float %716 %476 + %724 = OpVectorTimesMatrix %v3float %722 %476 + %725 = OpCompositeExtract %float %724 0 + %726 = OpCompositeExtract %float %723 0 + %727 = OpFDiv %float %725 %726 + %728 = OpCompositeConstruct %v3float %727 %float_0 %float_0 + %729 = OpCompositeExtract %float %724 1 + %730 = OpCompositeExtract %float %723 1 + %731 = OpFDiv %float %729 %730 + %732 = OpCompositeConstruct %v3float %float_0 %731 %float_0 + %733 = OpCompositeExtract %float %724 2 + %734 = OpCompositeExtract %float %723 2 + %735 = OpFDiv %float %733 %734 + %736 = OpCompositeConstruct %v3float %float_0 %float_0 %735 + %737 = OpCompositeConstruct %mat3v3float %728 %732 %736 + %738 = OpMatrixTimesMatrix %mat3v3float %476 %737 + %739 = OpMatrixTimesMatrix %mat3v3float %738 %480 + %740 = OpMatrixTimesMatrix %mat3v3float %446 %739 + %741 = OpMatrixTimesMatrix %mat3v3float %740 %442 + %742 = OpVectorTimesMatrix %v3float %625 %741 + %743 = OpVectorTimesMatrix %v3float %742 %573 + %744 = OpAccessChain 
%_ptr_Uniform_v4float %_Globals %int_9 + %745 = OpAccessChain %_ptr_Uniform_float %_Globals %int_9 %int_3 + %746 = OpLoad %float %745 + %747 = OpFOrdNotEqual %bool %746 %float_0 + OpSelectionMerge %748 None + OpBranchConditional %747 %749 %748 + %749 = OpLabel + %750 = OpDot %float %743 %73 + %751 = OpCompositeConstruct %v3float %750 %750 %750 + %752 = OpFDiv %v3float %743 %751 + %753 = OpFSub %v3float %752 %141 + %754 = OpDot %float %753 %753 + %755 = OpFMul %float %float_n4 %754 + %756 = OpExtInst %float %1 Exp2 %755 + %757 = OpFSub %float %float_1 %756 + %758 = OpAccessChain %_ptr_Uniform_float %_Globals %int_45 + %759 = OpLoad %float %758 + %760 = OpFMul %float %float_n4 %759 + %761 = OpFMul %float %760 %750 + %762 = OpFMul %float %761 %750 + %763 = OpExtInst %float %1 Exp2 %762 + %764 = OpFSub %float %float_1 %763 + %765 = OpFMul %float %757 %764 + %766 = OpMatrixTimesMatrix %mat3v3float %484 %430 + %767 = OpMatrixTimesMatrix %mat3v3float %575 %766 + %768 = OpVectorTimesMatrix %v3float %743 %767 + %769 = OpCompositeConstruct %v3float %765 %765 %765 + %770 = OpExtInst %v3float %1 FMix %743 %768 %769 + OpBranch %748 + %748 = OpLabel + %771 = OpPhi %v3float %743 %607 %770 %749 + %772 = OpDot %float %771 %73 + %773 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_25 + %774 = OpLoad %v4float %773 + %775 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_20 + %776 = OpLoad %v4float %775 + %777 = OpFMul %v4float %774 %776 + %778 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_26 + %779 = OpLoad %v4float %778 + %780 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_21 + %781 = OpLoad %v4float %780 + %782 = OpFMul %v4float %779 %781 + %783 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_27 + %784 = OpLoad %v4float %783 + %785 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_22 + %786 = OpLoad %v4float %785 + %787 = OpFMul %v4float %784 %786 + %788 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_28 + %789 = OpLoad %v4float %788 + %790 = 
OpAccessChain %_ptr_Uniform_v4float %_Globals %int_23 + %791 = OpLoad %v4float %790 + %792 = OpFMul %v4float %789 %791 + %793 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_29 + %794 = OpLoad %v4float %793 + %795 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_24 + %796 = OpLoad %v4float %795 + %797 = OpFAdd %v4float %794 %796 + %798 = OpCompositeConstruct %v3float %772 %772 %772 + %799 = OpVectorShuffle %v3float %777 %777 0 1 2 + %800 = OpCompositeExtract %float %777 3 + %801 = OpCompositeConstruct %v3float %800 %800 %800 + %802 = OpFMul %v3float %799 %801 + %803 = OpExtInst %v3float %1 FMix %798 %771 %802 + %804 = OpExtInst %v3float %1 FMax %138 %803 + %805 = OpFMul %v3float %804 %330 + %806 = OpVectorShuffle %v3float %782 %782 0 1 2 + %807 = OpCompositeExtract %float %782 3 + %808 = OpCompositeConstruct %v3float %807 %807 %807 + %809 = OpFMul %v3float %806 %808 + %810 = OpExtInst %v3float %1 Pow %805 %809 + %811 = OpFMul %v3float %810 %206 + %812 = OpVectorShuffle %v3float %787 %787 0 1 2 + %813 = OpCompositeExtract %float %787 3 + %814 = OpCompositeConstruct %v3float %813 %813 %813 + %815 = OpFMul %v3float %812 %814 + %816 = OpFDiv %v3float %141 %815 + %817 = OpExtInst %v3float %1 Pow %811 %816 + %818 = OpVectorShuffle %v3float %792 %792 0 1 2 + %819 = OpCompositeExtract %float %792 3 + %820 = OpCompositeConstruct %v3float %819 %819 %819 + %821 = OpFMul %v3float %818 %820 + %822 = OpFMul %v3float %817 %821 + %823 = OpVectorShuffle %v3float %797 %797 0 1 2 + %824 = OpCompositeExtract %float %797 3 + %825 = OpCompositeConstruct %v3float %824 %824 %824 + %826 = OpFAdd %v3float %823 %825 + %827 = OpFAdd %v3float %822 %826 + %828 = OpAccessChain %_ptr_Uniform_float %_Globals %int_40 + %829 = OpLoad %float %828 + %830 = OpExtInst %float %1 SmoothStep %float_0 %829 %772 + %831 = OpFSub %float %float_1 %830 + %832 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_35 + %833 = OpLoad %v4float %832 + %834 = OpFMul %v4float %833 %776 + %835 = OpAccessChain 
%_ptr_Uniform_v4float %_Globals %int_36 + %836 = OpLoad %v4float %835 + %837 = OpFMul %v4float %836 %781 + %838 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_37 + %839 = OpLoad %v4float %838 + %840 = OpFMul %v4float %839 %786 + %841 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_38 + %842 = OpLoad %v4float %841 + %843 = OpFMul %v4float %842 %791 + %844 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_39 + %845 = OpLoad %v4float %844 + %846 = OpFAdd %v4float %845 %796 + %847 = OpVectorShuffle %v3float %834 %834 0 1 2 + %848 = OpCompositeExtract %float %834 3 + %849 = OpCompositeConstruct %v3float %848 %848 %848 + %850 = OpFMul %v3float %847 %849 + %851 = OpExtInst %v3float %1 FMix %798 %771 %850 + %852 = OpExtInst %v3float %1 FMax %138 %851 + %853 = OpFMul %v3float %852 %330 + %854 = OpVectorShuffle %v3float %837 %837 0 1 2 + %855 = OpCompositeExtract %float %837 3 + %856 = OpCompositeConstruct %v3float %855 %855 %855 + %857 = OpFMul %v3float %854 %856 + %858 = OpExtInst %v3float %1 Pow %853 %857 + %859 = OpFMul %v3float %858 %206 + %860 = OpVectorShuffle %v3float %840 %840 0 1 2 + %861 = OpCompositeExtract %float %840 3 + %862 = OpCompositeConstruct %v3float %861 %861 %861 + %863 = OpFMul %v3float %860 %862 + %864 = OpFDiv %v3float %141 %863 + %865 = OpExtInst %v3float %1 Pow %859 %864 + %866 = OpVectorShuffle %v3float %843 %843 0 1 2 + %867 = OpCompositeExtract %float %843 3 + %868 = OpCompositeConstruct %v3float %867 %867 %867 + %869 = OpFMul %v3float %866 %868 + %870 = OpFMul %v3float %865 %869 + %871 = OpVectorShuffle %v3float %846 %846 0 1 2 + %872 = OpCompositeExtract %float %846 3 + %873 = OpCompositeConstruct %v3float %872 %872 %872 + %874 = OpFAdd %v3float %871 %873 + %875 = OpFAdd %v3float %870 %874 + %876 = OpAccessChain %_ptr_Uniform_float %_Globals %int_41 + %877 = OpLoad %float %876 + %878 = OpExtInst %float %1 SmoothStep %877 %float_1 %772 + %879 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_30 + %880 = OpLoad %v4float %879 + 
%881 = OpFMul %v4float %880 %776 + %882 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_31 + %883 = OpLoad %v4float %882 + %884 = OpFMul %v4float %883 %781 + %885 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_32 + %886 = OpLoad %v4float %885 + %887 = OpFMul %v4float %886 %786 + %888 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_33 + %889 = OpLoad %v4float %888 + %890 = OpFMul %v4float %889 %791 + %891 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_34 + %892 = OpLoad %v4float %891 + %893 = OpFAdd %v4float %892 %796 + %894 = OpVectorShuffle %v3float %881 %881 0 1 2 + %895 = OpCompositeExtract %float %881 3 + %896 = OpCompositeConstruct %v3float %895 %895 %895 + %897 = OpFMul %v3float %894 %896 + %898 = OpExtInst %v3float %1 FMix %798 %771 %897 + %899 = OpExtInst %v3float %1 FMax %138 %898 + %900 = OpFMul %v3float %899 %330 + %901 = OpVectorShuffle %v3float %884 %884 0 1 2 + %902 = OpCompositeExtract %float %884 3 + %903 = OpCompositeConstruct %v3float %902 %902 %902 + %904 = OpFMul %v3float %901 %903 + %905 = OpExtInst %v3float %1 Pow %900 %904 + %906 = OpFMul %v3float %905 %206 + %907 = OpVectorShuffle %v3float %887 %887 0 1 2 + %908 = OpCompositeExtract %float %887 3 + %909 = OpCompositeConstruct %v3float %908 %908 %908 + %910 = OpFMul %v3float %907 %909 + %911 = OpFDiv %v3float %141 %910 + %912 = OpExtInst %v3float %1 Pow %906 %911 + %913 = OpVectorShuffle %v3float %890 %890 0 1 2 + %914 = OpCompositeExtract %float %890 3 + %915 = OpCompositeConstruct %v3float %914 %914 %914 + %916 = OpFMul %v3float %913 %915 + %917 = OpFMul %v3float %912 %916 + %918 = OpVectorShuffle %v3float %893 %893 0 1 2 + %919 = OpCompositeExtract %float %893 3 + %920 = OpCompositeConstruct %v3float %919 %919 %919 + %921 = OpFAdd %v3float %918 %920 + %922 = OpFAdd %v3float %917 %921 + %923 = OpFSub %float %830 %878 + %924 = OpCompositeConstruct %v3float %831 %831 %831 + %925 = OpFMul %v3float %827 %924 + %926 = OpCompositeConstruct %v3float %923 %923 %923 + %927 = 
OpFMul %v3float %922 %926 + %928 = OpFAdd %v3float %925 %927 + %929 = OpCompositeConstruct %v3float %878 %878 %878 + %930 = OpFMul %v3float %875 %929 + %931 = OpFAdd %v3float %928 %930 + %932 = OpVectorTimesMatrix %v3float %931 %575 + %933 = OpMatrixTimesMatrix %mat3v3float %577 %488 + %934 = OpMatrixTimesMatrix %mat3v3float %933 %576 + %935 = OpMatrixTimesMatrix %mat3v3float %577 %492 + %936 = OpMatrixTimesMatrix %mat3v3float %935 %576 + %937 = OpVectorTimesMatrix %v3float %931 %934 + %938 = OpAccessChain %_ptr_Uniform_float %_Globals %int_44 + %939 = OpLoad %float %938 + %940 = OpCompositeConstruct %v3float %939 %939 %939 + %941 = OpExtInst %v3float %1 FMix %931 %937 %940 + %942 = OpVectorTimesMatrix %v3float %941 %577 + %943 = OpCompositeExtract %float %942 0 + %944 = OpCompositeExtract %float %942 1 + %945 = OpExtInst %float %1 FMin %943 %944 + %946 = OpCompositeExtract %float %942 2 + %947 = OpExtInst %float %1 FMin %945 %946 + %948 = OpExtInst %float %1 FMax %943 %944 + %949 = OpExtInst %float %1 FMax %948 %946 + %950 = OpExtInst %float %1 FMax %949 %float_1_00000001en10 + %951 = OpExtInst %float %1 FMax %947 %float_1_00000001en10 + %952 = OpFSub %float %950 %951 + %953 = OpExtInst %float %1 FMax %949 %float_0_00999999978 + %954 = OpFDiv %float %952 %953 + %955 = OpFSub %float %946 %944 + %956 = OpFMul %float %946 %955 + %957 = OpFSub %float %944 %943 + %958 = OpFMul %float %944 %957 + %959 = OpFAdd %float %956 %958 + %960 = OpFSub %float %943 %946 + %961 = OpFMul %float %943 %960 + %962 = OpFAdd %float %959 %961 + %963 = OpExtInst %float %1 Sqrt %962 + %964 = OpFAdd %float %946 %944 + %965 = OpFAdd %float %964 %943 + %966 = OpFMul %float %float_1_75 %963 + %967 = OpFAdd %float %965 %966 + %968 = OpFMul %float %967 %float_0_333333343 + %969 = OpFSub %float %954 %float_0_400000006 + %970 = OpFMul %float %969 %float_5 + %971 = OpFMul %float %969 %float_2_5 + %972 = OpExtInst %float %1 FAbs %971 + %973 = OpFSub %float %float_1 %972 + %974 = OpExtInst %float %1 
FMax %973 %float_0 + %975 = OpExtInst %float %1 FSign %970 + %976 = OpConvertFToS %int %975 + %977 = OpConvertSToF %float %976 + %978 = OpFMul %float %974 %974 + %979 = OpFSub %float %float_1 %978 + %980 = OpFMul %float %977 %979 + %981 = OpFAdd %float %float_1 %980 + %982 = OpFMul %float %981 %float_0_0250000004 + %983 = OpFOrdLessThanEqual %bool %968 %float_0_0533333346 + OpSelectionMerge %984 None + OpBranchConditional %983 %985 %986 + %986 = OpLabel + %987 = OpFOrdGreaterThanEqual %bool %968 %float_0_159999996 + OpSelectionMerge %988 None + OpBranchConditional %987 %989 %990 + %990 = OpLabel + %991 = OpFDiv %float %float_0_239999995 %967 + %992 = OpFSub %float %991 %float_0_5 + %993 = OpFMul %float %982 %992 + OpBranch %988 + %989 = OpLabel + OpBranch %988 + %988 = OpLabel + %994 = OpPhi %float %993 %990 %float_0 %989 + OpBranch %984 + %985 = OpLabel + OpBranch %984 + %984 = OpLabel + %995 = OpPhi %float %994 %988 %982 %985 + %996 = OpFAdd %float %float_1 %995 + %997 = OpCompositeConstruct %v3float %996 %996 %996 + %998 = OpFMul %v3float %942 %997 + %999 = OpCompositeExtract %float %998 0 + %1000 = OpCompositeExtract %float %998 1 + %1001 = OpFOrdEqual %bool %999 %1000 + %1002 = OpCompositeExtract %float %998 2 + %1003 = OpFOrdEqual %bool %1000 %1002 + %1004 = OpLogicalAnd %bool %1001 %1003 + OpSelectionMerge %1005 None + OpBranchConditional %1004 %1006 %1007 + %1007 = OpLabel + %1008 = OpExtInst %float %1 Sqrt %float_3 + %1009 = OpFSub %float %1000 %1002 + %1010 = OpFMul %float %1008 %1009 + %1011 = OpFMul %float %float_2 %999 + %1012 = OpFSub %float %1011 %1000 + %1013 = OpFSub %float %1012 %1002 + %1014 = OpExtInst %float %1 Atan2 %1010 %1013 + %1015 = OpFMul %float %float_57_2957764 %1014 + OpBranch %1005 + %1006 = OpLabel + OpBranch %1005 + %1005 = OpLabel + %1016 = OpPhi %float %1015 %1007 %float_0 %1006 + %1017 = OpFOrdLessThan %bool %1016 %float_0 + OpSelectionMerge %1018 None + OpBranchConditional %1017 %1019 %1018 + %1019 = OpLabel + %1020 = OpFAdd 
%float %1016 %float_360 + OpBranch %1018 + %1018 = OpLabel + %1021 = OpPhi %float %1016 %1005 %1020 %1019 + %1022 = OpExtInst %float %1 FClamp %1021 %float_0 %float_360 + %1023 = OpFOrdGreaterThan %bool %1022 %float_180 + OpSelectionMerge %1024 None + OpBranchConditional %1023 %1025 %1024 + %1025 = OpLabel + %1026 = OpFSub %float %1022 %float_360 + OpBranch %1024 + %1024 = OpLabel + %1027 = OpPhi %float %1022 %1018 %1026 %1025 + %1028 = OpFMul %float %1027 %float_0_0148148146 + %1029 = OpExtInst %float %1 FAbs %1028 + %1030 = OpFSub %float %float_1 %1029 + %1031 = OpExtInst %float %1 SmoothStep %float_0 %float_1 %1030 + %1032 = OpFMul %float %1031 %1031 + %1033 = OpFMul %float %1032 %954 + %1034 = OpFSub %float %float_0_0299999993 %999 + %1035 = OpFMul %float %1033 %1034 + %1036 = OpFMul %float %1035 %float_0_180000007 + %1037 = OpFAdd %float %999 %1036 + %1038 = OpCompositeInsert %v3float %1037 %998 0 + %1039 = OpVectorTimesMatrix %v3float %1038 %434 + %1040 = OpExtInst %v3float %1 FMax %138 %1039 + %1041 = OpDot %float %1040 %73 + %1042 = OpCompositeConstruct %v3float %1041 %1041 %1041 + %1043 = OpExtInst %v3float %1 FMix %1042 %1040 %241 + %1044 = OpAccessChain %_ptr_Uniform_float %_Globals %int_13 + %1045 = OpLoad %float %1044 + %1046 = OpFAdd %float %float_1 %1045 + %1047 = OpAccessChain %_ptr_Uniform_float %_Globals %int_11 + %1048 = OpLoad %float %1047 + %1049 = OpFSub %float %1046 %1048 + %1050 = OpAccessChain %_ptr_Uniform_float %_Globals %int_14 + %1051 = OpLoad %float %1050 + %1052 = OpFAdd %float %float_1 %1051 + %1053 = OpAccessChain %_ptr_Uniform_float %_Globals %int_12 + %1054 = OpLoad %float %1053 + %1055 = OpFSub %float %1052 %1054 + %1056 = OpFOrdGreaterThan %bool %1048 %float_0_800000012 + OpSelectionMerge %1057 None + OpBranchConditional %1056 %1058 %1059 + %1059 = OpLabel + %1060 = OpFAdd %float %float_0_180000007 %1045 + %1061 = OpFDiv %float %1060 %1049 + %1062 = OpExtInst %float %1 Log %float_0_180000007 + %1063 = OpExtInst %float %1 Log 
%float_10 + %1064 = OpFDiv %float %1062 %1063 + %1065 = OpFSub %float %float_2 %1061 + %1066 = OpFDiv %float %1061 %1065 + %1067 = OpExtInst %float %1 Log %1066 + %1068 = OpFMul %float %float_0_5 %1067 + %1069 = OpAccessChain %_ptr_Uniform_float %_Globals %int_10 + %1070 = OpLoad %float %1069 + %1071 = OpFDiv %float %1049 %1070 + %1072 = OpFMul %float %1068 %1071 + %1073 = OpFSub %float %1064 %1072 + OpBranch %1057 + %1058 = OpLabel + %1074 = OpFSub %float %float_0_819999993 %1048 + %1075 = OpAccessChain %_ptr_Uniform_float %_Globals %int_10 + %1076 = OpLoad %float %1075 + %1077 = OpFDiv %float %1074 %1076 + %1078 = OpExtInst %float %1 Log %float_0_180000007 + %1079 = OpExtInst %float %1 Log %float_10 + %1080 = OpFDiv %float %1078 %1079 + %1081 = OpFAdd %float %1077 %1080 + OpBranch %1057 + %1057 = OpLabel + %1082 = OpPhi %float %1073 %1059 %1081 %1058 + %1083 = OpFSub %float %float_1 %1048 + %1084 = OpAccessChain %_ptr_Uniform_float %_Globals %int_10 + %1085 = OpLoad %float %1084 + %1086 = OpFDiv %float %1083 %1085 + %1087 = OpFSub %float %1086 %1082 + %1088 = OpFDiv %float %1054 %1085 + %1089 = OpFSub %float %1088 %1087 + %1090 = OpExtInst %v3float %1 Log %1043 + %1091 = OpExtInst %float %1 Log %float_10 + %1092 = OpCompositeConstruct %v3float %1091 %1091 %1091 + %1093 = OpFDiv %v3float %1090 %1092 + %1094 = OpCompositeConstruct %v3float %1085 %1085 %1085 + %1095 = OpCompositeConstruct %v3float %1087 %1087 %1087 + %1096 = OpFAdd %v3float %1093 %1095 + %1097 = OpFMul %v3float %1094 %1096 + %1098 = OpFNegate %float %1045 + %1099 = OpCompositeConstruct %v3float %1098 %1098 %1098 + %1100 = OpFMul %float %float_2 %1049 + %1101 = OpCompositeConstruct %v3float %1100 %1100 %1100 + %1102 = OpFMul %float %float_n2 %1085 + %1103 = OpFDiv %float %1102 %1049 + %1104 = OpCompositeConstruct %v3float %1103 %1103 %1103 + %1105 = OpCompositeConstruct %v3float %1082 %1082 %1082 + %1106 = OpFSub %v3float %1093 %1105 + %1107 = OpFMul %v3float %1104 %1106 + %1108 = OpExtInst %v3float 
%1 Exp %1107 + %1109 = OpFAdd %v3float %141 %1108 + %1110 = OpFDiv %v3float %1101 %1109 + %1111 = OpFAdd %v3float %1099 %1110 + %1112 = OpCompositeConstruct %v3float %1052 %1052 %1052 + %1113 = OpFMul %float %float_2 %1055 + %1114 = OpCompositeConstruct %v3float %1113 %1113 %1113 + %1115 = OpFMul %float %float_2 %1085 + %1116 = OpFDiv %float %1115 %1055 + %1117 = OpCompositeConstruct %v3float %1116 %1116 %1116 + %1118 = OpCompositeConstruct %v3float %1089 %1089 %1089 + %1119 = OpFSub %v3float %1093 %1118 + %1120 = OpFMul %v3float %1117 %1119 + %1121 = OpExtInst %v3float %1 Exp %1120 + %1122 = OpFAdd %v3float %141 %1121 + %1123 = OpFDiv %v3float %1114 %1122 + %1124 = OpFSub %v3float %1112 %1123 + %1125 = OpFOrdLessThan %v3bool %1093 %1105 + %1126 = OpSelect %v3float %1125 %1111 %1097 + %1127 = OpFOrdGreaterThan %v3bool %1093 %1118 + %1128 = OpSelect %v3float %1127 %1124 %1097 + %1129 = OpFSub %float %1089 %1082 + %1130 = OpCompositeConstruct %v3float %1129 %1129 %1129 + %1131 = OpFDiv %v3float %1106 %1130 + %1132 = OpExtInst %v3float %1 FClamp %1131 %138 %141 + %1133 = OpFOrdLessThan %bool %1089 %1082 + %1134 = OpFSub %v3float %141 %1132 + %1135 = OpCompositeConstruct %v3bool %1133 %1133 %1133 + %1136 = OpSelect %v3float %1135 %1134 %1132 + %1137 = OpFMul %v3float %252 %1136 + %1138 = OpFSub %v3float %251 %1137 + %1139 = OpFMul %v3float %1138 %1136 + %1140 = OpFMul %v3float %1139 %1136 + %1141 = OpExtInst %v3float %1 FMix %1126 %1128 %1140 + %1142 = OpDot %float %1141 %73 + %1143 = OpCompositeConstruct %v3float %1142 %1142 %1142 + %1144 = OpExtInst %v3float %1 FMix %1143 %1141 %254 + %1145 = OpExtInst %v3float %1 FMax %138 %1144 + %1146 = OpVectorTimesMatrix %v3float %1145 %936 + %1147 = OpExtInst %v3float %1 FMix %1145 %1146 %940 + %1148 = OpVectorTimesMatrix %v3float %1147 %575 + %1149 = OpExtInst %v3float %1 FMax %138 %1148 + %1150 = OpFOrdEqual %bool %746 %float_0 + OpSelectionMerge %1151 DontFlatten + OpBranchConditional %1150 %1152 %1151 + %1152 = OpLabel + 
%1153 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_2 + %1154 = OpLoad %v4float %1153 + %1155 = OpVectorShuffle %v3float %1154 %1154 0 1 2 + %1156 = OpDot %float %932 %1155 + %1157 = OpCompositeInsert %v3float %1156 %525 0 + %1158 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_3 + %1159 = OpLoad %v4float %1158 + %1160 = OpVectorShuffle %v3float %1159 %1159 0 1 2 + %1161 = OpDot %float %932 %1160 + %1162 = OpCompositeInsert %v3float %1161 %1157 1 + %1163 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_4 + %1164 = OpLoad %v4float %1163 + %1165 = OpVectorShuffle %v3float %1164 %1164 0 1 2 + %1166 = OpDot %float %932 %1165 + %1167 = OpCompositeInsert %v3float %1166 %1162 2 + %1168 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_8 + %1169 = OpLoad %v4float %1168 + %1170 = OpVectorShuffle %v3float %1169 %1169 0 1 2 + %1171 = OpLoad %v4float %744 + %1172 = OpVectorShuffle %v3float %1171 %1171 0 1 2 + %1173 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_7 + %1174 = OpLoad %v4float %1173 + %1175 = OpVectorShuffle %v3float %1174 %1174 0 1 2 + %1176 = OpDot %float %932 %1175 + %1177 = OpFAdd %float %1176 %float_1 + %1178 = OpFDiv %float %float_1 %1177 + %1179 = OpCompositeConstruct %v3float %1178 %1178 %1178 + %1180 = OpFMul %v3float %1172 %1179 + %1181 = OpFAdd %v3float %1170 %1180 + %1182 = OpFMul %v3float %1167 %1181 + %1183 = OpExtInst %v3float %1 FMax %138 %1182 + %1184 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_5 + %1185 = OpLoad %v4float %1184 + %1186 = OpVectorShuffle %v3float %1185 %1185 0 0 0 + %1187 = OpFSub %v3float %1186 %1183 + %1188 = OpExtInst %v3float %1 FMax %138 %1187 + %1189 = OpVectorShuffle %v3float %1185 %1185 2 2 2 + %1190 = OpExtInst %v3float %1 FMax %1183 %1189 + %1191 = OpExtInst %v3float %1 FClamp %1183 %1186 %1189 + %1192 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_6 + %1193 = OpLoad %v4float %1192 + %1194 = OpVectorShuffle %v3float %1193 %1193 0 0 0 + %1195 = OpFMul %v3float %1190 %1194 + %1196 = 
OpVectorShuffle %v3float %1193 %1193 1 1 1 + %1197 = OpFAdd %v3float %1195 %1196 + %1198 = OpVectorShuffle %v3float %1185 %1185 3 3 3 + %1199 = OpFAdd %v3float %1190 %1198 + %1200 = OpFDiv %v3float %141 %1199 + %1201 = OpFMul %v3float %1197 %1200 + %1202 = OpVectorShuffle %v3float %1164 %1164 3 3 3 + %1203 = OpFMul %v3float %1191 %1202 + %1204 = OpVectorShuffle %v3float %1154 %1154 3 3 3 + %1205 = OpFMul %v3float %1188 %1204 + %1206 = OpVectorShuffle %v3float %1185 %1185 1 1 1 + %1207 = OpFAdd %v3float %1188 %1206 + %1208 = OpFDiv %v3float %141 %1207 + %1209 = OpFMul %v3float %1205 %1208 + %1210 = OpVectorShuffle %v3float %1159 %1159 3 3 3 + %1211 = OpFAdd %v3float %1209 %1210 + %1212 = OpFAdd %v3float %1203 %1211 + %1213 = OpFAdd %v3float %1201 %1212 + %1214 = OpFSub %v3float %1213 %261 + OpBranch %1151 + %1151 = OpLabel + %1215 = OpPhi %v3float %1149 %1057 %1214 %1152 + %1216 = OpExtInst %v3float %1 FClamp %1215 %138 %141 + %1217 = OpCompositeExtract %float %1216 0 + OpBranch %1218 + %1218 = OpLabel + OpLoopMerge %1219 %1220 None + OpBranch %1221 + %1221 = OpLabel + %1222 = OpFOrdLessThan %bool %1217 %float_0_00313066994 + OpSelectionMerge %1223 None + OpBranchConditional %1222 %1224 %1223 + %1224 = OpLabel + %1225 = OpFMul %float %1217 %float_12_9200001 + OpBranch %1219 + %1223 = OpLabel + %1226 = OpExtInst %float %1 Pow %1217 %float_0_416666657 + %1227 = OpFMul %float %1226 %float_1_05499995 + %1228 = OpFSub %float %1227 %float_0_0549999997 + OpBranch %1219 + %1220 = OpLabel + OpBranch %1218 + %1219 = OpLabel + %1229 = OpPhi %float %1225 %1224 %1228 %1223 + %1230 = OpCompositeExtract %float %1216 1 + OpBranch %1231 + %1231 = OpLabel + OpLoopMerge %1232 %1233 None + OpBranch %1234 + %1234 = OpLabel + %1235 = OpFOrdLessThan %bool %1230 %float_0_00313066994 + OpSelectionMerge %1236 None + OpBranchConditional %1235 %1237 %1236 + %1237 = OpLabel + %1238 = OpFMul %float %1230 %float_12_9200001 + OpBranch %1232 + %1236 = OpLabel + %1239 = OpExtInst %float %1 Pow %1230 
%float_0_416666657 + %1240 = OpFMul %float %1239 %float_1_05499995 + %1241 = OpFSub %float %1240 %float_0_0549999997 + OpBranch %1232 + %1233 = OpLabel + OpBranch %1231 + %1232 = OpLabel + %1242 = OpPhi %float %1238 %1237 %1241 %1236 + %1243 = OpCompositeExtract %float %1216 2 + OpBranch %1244 + %1244 = OpLabel + OpLoopMerge %1245 %1246 None + OpBranch %1247 + %1247 = OpLabel + %1248 = OpFOrdLessThan %bool %1243 %float_0_00313066994 + OpSelectionMerge %1249 None + OpBranchConditional %1248 %1250 %1249 + %1250 = OpLabel + %1251 = OpFMul %float %1243 %float_12_9200001 + OpBranch %1245 + %1249 = OpLabel + %1252 = OpExtInst %float %1 Pow %1243 %float_0_416666657 + %1253 = OpFMul %float %1252 %float_1_05499995 + %1254 = OpFSub %float %1253 %float_0_0549999997 + OpBranch %1245 + %1246 = OpLabel + OpBranch %1244 + %1245 = OpLabel + %1255 = OpPhi %float %1251 %1250 %1254 %1249 + %1256 = OpCompositeConstruct %v3float %1229 %1242 %1255 + %1257 = OpFMul %v3float %1256 %173 + %1258 = OpFAdd %v3float %1257 %175 + %1259 = OpAccessChain %_ptr_Uniform_float %_Globals %int_15 %int_0 + %1260 = OpLoad %float %1259 + %1261 = OpCompositeConstruct %v3float %1260 %1260 %1260 + %1262 = OpFMul %v3float %1261 %1256 + %1263 = OpAccessChain %_ptr_Uniform_float %_Globals %int_15 %int_1 + %1264 = OpLoad %float %1263 + %1265 = OpCompositeConstruct %v3float %1264 %1264 %1264 + %1266 = OpLoad %type_2d_image %Texture1 + %1267 = OpLoad %type_sampler %Texture1Sampler + %1268 = OpCompositeExtract %float %1258 2 + %1269 = OpFMul %float %1268 %float_16 + %1270 = OpFSub %float %1269 %float_0_5 + %1271 = OpExtInst %float %1 Floor %1270 + %1272 = OpFSub %float %1270 %1271 + %1273 = OpCompositeExtract %float %1258 0 + %1274 = OpFAdd %float %1273 %1271 + %1275 = OpFMul %float %1274 %float_0_0625 + %1276 = OpCompositeExtract %float %1258 1 + %1277 = OpCompositeConstruct %v2float %1275 %1276 + %1278 = OpSampledImage %type_sampled_image %1266 %1267 + %1279 = OpImageSampleImplicitLod %v4float %1278 %1277 None + 
%1280 = OpFAdd %float %1275 %float_0_0625 + %1281 = OpCompositeConstruct %v2float %1280 %1276 + %1282 = OpSampledImage %type_sampled_image %1266 %1267 + %1283 = OpImageSampleImplicitLod %v4float %1282 %1281 None + %1284 = OpCompositeConstruct %v4float %1272 %1272 %1272 %1272 + %1285 = OpExtInst %v4float %1 FMix %1279 %1283 %1284 + %1286 = OpVectorShuffle %v3float %1285 %1285 0 1 2 + %1287 = OpFMul %v3float %1265 %1286 + %1288 = OpFAdd %v3float %1262 %1287 + %1289 = OpExtInst %v3float %1 FMax %263 %1288 + %1290 = OpFOrdGreaterThan %v3bool %1289 %265 + %1291 = OpFMul %v3float %1289 %267 + %1292 = OpFAdd %v3float %1291 %269 + %1293 = OpExtInst %v3float %1 Pow %1292 %271 + %1294 = OpFMul %v3float %1289 %273 + %1295 = OpSelect %v3float %1290 %1293 %1294 + %1296 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_0 + %1297 = OpLoad %float %1296 + %1298 = OpCompositeConstruct %v3float %1297 %1297 %1297 + %1299 = OpFMul %v3float %1295 %1295 + %1300 = OpFMul %v3float %1298 %1299 + %1301 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_1 + %1302 = OpLoad %float %1301 + %1303 = OpCompositeConstruct %v3float %1302 %1302 %1302 + %1304 = OpFMul %v3float %1303 %1295 + %1305 = OpFAdd %v3float %1300 %1304 + %1306 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_2 + %1307 = OpLoad %float %1306 + %1308 = OpCompositeConstruct %v3float %1307 %1307 %1307 + %1309 = OpFAdd %v3float %1305 %1308 + %1310 = OpAccessChain %_ptr_Uniform_v3float %_Globals %int_16 + %1311 = OpLoad %v3float %1310 + %1312 = OpFMul %v3float %1309 %1311 + %1313 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_17 + %1314 = OpLoad %v4float %1313 + %1315 = OpVectorShuffle %v3float %1314 %1314 0 1 2 + %1316 = OpAccessChain %_ptr_Uniform_float %_Globals %int_17 %int_3 + %1317 = OpLoad %float %1316 + %1318 = OpCompositeConstruct %v3float %1317 %1317 %1317 + %1319 = OpExtInst %v3float %1 FMix %1312 %1315 %1318 + %1320 = OpExtInst %v3float %1 FMax %138 %1319 + %1321 = OpAccessChain 
%_ptr_Uniform_float %_Globals %int_1 %int_1 + %1322 = OpLoad %float %1321 + %1323 = OpCompositeConstruct %v3float %1322 %1322 %1322 + %1324 = OpExtInst %v3float %1 Pow %1320 %1323 + %1325 = OpIEqual %bool %605 %uint_0 + OpSelectionMerge %1326 DontFlatten + OpBranchConditional %1325 %1327 %1328 + %1328 = OpLabel + %1329 = OpIEqual %bool %605 %uint_1 + OpSelectionMerge %1330 None + OpBranchConditional %1329 %1331 %1332 + %1332 = OpLabel + %1333 = OpIEqual %bool %605 %uint_3 + %1334 = OpIEqual %bool %605 %uint_5 + %1335 = OpLogicalOr %bool %1333 %1334 + OpSelectionMerge %1336 None + OpBranchConditional %1335 %1337 %1338 + %1338 = OpLabel + %1339 = OpIEqual %bool %605 %uint_4 + %1340 = OpIEqual %bool %605 %uint_6 + %1341 = OpLogicalOr %bool %1339 %1340 + OpSelectionMerge %1342 None + OpBranchConditional %1341 %1343 %1344 + %1344 = OpLabel + %1345 = OpIEqual %bool %605 %uint_7 + OpSelectionMerge %1346 None + OpBranchConditional %1345 %1347 %1348 + %1348 = OpLabel + %1349 = OpVectorTimesMatrix %v3float %1324 %573 + %1350 = OpVectorTimesMatrix %v3float %1349 %602 + %1351 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 %int_2 + %1352 = OpLoad %float %1351 + %1353 = OpCompositeConstruct %v3float %1352 %1352 %1352 + %1354 = OpExtInst %v3float %1 Pow %1350 %1353 + OpBranch %1346 + %1347 = OpLabel + %1355 = OpVectorTimesMatrix %v3float %932 %573 + %1356 = OpVectorTimesMatrix %v3float %1355 %602 + %1357 = OpFMul %v3float %1356 %519 + %1358 = OpExtInst %v3float %1 Pow %1357 %286 + %1359 = OpFMul %v3float %196 %1358 + %1360 = OpFAdd %v3float %195 %1359 + %1361 = OpFMul %v3float %197 %1358 + %1362 = OpFAdd %v3float %141 %1361 + %1363 = OpFDiv %v3float %141 %1362 + %1364 = OpFMul %v3float %1360 %1363 + %1365 = OpExtInst %v3float %1 Pow %1364 %287 + OpBranch %1346 + %1346 = OpLabel + %1366 = OpPhi %v3float %1354 %1348 %1365 %1347 + OpBranch %1342 + %1343 = OpLabel + %1367 = OpMatrixTimesMatrix %mat3v3float %572 %423 + %1368 = OpFMul %v3float %932 %285 + %1369 = 
OpVectorTimesMatrix %v3float %1368 %1367 + %1370 = OpCompositeExtract %float %1369 0 + %1371 = OpCompositeExtract %float %1369 1 + %1372 = OpExtInst %float %1 FMin %1370 %1371 + %1373 = OpCompositeExtract %float %1369 2 + %1374 = OpExtInst %float %1 FMin %1372 %1373 + %1375 = OpExtInst %float %1 FMax %1370 %1371 + %1376 = OpExtInst %float %1 FMax %1375 %1373 + %1377 = OpExtInst %float %1 FMax %1376 %float_1_00000001en10 + %1378 = OpExtInst %float %1 FMax %1374 %float_1_00000001en10 + %1379 = OpFSub %float %1377 %1378 + %1380 = OpExtInst %float %1 FMax %1376 %float_0_00999999978 + %1381 = OpFDiv %float %1379 %1380 + %1382 = OpFSub %float %1373 %1371 + %1383 = OpFMul %float %1373 %1382 + %1384 = OpFSub %float %1371 %1370 + %1385 = OpFMul %float %1371 %1384 + %1386 = OpFAdd %float %1383 %1385 + %1387 = OpFSub %float %1370 %1373 + %1388 = OpFMul %float %1370 %1387 + %1389 = OpFAdd %float %1386 %1388 + %1390 = OpExtInst %float %1 Sqrt %1389 + %1391 = OpFAdd %float %1373 %1371 + %1392 = OpFAdd %float %1391 %1370 + %1393 = OpFMul %float %float_1_75 %1390 + %1394 = OpFAdd %float %1392 %1393 + %1395 = OpFMul %float %1394 %float_0_333333343 + %1396 = OpFSub %float %1381 %float_0_400000006 + %1397 = OpFMul %float %1396 %float_5 + %1398 = OpFMul %float %1396 %float_2_5 + %1399 = OpExtInst %float %1 FAbs %1398 + %1400 = OpFSub %float %float_1 %1399 + %1401 = OpExtInst %float %1 FMax %1400 %float_0 + %1402 = OpExtInst %float %1 FSign %1397 + %1403 = OpConvertFToS %int %1402 + %1404 = OpConvertSToF %float %1403 + %1405 = OpFMul %float %1401 %1401 + %1406 = OpFSub %float %float_1 %1405 + %1407 = OpFMul %float %1404 %1406 + %1408 = OpFAdd %float %float_1 %1407 + %1409 = OpFMul %float %1408 %float_0_0250000004 + %1410 = OpFOrdLessThanEqual %bool %1395 %float_0_0533333346 + OpSelectionMerge %1411 None + OpBranchConditional %1410 %1412 %1413 + %1413 = OpLabel + %1414 = OpFOrdGreaterThanEqual %bool %1395 %float_0_159999996 + OpSelectionMerge %1415 None + OpBranchConditional %1414 %1416 
%1417 + %1417 = OpLabel + %1418 = OpFDiv %float %float_0_239999995 %1394 + %1419 = OpFSub %float %1418 %float_0_5 + %1420 = OpFMul %float %1409 %1419 + OpBranch %1415 + %1416 = OpLabel + OpBranch %1415 + %1415 = OpLabel + %1421 = OpPhi %float %1420 %1417 %float_0 %1416 + OpBranch %1411 + %1412 = OpLabel + OpBranch %1411 + %1411 = OpLabel + %1422 = OpPhi %float %1421 %1415 %1409 %1412 + %1423 = OpFAdd %float %float_1 %1422 + %1424 = OpCompositeConstruct %v3float %1423 %1423 %1423 + %1425 = OpFMul %v3float %1369 %1424 + %1426 = OpCompositeExtract %float %1425 0 + %1427 = OpCompositeExtract %float %1425 1 + %1428 = OpFOrdEqual %bool %1426 %1427 + %1429 = OpCompositeExtract %float %1425 2 + %1430 = OpFOrdEqual %bool %1427 %1429 + %1431 = OpLogicalAnd %bool %1428 %1430 + OpSelectionMerge %1432 None + OpBranchConditional %1431 %1433 %1434 + %1434 = OpLabel + %1435 = OpExtInst %float %1 Sqrt %float_3 + %1436 = OpFSub %float %1427 %1429 + %1437 = OpFMul %float %1435 %1436 + %1438 = OpFMul %float %float_2 %1426 + %1439 = OpFSub %float %1438 %1427 + %1440 = OpFSub %float %1439 %1429 + %1441 = OpExtInst %float %1 Atan2 %1437 %1440 + %1442 = OpFMul %float %float_57_2957764 %1441 + OpBranch %1432 + %1433 = OpLabel + OpBranch %1432 + %1432 = OpLabel + %1443 = OpPhi %float %1442 %1434 %float_0 %1433 + %1444 = OpFOrdLessThan %bool %1443 %float_0 + OpSelectionMerge %1445 None + OpBranchConditional %1444 %1446 %1445 + %1446 = OpLabel + %1447 = OpFAdd %float %1443 %float_360 + OpBranch %1445 + %1445 = OpLabel + %1448 = OpPhi %float %1443 %1432 %1447 %1446 + %1449 = OpExtInst %float %1 FClamp %1448 %float_0 %float_360 + %1450 = OpFOrdGreaterThan %bool %1449 %float_180 + OpSelectionMerge %1451 None + OpBranchConditional %1450 %1452 %1451 + %1452 = OpLabel + %1453 = OpFSub %float %1449 %float_360 + OpBranch %1451 + %1451 = OpLabel + %1454 = OpPhi %float %1449 %1445 %1453 %1452 + %1455 = OpFOrdGreaterThan %bool %1454 %float_n67_5 + %1456 = OpFOrdLessThan %bool %1454 %float_67_5 + %1457 = 
OpLogicalAnd %bool %1455 %1456 + OpSelectionMerge %1458 None + OpBranchConditional %1457 %1459 %1458 + %1459 = OpLabel + %1460 = OpFSub %float %1454 %float_n67_5 + %1461 = OpFMul %float %1460 %float_0_0296296291 + %1462 = OpConvertFToS %int %1461 + %1463 = OpConvertSToF %float %1462 + %1464 = OpFSub %float %1461 %1463 + %1465 = OpFMul %float %1464 %1464 + %1466 = OpFMul %float %1465 %1464 + %1467 = OpIEqual %bool %1462 %int_3 + OpSelectionMerge %1468 None + OpBranchConditional %1467 %1469 %1470 + %1470 = OpLabel + %1471 = OpIEqual %bool %1462 %int_2 + OpSelectionMerge %1472 None + OpBranchConditional %1471 %1473 %1474 + %1474 = OpLabel + %1475 = OpIEqual %bool %1462 %int_1 + OpSelectionMerge %1476 None + OpBranchConditional %1475 %1477 %1478 + %1478 = OpLabel + %1479 = OpIEqual %bool %1462 %int_0 + OpSelectionMerge %1480 None + OpBranchConditional %1479 %1481 %1482 + %1482 = OpLabel + OpBranch %1480 + %1481 = OpLabel + %1483 = OpFMul %float %1466 %float_0_166666672 + OpBranch %1480 + %1480 = OpLabel + %1484 = OpPhi %float %float_0 %1482 %1483 %1481 + OpBranch %1476 + %1477 = OpLabel + %1485 = OpFMul %float %1466 %float_n0_5 + %1486 = OpFMul %float %1465 %float_0_5 + %1487 = OpFAdd %float %1485 %1486 + %1488 = OpFMul %float %1464 %float_0_5 + %1489 = OpFAdd %float %1487 %1488 + %1490 = OpFAdd %float %1489 %float_0_166666672 + OpBranch %1476 + %1476 = OpLabel + %1491 = OpPhi %float %1484 %1480 %1490 %1477 + OpBranch %1472 + %1473 = OpLabel + %1492 = OpFMul %float %1466 %float_0_5 + %1493 = OpFMul %float %1465 %float_n1 + %1494 = OpFAdd %float %1492 %1493 + %1495 = OpFAdd %float %1494 %float_0_666666687 + OpBranch %1472 + %1472 = OpLabel + %1496 = OpPhi %float %1491 %1476 %1495 %1473 + OpBranch %1468 + %1469 = OpLabel + %1497 = OpFMul %float %1466 %float_n0_166666672 + %1498 = OpFMul %float %1465 %float_0_5 + %1499 = OpFAdd %float %1497 %1498 + %1500 = OpFMul %float %1464 %float_n0_5 + %1501 = OpFAdd %float %1499 %1500 + %1502 = OpFAdd %float %1501 %float_0_166666672 
+ OpBranch %1468 + %1468 = OpLabel + %1503 = OpPhi %float %1496 %1472 %1502 %1469 + OpBranch %1458 + %1458 = OpLabel + %1504 = OpPhi %float %float_0 %1451 %1503 %1468 + %1505 = OpFMul %float %1504 %float_1_5 + %1506 = OpFMul %float %1505 %1381 + %1507 = OpFSub %float %float_0_0299999993 %1426 + %1508 = OpFMul %float %1506 %1507 + %1509 = OpFMul %float %1508 %float_0_180000007 + %1510 = OpFAdd %float %1426 %1509 + %1511 = OpCompositeInsert %v3float %1510 %1425 0 + %1512 = OpExtInst %v3float %1 FClamp %1511 %138 %337 + %1513 = OpVectorTimesMatrix %v3float %1512 %434 + %1514 = OpExtInst %v3float %1 FClamp %1513 %138 %337 + %1515 = OpDot %float %1514 %73 + %1516 = OpCompositeConstruct %v3float %1515 %1515 %1515 + %1517 = OpExtInst %v3float %1 FMix %1516 %1514 %241 + %1518 = OpCompositeExtract %float %1517 0 + %1519 = OpExtInst %float %1 Exp2 %float_n15 + %1520 = OpFMul %float %float_0_179999992 %1519 + %1521 = OpExtInst %float %1 Exp2 %float_18 + %1522 = OpFMul %float %float_0_179999992 %1521 + OpStore %528 %499 + OpStore %527 %500 + %1523 = OpFOrdLessThanEqual %bool %1518 %float_0 + %1524 = OpExtInst %float %1 Exp2 %float_n14 + %1525 = OpSelect %float %1523 %1524 %1518 + %1526 = OpExtInst %float %1 Log %1525 + %1527 = OpFDiv %float %1526 %1091 + %1528 = OpExtInst %float %1 Log %1520 + %1529 = OpFDiv %float %1528 %1091 + %1530 = OpFOrdLessThanEqual %bool %1527 %1529 + OpSelectionMerge %1531 None + OpBranchConditional %1530 %1532 %1533 + %1533 = OpLabel + %1534 = OpFOrdGreaterThan %bool %1527 %1529 + %1535 = OpExtInst %float %1 Log %float_0_180000007 + %1536 = OpFDiv %float %1535 %1091 + %1537 = OpFOrdLessThan %bool %1527 %1536 + %1538 = OpLogicalAnd %bool %1534 %1537 + OpSelectionMerge %1539 None + OpBranchConditional %1538 %1540 %1541 + %1541 = OpLabel + %1542 = OpFOrdGreaterThanEqual %bool %1527 %1536 + %1543 = OpExtInst %float %1 Log %1522 + %1544 = OpFDiv %float %1543 %1091 + %1545 = OpFOrdLessThan %bool %1527 %1544 + %1546 = OpLogicalAnd %bool %1542 %1545 + 
OpSelectionMerge %1547 None + OpBranchConditional %1546 %1548 %1549 + %1549 = OpLabel + %1550 = OpExtInst %float %1 Log %float_10000 + %1551 = OpFDiv %float %1550 %1091 + OpBranch %1547 + %1548 = OpLabel + %1552 = OpFSub %float %1527 %1536 + %1553 = OpFMul %float %float_3 %1552 + %1554 = OpFSub %float %1544 %1536 + %1555 = OpFDiv %float %1553 %1554 + %1556 = OpConvertFToS %int %1555 + %1557 = OpConvertSToF %float %1556 + %1558 = OpFSub %float %1555 %1557 + %1559 = OpAccessChain %_ptr_Function_float %527 %1556 + %1560 = OpLoad %float %1559 + %1561 = OpIAdd %int %1556 %int_1 + %1562 = OpAccessChain %_ptr_Function_float %527 %1561 + %1563 = OpLoad %float %1562 + %1564 = OpIAdd %int %1556 %int_2 + %1565 = OpAccessChain %_ptr_Function_float %527 %1564 + %1566 = OpLoad %float %1565 + %1567 = OpCompositeConstruct %v3float %1560 %1563 %1566 + %1568 = OpFMul %float %1558 %1558 + %1569 = OpCompositeConstruct %v3float %1568 %1558 %float_1 + %1570 = OpMatrixTimesVector %v3float %466 %1567 + %1571 = OpDot %float %1569 %1570 + OpBranch %1547 + %1547 = OpLabel + %1572 = OpPhi %float %1551 %1549 %1571 %1548 + OpBranch %1539 + %1540 = OpLabel + %1573 = OpFSub %float %1527 %1529 + %1574 = OpFMul %float %float_3 %1573 + %1575 = OpFSub %float %1536 %1529 + %1576 = OpFDiv %float %1574 %1575 + %1577 = OpConvertFToS %int %1576 + %1578 = OpConvertSToF %float %1577 + %1579 = OpFSub %float %1576 %1578 + %1580 = OpAccessChain %_ptr_Function_float %528 %1577 + %1581 = OpLoad %float %1580 + %1582 = OpIAdd %int %1577 %int_1 + %1583 = OpAccessChain %_ptr_Function_float %528 %1582 + %1584 = OpLoad %float %1583 + %1585 = OpIAdd %int %1577 %int_2 + %1586 = OpAccessChain %_ptr_Function_float %528 %1585 + %1587 = OpLoad %float %1586 + %1588 = OpCompositeConstruct %v3float %1581 %1584 %1587 + %1589 = OpFMul %float %1579 %1579 + %1590 = OpCompositeConstruct %v3float %1589 %1579 %float_1 + %1591 = OpMatrixTimesVector %v3float %466 %1588 + %1592 = OpDot %float %1590 %1591 + OpBranch %1539 + %1539 = 
OpLabel + %1593 = OpPhi %float %1572 %1547 %1592 %1540 + OpBranch %1531 + %1532 = OpLabel + %1594 = OpExtInst %float %1 Log %float_9_99999975en05 + %1595 = OpFDiv %float %1594 %1091 + OpBranch %1531 + %1531 = OpLabel + %1596 = OpPhi %float %1593 %1539 %1595 %1532 + %1597 = OpExtInst %float %1 Pow %float_10 %1596 + %1598 = OpCompositeInsert %v3float %1597 %523 0 + %1599 = OpCompositeExtract %float %1517 1 + OpStore %530 %499 + OpStore %529 %500 + %1600 = OpFOrdLessThanEqual %bool %1599 %float_0 + %1601 = OpSelect %float %1600 %1524 %1599 + %1602 = OpExtInst %float %1 Log %1601 + %1603 = OpFDiv %float %1602 %1091 + %1604 = OpFOrdLessThanEqual %bool %1603 %1529 + OpSelectionMerge %1605 None + OpBranchConditional %1604 %1606 %1607 + %1607 = OpLabel + %1608 = OpFOrdGreaterThan %bool %1603 %1529 + %1609 = OpExtInst %float %1 Log %float_0_180000007 + %1610 = OpFDiv %float %1609 %1091 + %1611 = OpFOrdLessThan %bool %1603 %1610 + %1612 = OpLogicalAnd %bool %1608 %1611 + OpSelectionMerge %1613 None + OpBranchConditional %1612 %1614 %1615 + %1615 = OpLabel + %1616 = OpFOrdGreaterThanEqual %bool %1603 %1610 + %1617 = OpExtInst %float %1 Log %1522 + %1618 = OpFDiv %float %1617 %1091 + %1619 = OpFOrdLessThan %bool %1603 %1618 + %1620 = OpLogicalAnd %bool %1616 %1619 + OpSelectionMerge %1621 None + OpBranchConditional %1620 %1622 %1623 + %1623 = OpLabel + %1624 = OpExtInst %float %1 Log %float_10000 + %1625 = OpFDiv %float %1624 %1091 + OpBranch %1621 + %1622 = OpLabel + %1626 = OpFSub %float %1603 %1610 + %1627 = OpFMul %float %float_3 %1626 + %1628 = OpFSub %float %1618 %1610 + %1629 = OpFDiv %float %1627 %1628 + %1630 = OpConvertFToS %int %1629 + %1631 = OpConvertSToF %float %1630 + %1632 = OpFSub %float %1629 %1631 + %1633 = OpAccessChain %_ptr_Function_float %529 %1630 + %1634 = OpLoad %float %1633 + %1635 = OpIAdd %int %1630 %int_1 + %1636 = OpAccessChain %_ptr_Function_float %529 %1635 + %1637 = OpLoad %float %1636 + %1638 = OpIAdd %int %1630 %int_2 + %1639 = OpAccessChain 
%_ptr_Function_float %529 %1638 + %1640 = OpLoad %float %1639 + %1641 = OpCompositeConstruct %v3float %1634 %1637 %1640 + %1642 = OpFMul %float %1632 %1632 + %1643 = OpCompositeConstruct %v3float %1642 %1632 %float_1 + %1644 = OpMatrixTimesVector %v3float %466 %1641 + %1645 = OpDot %float %1643 %1644 + OpBranch %1621 + %1621 = OpLabel + %1646 = OpPhi %float %1625 %1623 %1645 %1622 + OpBranch %1613 + %1614 = OpLabel + %1647 = OpFSub %float %1603 %1529 + %1648 = OpFMul %float %float_3 %1647 + %1649 = OpFSub %float %1610 %1529 + %1650 = OpFDiv %float %1648 %1649 + %1651 = OpConvertFToS %int %1650 + %1652 = OpConvertSToF %float %1651 + %1653 = OpFSub %float %1650 %1652 + %1654 = OpAccessChain %_ptr_Function_float %530 %1651 + %1655 = OpLoad %float %1654 + %1656 = OpIAdd %int %1651 %int_1 + %1657 = OpAccessChain %_ptr_Function_float %530 %1656 + %1658 = OpLoad %float %1657 + %1659 = OpIAdd %int %1651 %int_2 + %1660 = OpAccessChain %_ptr_Function_float %530 %1659 + %1661 = OpLoad %float %1660 + %1662 = OpCompositeConstruct %v3float %1655 %1658 %1661 + %1663 = OpFMul %float %1653 %1653 + %1664 = OpCompositeConstruct %v3float %1663 %1653 %float_1 + %1665 = OpMatrixTimesVector %v3float %466 %1662 + %1666 = OpDot %float %1664 %1665 + OpBranch %1613 + %1613 = OpLabel + %1667 = OpPhi %float %1646 %1621 %1666 %1614 + OpBranch %1605 + %1606 = OpLabel + %1668 = OpExtInst %float %1 Log %float_9_99999975en05 + %1669 = OpFDiv %float %1668 %1091 + OpBranch %1605 + %1605 = OpLabel + %1670 = OpPhi %float %1667 %1613 %1669 %1606 + %1671 = OpExtInst %float %1 Pow %float_10 %1670 + %1672 = OpCompositeInsert %v3float %1671 %1598 1 + %1673 = OpCompositeExtract %float %1517 2 + OpStore %532 %499 + OpStore %531 %500 + %1674 = OpFOrdLessThanEqual %bool %1673 %float_0 + %1675 = OpSelect %float %1674 %1524 %1673 + %1676 = OpExtInst %float %1 Log %1675 + %1677 = OpFDiv %float %1676 %1091 + %1678 = OpFOrdLessThanEqual %bool %1677 %1529 + OpSelectionMerge %1679 None + OpBranchConditional %1678 
%1680 %1681 + %1681 = OpLabel + %1682 = OpFOrdGreaterThan %bool %1677 %1529 + %1683 = OpExtInst %float %1 Log %float_0_180000007 + %1684 = OpFDiv %float %1683 %1091 + %1685 = OpFOrdLessThan %bool %1677 %1684 + %1686 = OpLogicalAnd %bool %1682 %1685 + OpSelectionMerge %1687 None + OpBranchConditional %1686 %1688 %1689 + %1689 = OpLabel + %1690 = OpFOrdGreaterThanEqual %bool %1677 %1684 + %1691 = OpExtInst %float %1 Log %1522 + %1692 = OpFDiv %float %1691 %1091 + %1693 = OpFOrdLessThan %bool %1677 %1692 + %1694 = OpLogicalAnd %bool %1690 %1693 + OpSelectionMerge %1695 None + OpBranchConditional %1694 %1696 %1697 + %1697 = OpLabel + %1698 = OpExtInst %float %1 Log %float_10000 + %1699 = OpFDiv %float %1698 %1091 + OpBranch %1695 + %1696 = OpLabel + %1700 = OpFSub %float %1677 %1684 + %1701 = OpFMul %float %float_3 %1700 + %1702 = OpFSub %float %1692 %1684 + %1703 = OpFDiv %float %1701 %1702 + %1704 = OpConvertFToS %int %1703 + %1705 = OpConvertSToF %float %1704 + %1706 = OpFSub %float %1703 %1705 + %1707 = OpAccessChain %_ptr_Function_float %531 %1704 + %1708 = OpLoad %float %1707 + %1709 = OpIAdd %int %1704 %int_1 + %1710 = OpAccessChain %_ptr_Function_float %531 %1709 + %1711 = OpLoad %float %1710 + %1712 = OpIAdd %int %1704 %int_2 + %1713 = OpAccessChain %_ptr_Function_float %531 %1712 + %1714 = OpLoad %float %1713 + %1715 = OpCompositeConstruct %v3float %1708 %1711 %1714 + %1716 = OpFMul %float %1706 %1706 + %1717 = OpCompositeConstruct %v3float %1716 %1706 %float_1 + %1718 = OpMatrixTimesVector %v3float %466 %1715 + %1719 = OpDot %float %1717 %1718 + OpBranch %1695 + %1695 = OpLabel + %1720 = OpPhi %float %1699 %1697 %1719 %1696 + OpBranch %1687 + %1688 = OpLabel + %1721 = OpFSub %float %1677 %1529 + %1722 = OpFMul %float %float_3 %1721 + %1723 = OpFSub %float %1684 %1529 + %1724 = OpFDiv %float %1722 %1723 + %1725 = OpConvertFToS %int %1724 + %1726 = OpConvertSToF %float %1725 + %1727 = OpFSub %float %1724 %1726 + %1728 = OpAccessChain %_ptr_Function_float %532 
%1725 + %1729 = OpLoad %float %1728 + %1730 = OpIAdd %int %1725 %int_1 + %1731 = OpAccessChain %_ptr_Function_float %532 %1730 + %1732 = OpLoad %float %1731 + %1733 = OpIAdd %int %1725 %int_2 + %1734 = OpAccessChain %_ptr_Function_float %532 %1733 + %1735 = OpLoad %float %1734 + %1736 = OpCompositeConstruct %v3float %1729 %1732 %1735 + %1737 = OpFMul %float %1727 %1727 + %1738 = OpCompositeConstruct %v3float %1737 %1727 %float_1 + %1739 = OpMatrixTimesVector %v3float %466 %1736 + %1740 = OpDot %float %1738 %1739 + OpBranch %1687 + %1687 = OpLabel + %1741 = OpPhi %float %1720 %1695 %1740 %1688 + OpBranch %1679 + %1680 = OpLabel + %1742 = OpExtInst %float %1 Log %float_9_99999975en05 + %1743 = OpFDiv %float %1742 %1091 + OpBranch %1679 + %1679 = OpLabel + %1744 = OpPhi %float %1741 %1687 %1743 %1680 + %1745 = OpExtInst %float %1 Pow %float_10 %1744 + %1746 = OpCompositeInsert %v3float %1745 %1672 2 + %1747 = OpVectorTimesMatrix %v3float %1746 %438 + %1748 = OpVectorTimesMatrix %v3float %1747 %434 + %1749 = OpExtInst %float %1 Pow %float_2 %float_n12 + %1750 = OpFMul %float %float_0_179999992 %1749 + OpStore %540 %499 + OpStore %539 %500 + %1751 = OpFOrdLessThanEqual %bool %1750 %float_0 + %1752 = OpSelect %float %1751 %1524 %1750 + %1753 = OpExtInst %float %1 Log %1752 + %1754 = OpFDiv %float %1753 %1091 + %1755 = OpFOrdLessThanEqual %bool %1754 %1529 + OpSelectionMerge %1756 None + OpBranchConditional %1755 %1757 %1758 + %1758 = OpLabel + %1759 = OpFOrdGreaterThan %bool %1754 %1529 + %1760 = OpExtInst %float %1 Log %float_0_180000007 + %1761 = OpFDiv %float %1760 %1091 + %1762 = OpFOrdLessThan %bool %1754 %1761 + %1763 = OpLogicalAnd %bool %1759 %1762 + OpSelectionMerge %1764 None + OpBranchConditional %1763 %1765 %1766 + %1766 = OpLabel + %1767 = OpFOrdGreaterThanEqual %bool %1754 %1761 + %1768 = OpExtInst %float %1 Log %1522 + %1769 = OpFDiv %float %1768 %1091 + %1770 = OpFOrdLessThan %bool %1754 %1769 + %1771 = OpLogicalAnd %bool %1767 %1770 + OpSelectionMerge 
%1772 None + OpBranchConditional %1771 %1773 %1774 + %1774 = OpLabel + %1775 = OpExtInst %float %1 Log %float_10000 + %1776 = OpFDiv %float %1775 %1091 + OpBranch %1772 + %1773 = OpLabel + %1777 = OpFSub %float %1754 %1761 + %1778 = OpFMul %float %float_3 %1777 + %1779 = OpFSub %float %1769 %1761 + %1780 = OpFDiv %float %1778 %1779 + %1781 = OpConvertFToS %int %1780 + %1782 = OpConvertSToF %float %1781 + %1783 = OpFSub %float %1780 %1782 + %1784 = OpAccessChain %_ptr_Function_float %539 %1781 + %1785 = OpLoad %float %1784 + %1786 = OpIAdd %int %1781 %int_1 + %1787 = OpAccessChain %_ptr_Function_float %539 %1786 + %1788 = OpLoad %float %1787 + %1789 = OpIAdd %int %1781 %int_2 + %1790 = OpAccessChain %_ptr_Function_float %539 %1789 + %1791 = OpLoad %float %1790 + %1792 = OpCompositeConstruct %v3float %1785 %1788 %1791 + %1793 = OpFMul %float %1783 %1783 + %1794 = OpCompositeConstruct %v3float %1793 %1783 %float_1 + %1795 = OpMatrixTimesVector %v3float %466 %1792 + %1796 = OpDot %float %1794 %1795 + OpBranch %1772 + %1772 = OpLabel + %1797 = OpPhi %float %1776 %1774 %1796 %1773 + OpBranch %1764 + %1765 = OpLabel + %1798 = OpFSub %float %1754 %1529 + %1799 = OpFMul %float %float_3 %1798 + %1800 = OpFSub %float %1761 %1529 + %1801 = OpFDiv %float %1799 %1800 + %1802 = OpConvertFToS %int %1801 + %1803 = OpConvertSToF %float %1802 + %1804 = OpFSub %float %1801 %1803 + %1805 = OpAccessChain %_ptr_Function_float %540 %1802 + %1806 = OpLoad %float %1805 + %1807 = OpIAdd %int %1802 %int_1 + %1808 = OpAccessChain %_ptr_Function_float %540 %1807 + %1809 = OpLoad %float %1808 + %1810 = OpIAdd %int %1802 %int_2 + %1811 = OpAccessChain %_ptr_Function_float %540 %1810 + %1812 = OpLoad %float %1811 + %1813 = OpCompositeConstruct %v3float %1806 %1809 %1812 + %1814 = OpFMul %float %1804 %1804 + %1815 = OpCompositeConstruct %v3float %1814 %1804 %float_1 + %1816 = OpMatrixTimesVector %v3float %466 %1813 + %1817 = OpDot %float %1815 %1816 + OpBranch %1764 + %1764 = OpLabel + %1818 = 
OpPhi %float %1797 %1772 %1817 %1765 + OpBranch %1756 + %1757 = OpLabel + %1819 = OpExtInst %float %1 Log %float_9_99999975en05 + %1820 = OpFDiv %float %1819 %1091 + OpBranch %1756 + %1756 = OpLabel + %1821 = OpPhi %float %1818 %1764 %1820 %1757 + %1822 = OpExtInst %float %1 Pow %float_10 %1821 + OpStore %542 %499 + OpStore %541 %500 + %1823 = OpExtInst %float %1 Log %float_0_180000007 + %1824 = OpFDiv %float %1823 %1091 + %1825 = OpFOrdLessThanEqual %bool %1824 %1529 + OpSelectionMerge %1826 None + OpBranchConditional %1825 %1827 %1828 + %1828 = OpLabel + %1829 = OpFOrdGreaterThan %bool %1824 %1529 + %1830 = OpFOrdLessThan %bool %1824 %1824 + %1831 = OpLogicalAnd %bool %1829 %1830 + OpSelectionMerge %1832 None + OpBranchConditional %1831 %1833 %1834 + %1834 = OpLabel + %1835 = OpFOrdGreaterThanEqual %bool %1824 %1824 + %1836 = OpExtInst %float %1 Log %1522 + %1837 = OpFDiv %float %1836 %1091 + %1838 = OpFOrdLessThan %bool %1824 %1837 + %1839 = OpLogicalAnd %bool %1835 %1838 + OpSelectionMerge %1840 None + OpBranchConditional %1839 %1841 %1842 + %1842 = OpLabel + %1843 = OpExtInst %float %1 Log %float_10000 + %1844 = OpFDiv %float %1843 %1091 + OpBranch %1840 + %1841 = OpLabel + %1845 = OpFSub %float %1824 %1824 + %1846 = OpFMul %float %float_3 %1845 + %1847 = OpFSub %float %1837 %1824 + %1848 = OpFDiv %float %1846 %1847 + %1849 = OpConvertFToS %int %1848 + %1850 = OpConvertSToF %float %1849 + %1851 = OpFSub %float %1848 %1850 + %1852 = OpAccessChain %_ptr_Function_float %541 %1849 + %1853 = OpLoad %float %1852 + %1854 = OpIAdd %int %1849 %int_1 + %1855 = OpAccessChain %_ptr_Function_float %541 %1854 + %1856 = OpLoad %float %1855 + %1857 = OpIAdd %int %1849 %int_2 + %1858 = OpAccessChain %_ptr_Function_float %541 %1857 + %1859 = OpLoad %float %1858 + %1860 = OpCompositeConstruct %v3float %1853 %1856 %1859 + %1861 = OpFMul %float %1851 %1851 + %1862 = OpCompositeConstruct %v3float %1861 %1851 %float_1 + %1863 = OpMatrixTimesVector %v3float %466 %1860 + %1864 = OpDot 
%float %1862 %1863 + OpBranch %1840 + %1840 = OpLabel + %1865 = OpPhi %float %1844 %1842 %1864 %1841 + OpBranch %1832 + %1833 = OpLabel + %1866 = OpAccessChain %_ptr_Function_float %542 %int_3 + %1867 = OpLoad %float %1866 + %1868 = OpAccessChain %_ptr_Function_float %542 %int_4 + %1869 = OpLoad %float %1868 + %1870 = OpAccessChain %_ptr_Function_float %542 %int_5 + %1871 = OpLoad %float %1870 + %1872 = OpCompositeConstruct %v3float %1867 %1869 %1871 + %1873 = OpMatrixTimesVector %v3float %466 %1872 + %1874 = OpCompositeExtract %float %1873 2 + OpBranch %1832 + %1832 = OpLabel + %1875 = OpPhi %float %1865 %1840 %1874 %1833 + OpBranch %1826 + %1827 = OpLabel + %1876 = OpExtInst %float %1 Log %float_9_99999975en05 + %1877 = OpFDiv %float %1876 %1091 + OpBranch %1826 + %1826 = OpLabel + %1878 = OpPhi %float %1875 %1832 %1877 %1827 + %1879 = OpExtInst %float %1 Pow %float_10 %1878 + %1880 = OpExtInst %float %1 Pow %float_2 %float_11 + %1881 = OpFMul %float %float_0_179999992 %1880 + OpStore %544 %499 + OpStore %543 %500 + %1882 = OpFOrdLessThanEqual %bool %1881 %float_0 + %1883 = OpSelect %float %1882 %1524 %1881 + %1884 = OpExtInst %float %1 Log %1883 + %1885 = OpFDiv %float %1884 %1091 + %1886 = OpFOrdLessThanEqual %bool %1885 %1529 + OpSelectionMerge %1887 None + OpBranchConditional %1886 %1888 %1889 + %1889 = OpLabel + %1890 = OpFOrdGreaterThan %bool %1885 %1529 + %1891 = OpFOrdLessThan %bool %1885 %1824 + %1892 = OpLogicalAnd %bool %1890 %1891 + OpSelectionMerge %1893 None + OpBranchConditional %1892 %1894 %1895 + %1895 = OpLabel + %1896 = OpFOrdGreaterThanEqual %bool %1885 %1824 + %1897 = OpExtInst %float %1 Log %1522 + %1898 = OpFDiv %float %1897 %1091 + %1899 = OpFOrdLessThan %bool %1885 %1898 + %1900 = OpLogicalAnd %bool %1896 %1899 + OpSelectionMerge %1901 None + OpBranchConditional %1900 %1902 %1903 + %1903 = OpLabel + %1904 = OpExtInst %float %1 Log %float_10000 + %1905 = OpFDiv %float %1904 %1091 + OpBranch %1901 + %1902 = OpLabel + %1906 = OpFSub %float 
%1885 %1824 + %1907 = OpFMul %float %float_3 %1906 + %1908 = OpFSub %float %1898 %1824 + %1909 = OpFDiv %float %1907 %1908 + %1910 = OpConvertFToS %int %1909 + %1911 = OpConvertSToF %float %1910 + %1912 = OpFSub %float %1909 %1911 + %1913 = OpAccessChain %_ptr_Function_float %543 %1910 + %1914 = OpLoad %float %1913 + %1915 = OpIAdd %int %1910 %int_1 + %1916 = OpAccessChain %_ptr_Function_float %543 %1915 + %1917 = OpLoad %float %1916 + %1918 = OpIAdd %int %1910 %int_2 + %1919 = OpAccessChain %_ptr_Function_float %543 %1918 + %1920 = OpLoad %float %1919 + %1921 = OpCompositeConstruct %v3float %1914 %1917 %1920 + %1922 = OpFMul %float %1912 %1912 + %1923 = OpCompositeConstruct %v3float %1922 %1912 %float_1 + %1924 = OpMatrixTimesVector %v3float %466 %1921 + %1925 = OpDot %float %1923 %1924 + OpBranch %1901 + %1901 = OpLabel + %1926 = OpPhi %float %1905 %1903 %1925 %1902 + OpBranch %1893 + %1894 = OpLabel + %1927 = OpFSub %float %1885 %1529 + %1928 = OpFMul %float %float_3 %1927 + %1929 = OpFSub %float %1824 %1529 + %1930 = OpFDiv %float %1928 %1929 + %1931 = OpConvertFToS %int %1930 + %1932 = OpConvertSToF %float %1931 + %1933 = OpFSub %float %1930 %1932 + %1934 = OpAccessChain %_ptr_Function_float %544 %1931 + %1935 = OpLoad %float %1934 + %1936 = OpIAdd %int %1931 %int_1 + %1937 = OpAccessChain %_ptr_Function_float %544 %1936 + %1938 = OpLoad %float %1937 + %1939 = OpIAdd %int %1931 %int_2 + %1940 = OpAccessChain %_ptr_Function_float %544 %1939 + %1941 = OpLoad %float %1940 + %1942 = OpCompositeConstruct %v3float %1935 %1938 %1941 + %1943 = OpFMul %float %1933 %1933 + %1944 = OpCompositeConstruct %v3float %1943 %1933 %float_1 + %1945 = OpMatrixTimesVector %v3float %466 %1942 + %1946 = OpDot %float %1944 %1945 + OpBranch %1893 + %1893 = OpLabel + %1947 = OpPhi %float %1926 %1901 %1946 %1894 + OpBranch %1887 + %1888 = OpLabel + %1948 = OpExtInst %float %1 Log %float_9_99999975en05 + %1949 = OpFDiv %float %1948 %1091 + OpBranch %1887 + %1887 = OpLabel + %1950 = OpPhi 
%float %1947 %1893 %1949 %1888 + %1951 = OpExtInst %float %1 Pow %float_10 %1950 + %1952 = OpCompositeExtract %float %1748 0 + OpStore %538 %506 + OpStore %537 %507 + %1953 = OpFOrdLessThanEqual %bool %1952 %float_0 + %1954 = OpSelect %float %1953 %float_9_99999975en05 %1952 + %1955 = OpExtInst %float %1 Log %1954 + %1956 = OpFDiv %float %1955 %1091 + %1957 = OpExtInst %float %1 Log %1822 + %1958 = OpFDiv %float %1957 %1091 + %1959 = OpFOrdLessThanEqual %bool %1956 %1958 + OpSelectionMerge %1960 None + OpBranchConditional %1959 %1961 %1962 + %1962 = OpLabel + %1963 = OpFOrdGreaterThan %bool %1956 %1958 + %1964 = OpExtInst %float %1 Log %1879 + %1965 = OpFDiv %float %1964 %1091 + %1966 = OpFOrdLessThan %bool %1956 %1965 + %1967 = OpLogicalAnd %bool %1963 %1966 + OpSelectionMerge %1968 None + OpBranchConditional %1967 %1969 %1970 + %1970 = OpLabel + %1971 = OpFOrdGreaterThanEqual %bool %1956 %1965 + %1972 = OpExtInst %float %1 Log %1951 + %1973 = OpFDiv %float %1972 %1091 + %1974 = OpFOrdLessThan %bool %1956 %1973 + %1975 = OpLogicalAnd %bool %1971 %1974 + OpSelectionMerge %1976 None + OpBranchConditional %1975 %1977 %1978 + %1978 = OpLabel + %1979 = OpFMul %float %1956 %float_0_119999997 + %1980 = OpExtInst %float %1 Log %float_2000 + %1981 = OpFDiv %float %1980 %1091 + %1982 = OpFMul %float %float_0_119999997 %1972 + %1983 = OpFDiv %float %1982 %1091 + %1984 = OpFSub %float %1981 %1983 + %1985 = OpFAdd %float %1979 %1984 + OpBranch %1976 + %1977 = OpLabel + %1986 = OpFSub %float %1956 %1965 + %1987 = OpFMul %float %float_7 %1986 + %1988 = OpFSub %float %1973 %1965 + %1989 = OpFDiv %float %1987 %1988 + %1990 = OpConvertFToS %int %1989 + %1991 = OpConvertSToF %float %1990 + %1992 = OpFSub %float %1989 %1991 + %1993 = OpAccessChain %_ptr_Function_float %537 %1990 + %1994 = OpLoad %float %1993 + %1995 = OpIAdd %int %1990 %int_1 + %1996 = OpAccessChain %_ptr_Function_float %537 %1995 + %1997 = OpLoad %float %1996 + %1998 = OpIAdd %int %1990 %int_2 + %1999 = 
OpAccessChain %_ptr_Function_float %537 %1998 + %2000 = OpLoad %float %1999 + %2001 = OpCompositeConstruct %v3float %1994 %1997 %2000 + %2002 = OpFMul %float %1992 %1992 + %2003 = OpCompositeConstruct %v3float %2002 %1992 %float_1 + %2004 = OpMatrixTimesVector %v3float %466 %2001 + %2005 = OpDot %float %2003 %2004 + OpBranch %1976 + %1976 = OpLabel + %2006 = OpPhi %float %1985 %1978 %2005 %1977 + OpBranch %1968 + %1969 = OpLabel + %2007 = OpFSub %float %1956 %1958 + %2008 = OpFMul %float %float_7 %2007 + %2009 = OpFSub %float %1965 %1958 + %2010 = OpFDiv %float %2008 %2009 + %2011 = OpConvertFToS %int %2010 + %2012 = OpConvertSToF %float %2011 + %2013 = OpFSub %float %2010 %2012 + %2014 = OpAccessChain %_ptr_Function_float %538 %2011 + %2015 = OpLoad %float %2014 + %2016 = OpIAdd %int %2011 %int_1 + %2017 = OpAccessChain %_ptr_Function_float %538 %2016 + %2018 = OpLoad %float %2017 + %2019 = OpIAdd %int %2011 %int_2 + %2020 = OpAccessChain %_ptr_Function_float %538 %2019 + %2021 = OpLoad %float %2020 + %2022 = OpCompositeConstruct %v3float %2015 %2018 %2021 + %2023 = OpFMul %float %2013 %2013 + %2024 = OpCompositeConstruct %v3float %2023 %2013 %float_1 + %2025 = OpMatrixTimesVector %v3float %466 %2022 + %2026 = OpDot %float %2024 %2025 + OpBranch %1968 + %1968 = OpLabel + %2027 = OpPhi %float %2006 %1976 %2026 %1969 + OpBranch %1960 + %1961 = OpLabel + %2028 = OpExtInst %float %1 Log %float_0_00499999989 + %2029 = OpFDiv %float %2028 %1091 + OpBranch %1960 + %1960 = OpLabel + %2030 = OpPhi %float %2027 %1968 %2029 %1961 + %2031 = OpExtInst %float %1 Pow %float_10 %2030 + %2032 = OpCompositeInsert %v3float %2031 %523 0 + %2033 = OpCompositeExtract %float %1748 1 + OpStore %536 %506 + OpStore %535 %507 + %2034 = OpFOrdLessThanEqual %bool %2033 %float_0 + %2035 = OpSelect %float %2034 %float_9_99999975en05 %2033 + %2036 = OpExtInst %float %1 Log %2035 + %2037 = OpFDiv %float %2036 %1091 + %2038 = OpFOrdLessThanEqual %bool %2037 %1958 + OpSelectionMerge %2039 None + 
OpBranchConditional %2038 %2040 %2041 + %2041 = OpLabel + %2042 = OpFOrdGreaterThan %bool %2037 %1958 + %2043 = OpExtInst %float %1 Log %1879 + %2044 = OpFDiv %float %2043 %1091 + %2045 = OpFOrdLessThan %bool %2037 %2044 + %2046 = OpLogicalAnd %bool %2042 %2045 + OpSelectionMerge %2047 None + OpBranchConditional %2046 %2048 %2049 + %2049 = OpLabel + %2050 = OpFOrdGreaterThanEqual %bool %2037 %2044 + %2051 = OpExtInst %float %1 Log %1951 + %2052 = OpFDiv %float %2051 %1091 + %2053 = OpFOrdLessThan %bool %2037 %2052 + %2054 = OpLogicalAnd %bool %2050 %2053 + OpSelectionMerge %2055 None + OpBranchConditional %2054 %2056 %2057 + %2057 = OpLabel + %2058 = OpFMul %float %2037 %float_0_119999997 + %2059 = OpExtInst %float %1 Log %float_2000 + %2060 = OpFDiv %float %2059 %1091 + %2061 = OpFMul %float %float_0_119999997 %2051 + %2062 = OpFDiv %float %2061 %1091 + %2063 = OpFSub %float %2060 %2062 + %2064 = OpFAdd %float %2058 %2063 + OpBranch %2055 + %2056 = OpLabel + %2065 = OpFSub %float %2037 %2044 + %2066 = OpFMul %float %float_7 %2065 + %2067 = OpFSub %float %2052 %2044 + %2068 = OpFDiv %float %2066 %2067 + %2069 = OpConvertFToS %int %2068 + %2070 = OpConvertSToF %float %2069 + %2071 = OpFSub %float %2068 %2070 + %2072 = OpAccessChain %_ptr_Function_float %535 %2069 + %2073 = OpLoad %float %2072 + %2074 = OpIAdd %int %2069 %int_1 + %2075 = OpAccessChain %_ptr_Function_float %535 %2074 + %2076 = OpLoad %float %2075 + %2077 = OpIAdd %int %2069 %int_2 + %2078 = OpAccessChain %_ptr_Function_float %535 %2077 + %2079 = OpLoad %float %2078 + %2080 = OpCompositeConstruct %v3float %2073 %2076 %2079 + %2081 = OpFMul %float %2071 %2071 + %2082 = OpCompositeConstruct %v3float %2081 %2071 %float_1 + %2083 = OpMatrixTimesVector %v3float %466 %2080 + %2084 = OpDot %float %2082 %2083 + OpBranch %2055 + %2055 = OpLabel + %2085 = OpPhi %float %2064 %2057 %2084 %2056 + OpBranch %2047 + %2048 = OpLabel + %2086 = OpFSub %float %2037 %1958 + %2087 = OpFMul %float %float_7 %2086 + %2088 = 
OpFSub %float %2044 %1958 + %2089 = OpFDiv %float %2087 %2088 + %2090 = OpConvertFToS %int %2089 + %2091 = OpConvertSToF %float %2090 + %2092 = OpFSub %float %2089 %2091 + %2093 = OpAccessChain %_ptr_Function_float %536 %2090 + %2094 = OpLoad %float %2093 + %2095 = OpIAdd %int %2090 %int_1 + %2096 = OpAccessChain %_ptr_Function_float %536 %2095 + %2097 = OpLoad %float %2096 + %2098 = OpIAdd %int %2090 %int_2 + %2099 = OpAccessChain %_ptr_Function_float %536 %2098 + %2100 = OpLoad %float %2099 + %2101 = OpCompositeConstruct %v3float %2094 %2097 %2100 + %2102 = OpFMul %float %2092 %2092 + %2103 = OpCompositeConstruct %v3float %2102 %2092 %float_1 + %2104 = OpMatrixTimesVector %v3float %466 %2101 + %2105 = OpDot %float %2103 %2104 + OpBranch %2047 + %2047 = OpLabel + %2106 = OpPhi %float %2085 %2055 %2105 %2048 + OpBranch %2039 + %2040 = OpLabel + %2107 = OpExtInst %float %1 Log %float_0_00499999989 + %2108 = OpFDiv %float %2107 %1091 + OpBranch %2039 + %2039 = OpLabel + %2109 = OpPhi %float %2106 %2047 %2108 %2040 + %2110 = OpExtInst %float %1 Pow %float_10 %2109 + %2111 = OpCompositeInsert %v3float %2110 %2032 1 + %2112 = OpCompositeExtract %float %1748 2 + OpStore %534 %506 + OpStore %533 %507 + %2113 = OpFOrdLessThanEqual %bool %2112 %float_0 + %2114 = OpSelect %float %2113 %float_9_99999975en05 %2112 + %2115 = OpExtInst %float %1 Log %2114 + %2116 = OpFDiv %float %2115 %1091 + %2117 = OpFOrdLessThanEqual %bool %2116 %1958 + OpSelectionMerge %2118 None + OpBranchConditional %2117 %2119 %2120 + %2120 = OpLabel + %2121 = OpFOrdGreaterThan %bool %2116 %1958 + %2122 = OpExtInst %float %1 Log %1879 + %2123 = OpFDiv %float %2122 %1091 + %2124 = OpFOrdLessThan %bool %2116 %2123 + %2125 = OpLogicalAnd %bool %2121 %2124 + OpSelectionMerge %2126 None + OpBranchConditional %2125 %2127 %2128 + %2128 = OpLabel + %2129 = OpFOrdGreaterThanEqual %bool %2116 %2123 + %2130 = OpExtInst %float %1 Log %1951 + %2131 = OpFDiv %float %2130 %1091 + %2132 = OpFOrdLessThan %bool %2116 %2131 
+ %2133 = OpLogicalAnd %bool %2129 %2132 + OpSelectionMerge %2134 None + OpBranchConditional %2133 %2135 %2136 + %2136 = OpLabel + %2137 = OpFMul %float %2116 %float_0_119999997 + %2138 = OpExtInst %float %1 Log %float_2000 + %2139 = OpFDiv %float %2138 %1091 + %2140 = OpFMul %float %float_0_119999997 %2130 + %2141 = OpFDiv %float %2140 %1091 + %2142 = OpFSub %float %2139 %2141 + %2143 = OpFAdd %float %2137 %2142 + OpBranch %2134 + %2135 = OpLabel + %2144 = OpFSub %float %2116 %2123 + %2145 = OpFMul %float %float_7 %2144 + %2146 = OpFSub %float %2131 %2123 + %2147 = OpFDiv %float %2145 %2146 + %2148 = OpConvertFToS %int %2147 + %2149 = OpConvertSToF %float %2148 + %2150 = OpFSub %float %2147 %2149 + %2151 = OpAccessChain %_ptr_Function_float %533 %2148 + %2152 = OpLoad %float %2151 + %2153 = OpIAdd %int %2148 %int_1 + %2154 = OpAccessChain %_ptr_Function_float %533 %2153 + %2155 = OpLoad %float %2154 + %2156 = OpIAdd %int %2148 %int_2 + %2157 = OpAccessChain %_ptr_Function_float %533 %2156 + %2158 = OpLoad %float %2157 + %2159 = OpCompositeConstruct %v3float %2152 %2155 %2158 + %2160 = OpFMul %float %2150 %2150 + %2161 = OpCompositeConstruct %v3float %2160 %2150 %float_1 + %2162 = OpMatrixTimesVector %v3float %466 %2159 + %2163 = OpDot %float %2161 %2162 + OpBranch %2134 + %2134 = OpLabel + %2164 = OpPhi %float %2143 %2136 %2163 %2135 + OpBranch %2126 + %2127 = OpLabel + %2165 = OpFSub %float %2116 %1958 + %2166 = OpFMul %float %float_7 %2165 + %2167 = OpFSub %float %2123 %1958 + %2168 = OpFDiv %float %2166 %2167 + %2169 = OpConvertFToS %int %2168 + %2170 = OpConvertSToF %float %2169 + %2171 = OpFSub %float %2168 %2170 + %2172 = OpAccessChain %_ptr_Function_float %534 %2169 + %2173 = OpLoad %float %2172 + %2174 = OpIAdd %int %2169 %int_1 + %2175 = OpAccessChain %_ptr_Function_float %534 %2174 + %2176 = OpLoad %float %2175 + %2177 = OpIAdd %int %2169 %int_2 + %2178 = OpAccessChain %_ptr_Function_float %534 %2177 + %2179 = OpLoad %float %2178 + %2180 = 
OpCompositeConstruct %v3float %2173 %2176 %2179 + %2181 = OpFMul %float %2171 %2171 + %2182 = OpCompositeConstruct %v3float %2181 %2171 %float_1 + %2183 = OpMatrixTimesVector %v3float %466 %2180 + %2184 = OpDot %float %2182 %2183 + OpBranch %2126 + %2126 = OpLabel + %2185 = OpPhi %float %2164 %2134 %2184 %2127 + OpBranch %2118 + %2119 = OpLabel + %2186 = OpExtInst %float %1 Log %float_0_00499999989 + %2187 = OpFDiv %float %2186 %1091 + OpBranch %2118 + %2118 = OpLabel + %2188 = OpPhi %float %2185 %2126 %2187 %2119 + %2189 = OpExtInst %float %1 Pow %float_10 %2188 + %2190 = OpCompositeInsert %v3float %2189 %2111 2 + %2191 = OpVectorTimesMatrix %v3float %2190 %602 + %2192 = OpFMul %v3float %2191 %519 + %2193 = OpExtInst %v3float %1 Pow %2192 %286 + %2194 = OpFMul %v3float %196 %2193 + %2195 = OpFAdd %v3float %195 %2194 + %2196 = OpFMul %v3float %197 %2193 + %2197 = OpFAdd %v3float %141 %2196 + %2198 = OpFDiv %v3float %141 %2197 + %2199 = OpFMul %v3float %2195 %2198 + %2200 = OpExtInst %v3float %1 Pow %2199 %287 + OpBranch %1342 + %1342 = OpLabel + %2201 = OpPhi %v3float %1366 %1346 %2200 %2118 + OpBranch %1336 + %1337 = OpLabel + %2202 = OpMatrixTimesMatrix %mat3v3float %572 %423 + %2203 = OpFMul %v3float %932 %285 + %2204 = OpVectorTimesMatrix %v3float %2203 %2202 + %2205 = OpCompositeExtract %float %2204 0 + %2206 = OpCompositeExtract %float %2204 1 + %2207 = OpExtInst %float %1 FMin %2205 %2206 + %2208 = OpCompositeExtract %float %2204 2 + %2209 = OpExtInst %float %1 FMin %2207 %2208 + %2210 = OpExtInst %float %1 FMax %2205 %2206 + %2211 = OpExtInst %float %1 FMax %2210 %2208 + %2212 = OpExtInst %float %1 FMax %2211 %float_1_00000001en10 + %2213 = OpExtInst %float %1 FMax %2209 %float_1_00000001en10 + %2214 = OpFSub %float %2212 %2213 + %2215 = OpExtInst %float %1 FMax %2211 %float_0_00999999978 + %2216 = OpFDiv %float %2214 %2215 + %2217 = OpFSub %float %2208 %2206 + %2218 = OpFMul %float %2208 %2217 + %2219 = OpFSub %float %2206 %2205 + %2220 = OpFMul %float 
%2206 %2219 + %2221 = OpFAdd %float %2218 %2220 + %2222 = OpFSub %float %2205 %2208 + %2223 = OpFMul %float %2205 %2222 + %2224 = OpFAdd %float %2221 %2223 + %2225 = OpExtInst %float %1 Sqrt %2224 + %2226 = OpFAdd %float %2208 %2206 + %2227 = OpFAdd %float %2226 %2205 + %2228 = OpFMul %float %float_1_75 %2225 + %2229 = OpFAdd %float %2227 %2228 + %2230 = OpFMul %float %2229 %float_0_333333343 + %2231 = OpFSub %float %2216 %float_0_400000006 + %2232 = OpFMul %float %2231 %float_5 + %2233 = OpFMul %float %2231 %float_2_5 + %2234 = OpExtInst %float %1 FAbs %2233 + %2235 = OpFSub %float %float_1 %2234 + %2236 = OpExtInst %float %1 FMax %2235 %float_0 + %2237 = OpExtInst %float %1 FSign %2232 + %2238 = OpConvertFToS %int %2237 + %2239 = OpConvertSToF %float %2238 + %2240 = OpFMul %float %2236 %2236 + %2241 = OpFSub %float %float_1 %2240 + %2242 = OpFMul %float %2239 %2241 + %2243 = OpFAdd %float %float_1 %2242 + %2244 = OpFMul %float %2243 %float_0_0250000004 + %2245 = OpFOrdLessThanEqual %bool %2230 %float_0_0533333346 + OpSelectionMerge %2246 None + OpBranchConditional %2245 %2247 %2248 + %2248 = OpLabel + %2249 = OpFOrdGreaterThanEqual %bool %2230 %float_0_159999996 + OpSelectionMerge %2250 None + OpBranchConditional %2249 %2251 %2252 + %2252 = OpLabel + %2253 = OpFDiv %float %float_0_239999995 %2229 + %2254 = OpFSub %float %2253 %float_0_5 + %2255 = OpFMul %float %2244 %2254 + OpBranch %2250 + %2251 = OpLabel + OpBranch %2250 + %2250 = OpLabel + %2256 = OpPhi %float %2255 %2252 %float_0 %2251 + OpBranch %2246 + %2247 = OpLabel + OpBranch %2246 + %2246 = OpLabel + %2257 = OpPhi %float %2256 %2250 %2244 %2247 + %2258 = OpFAdd %float %float_1 %2257 + %2259 = OpCompositeConstruct %v3float %2258 %2258 %2258 + %2260 = OpFMul %v3float %2204 %2259 + %2261 = OpCompositeExtract %float %2260 0 + %2262 = OpCompositeExtract %float %2260 1 + %2263 = OpFOrdEqual %bool %2261 %2262 + %2264 = OpCompositeExtract %float %2260 2 + %2265 = OpFOrdEqual %bool %2262 %2264 + %2266 = 
OpLogicalAnd %bool %2263 %2265 + OpSelectionMerge %2267 None + OpBranchConditional %2266 %2268 %2269 + %2269 = OpLabel + %2270 = OpExtInst %float %1 Sqrt %float_3 + %2271 = OpFSub %float %2262 %2264 + %2272 = OpFMul %float %2270 %2271 + %2273 = OpFMul %float %float_2 %2261 + %2274 = OpFSub %float %2273 %2262 + %2275 = OpFSub %float %2274 %2264 + %2276 = OpExtInst %float %1 Atan2 %2272 %2275 + %2277 = OpFMul %float %float_57_2957764 %2276 + OpBranch %2267 + %2268 = OpLabel + OpBranch %2267 + %2267 = OpLabel + %2278 = OpPhi %float %2277 %2269 %float_0 %2268 + %2279 = OpFOrdLessThan %bool %2278 %float_0 + OpSelectionMerge %2280 None + OpBranchConditional %2279 %2281 %2280 + %2281 = OpLabel + %2282 = OpFAdd %float %2278 %float_360 + OpBranch %2280 + %2280 = OpLabel + %2283 = OpPhi %float %2278 %2267 %2282 %2281 + %2284 = OpExtInst %float %1 FClamp %2283 %float_0 %float_360 + %2285 = OpFOrdGreaterThan %bool %2284 %float_180 + OpSelectionMerge %2286 None + OpBranchConditional %2285 %2287 %2286 + %2287 = OpLabel + %2288 = OpFSub %float %2284 %float_360 + OpBranch %2286 + %2286 = OpLabel + %2289 = OpPhi %float %2284 %2280 %2288 %2287 + %2290 = OpFOrdGreaterThan %bool %2289 %float_n67_5 + %2291 = OpFOrdLessThan %bool %2289 %float_67_5 + %2292 = OpLogicalAnd %bool %2290 %2291 + OpSelectionMerge %2293 None + OpBranchConditional %2292 %2294 %2293 + %2294 = OpLabel + %2295 = OpFSub %float %2289 %float_n67_5 + %2296 = OpFMul %float %2295 %float_0_0296296291 + %2297 = OpConvertFToS %int %2296 + %2298 = OpConvertSToF %float %2297 + %2299 = OpFSub %float %2296 %2298 + %2300 = OpFMul %float %2299 %2299 + %2301 = OpFMul %float %2300 %2299 + %2302 = OpIEqual %bool %2297 %int_3 + OpSelectionMerge %2303 None + OpBranchConditional %2302 %2304 %2305 + %2305 = OpLabel + %2306 = OpIEqual %bool %2297 %int_2 + OpSelectionMerge %2307 None + OpBranchConditional %2306 %2308 %2309 + %2309 = OpLabel + %2310 = OpIEqual %bool %2297 %int_1 + OpSelectionMerge %2311 None + OpBranchConditional %2310 
%2312 %2313 + %2313 = OpLabel + %2314 = OpIEqual %bool %2297 %int_0 + OpSelectionMerge %2315 None + OpBranchConditional %2314 %2316 %2317 + %2317 = OpLabel + OpBranch %2315 + %2316 = OpLabel + %2318 = OpFMul %float %2301 %float_0_166666672 + OpBranch %2315 + %2315 = OpLabel + %2319 = OpPhi %float %float_0 %2317 %2318 %2316 + OpBranch %2311 + %2312 = OpLabel + %2320 = OpFMul %float %2301 %float_n0_5 + %2321 = OpFMul %float %2300 %float_0_5 + %2322 = OpFAdd %float %2320 %2321 + %2323 = OpFMul %float %2299 %float_0_5 + %2324 = OpFAdd %float %2322 %2323 + %2325 = OpFAdd %float %2324 %float_0_166666672 + OpBranch %2311 + %2311 = OpLabel + %2326 = OpPhi %float %2319 %2315 %2325 %2312 + OpBranch %2307 + %2308 = OpLabel + %2327 = OpFMul %float %2301 %float_0_5 + %2328 = OpFMul %float %2300 %float_n1 + %2329 = OpFAdd %float %2327 %2328 + %2330 = OpFAdd %float %2329 %float_0_666666687 + OpBranch %2307 + %2307 = OpLabel + %2331 = OpPhi %float %2326 %2311 %2330 %2308 + OpBranch %2303 + %2304 = OpLabel + %2332 = OpFMul %float %2301 %float_n0_166666672 + %2333 = OpFMul %float %2300 %float_0_5 + %2334 = OpFAdd %float %2332 %2333 + %2335 = OpFMul %float %2299 %float_n0_5 + %2336 = OpFAdd %float %2334 %2335 + %2337 = OpFAdd %float %2336 %float_0_166666672 + OpBranch %2303 + %2303 = OpLabel + %2338 = OpPhi %float %2331 %2307 %2337 %2304 + OpBranch %2293 + %2293 = OpLabel + %2339 = OpPhi %float %float_0 %2286 %2338 %2303 + %2340 = OpFMul %float %2339 %float_1_5 + %2341 = OpFMul %float %2340 %2216 + %2342 = OpFSub %float %float_0_0299999993 %2261 + %2343 = OpFMul %float %2341 %2342 + %2344 = OpFMul %float %2343 %float_0_180000007 + %2345 = OpFAdd %float %2261 %2344 + %2346 = OpCompositeInsert %v3float %2345 %2260 0 + %2347 = OpExtInst %v3float %1 FClamp %2346 %138 %337 + %2348 = OpVectorTimesMatrix %v3float %2347 %434 + %2349 = OpExtInst %v3float %1 FClamp %2348 %138 %337 + %2350 = OpDot %float %2349 %73 + %2351 = OpCompositeConstruct %v3float %2350 %2350 %2350 + %2352 = OpExtInst 
%v3float %1 FMix %2351 %2349 %241 + %2353 = OpCompositeExtract %float %2352 0 + %2354 = OpExtInst %float %1 Exp2 %float_n15 + %2355 = OpFMul %float %float_0_179999992 %2354 + %2356 = OpExtInst %float %1 Exp2 %float_18 + %2357 = OpFMul %float %float_0_179999992 %2356 + OpStore %546 %499 + OpStore %545 %500 + %2358 = OpFOrdLessThanEqual %bool %2353 %float_0 + %2359 = OpExtInst %float %1 Exp2 %float_n14 + %2360 = OpSelect %float %2358 %2359 %2353 + %2361 = OpExtInst %float %1 Log %2360 + %2362 = OpFDiv %float %2361 %1091 + %2363 = OpExtInst %float %1 Log %2355 + %2364 = OpFDiv %float %2363 %1091 + %2365 = OpFOrdLessThanEqual %bool %2362 %2364 + OpSelectionMerge %2366 None + OpBranchConditional %2365 %2367 %2368 + %2368 = OpLabel + %2369 = OpFOrdGreaterThan %bool %2362 %2364 + %2370 = OpExtInst %float %1 Log %float_0_180000007 + %2371 = OpFDiv %float %2370 %1091 + %2372 = OpFOrdLessThan %bool %2362 %2371 + %2373 = OpLogicalAnd %bool %2369 %2372 + OpSelectionMerge %2374 None + OpBranchConditional %2373 %2375 %2376 + %2376 = OpLabel + %2377 = OpFOrdGreaterThanEqual %bool %2362 %2371 + %2378 = OpExtInst %float %1 Log %2357 + %2379 = OpFDiv %float %2378 %1091 + %2380 = OpFOrdLessThan %bool %2362 %2379 + %2381 = OpLogicalAnd %bool %2377 %2380 + OpSelectionMerge %2382 None + OpBranchConditional %2381 %2383 %2384 + %2384 = OpLabel + %2385 = OpExtInst %float %1 Log %float_10000 + %2386 = OpFDiv %float %2385 %1091 + OpBranch %2382 + %2383 = OpLabel + %2387 = OpFSub %float %2362 %2371 + %2388 = OpFMul %float %float_3 %2387 + %2389 = OpFSub %float %2379 %2371 + %2390 = OpFDiv %float %2388 %2389 + %2391 = OpConvertFToS %int %2390 + %2392 = OpConvertSToF %float %2391 + %2393 = OpFSub %float %2390 %2392 + %2394 = OpAccessChain %_ptr_Function_float %545 %2391 + %2395 = OpLoad %float %2394 + %2396 = OpIAdd %int %2391 %int_1 + %2397 = OpAccessChain %_ptr_Function_float %545 %2396 + %2398 = OpLoad %float %2397 + %2399 = OpIAdd %int %2391 %int_2 + %2400 = OpAccessChain 
%_ptr_Function_float %545 %2399 + %2401 = OpLoad %float %2400 + %2402 = OpCompositeConstruct %v3float %2395 %2398 %2401 + %2403 = OpFMul %float %2393 %2393 + %2404 = OpCompositeConstruct %v3float %2403 %2393 %float_1 + %2405 = OpMatrixTimesVector %v3float %466 %2402 + %2406 = OpDot %float %2404 %2405 + OpBranch %2382 + %2382 = OpLabel + %2407 = OpPhi %float %2386 %2384 %2406 %2383 + OpBranch %2374 + %2375 = OpLabel + %2408 = OpFSub %float %2362 %2364 + %2409 = OpFMul %float %float_3 %2408 + %2410 = OpFSub %float %2371 %2364 + %2411 = OpFDiv %float %2409 %2410 + %2412 = OpConvertFToS %int %2411 + %2413 = OpConvertSToF %float %2412 + %2414 = OpFSub %float %2411 %2413 + %2415 = OpAccessChain %_ptr_Function_float %546 %2412 + %2416 = OpLoad %float %2415 + %2417 = OpIAdd %int %2412 %int_1 + %2418 = OpAccessChain %_ptr_Function_float %546 %2417 + %2419 = OpLoad %float %2418 + %2420 = OpIAdd %int %2412 %int_2 + %2421 = OpAccessChain %_ptr_Function_float %546 %2420 + %2422 = OpLoad %float %2421 + %2423 = OpCompositeConstruct %v3float %2416 %2419 %2422 + %2424 = OpFMul %float %2414 %2414 + %2425 = OpCompositeConstruct %v3float %2424 %2414 %float_1 + %2426 = OpMatrixTimesVector %v3float %466 %2423 + %2427 = OpDot %float %2425 %2426 + OpBranch %2374 + %2374 = OpLabel + %2428 = OpPhi %float %2407 %2382 %2427 %2375 + OpBranch %2366 + %2367 = OpLabel + %2429 = OpExtInst %float %1 Log %float_9_99999975en05 + %2430 = OpFDiv %float %2429 %1091 + OpBranch %2366 + %2366 = OpLabel + %2431 = OpPhi %float %2428 %2374 %2430 %2367 + %2432 = OpExtInst %float %1 Pow %float_10 %2431 + %2433 = OpCompositeInsert %v3float %2432 %523 0 + %2434 = OpCompositeExtract %float %2352 1 + OpStore %548 %499 + OpStore %547 %500 + %2435 = OpFOrdLessThanEqual %bool %2434 %float_0 + %2436 = OpSelect %float %2435 %2359 %2434 + %2437 = OpExtInst %float %1 Log %2436 + %2438 = OpFDiv %float %2437 %1091 + %2439 = OpFOrdLessThanEqual %bool %2438 %2364 + OpSelectionMerge %2440 None + OpBranchConditional %2439 %2441 
%2442 + %2442 = OpLabel + %2443 = OpFOrdGreaterThan %bool %2438 %2364 + %2444 = OpExtInst %float %1 Log %float_0_180000007 + %2445 = OpFDiv %float %2444 %1091 + %2446 = OpFOrdLessThan %bool %2438 %2445 + %2447 = OpLogicalAnd %bool %2443 %2446 + OpSelectionMerge %2448 None + OpBranchConditional %2447 %2449 %2450 + %2450 = OpLabel + %2451 = OpFOrdGreaterThanEqual %bool %2438 %2445 + %2452 = OpExtInst %float %1 Log %2357 + %2453 = OpFDiv %float %2452 %1091 + %2454 = OpFOrdLessThan %bool %2438 %2453 + %2455 = OpLogicalAnd %bool %2451 %2454 + OpSelectionMerge %2456 None + OpBranchConditional %2455 %2457 %2458 + %2458 = OpLabel + %2459 = OpExtInst %float %1 Log %float_10000 + %2460 = OpFDiv %float %2459 %1091 + OpBranch %2456 + %2457 = OpLabel + %2461 = OpFSub %float %2438 %2445 + %2462 = OpFMul %float %float_3 %2461 + %2463 = OpFSub %float %2453 %2445 + %2464 = OpFDiv %float %2462 %2463 + %2465 = OpConvertFToS %int %2464 + %2466 = OpConvertSToF %float %2465 + %2467 = OpFSub %float %2464 %2466 + %2468 = OpAccessChain %_ptr_Function_float %547 %2465 + %2469 = OpLoad %float %2468 + %2470 = OpIAdd %int %2465 %int_1 + %2471 = OpAccessChain %_ptr_Function_float %547 %2470 + %2472 = OpLoad %float %2471 + %2473 = OpIAdd %int %2465 %int_2 + %2474 = OpAccessChain %_ptr_Function_float %547 %2473 + %2475 = OpLoad %float %2474 + %2476 = OpCompositeConstruct %v3float %2469 %2472 %2475 + %2477 = OpFMul %float %2467 %2467 + %2478 = OpCompositeConstruct %v3float %2477 %2467 %float_1 + %2479 = OpMatrixTimesVector %v3float %466 %2476 + %2480 = OpDot %float %2478 %2479 + OpBranch %2456 + %2456 = OpLabel + %2481 = OpPhi %float %2460 %2458 %2480 %2457 + OpBranch %2448 + %2449 = OpLabel + %2482 = OpFSub %float %2438 %2364 + %2483 = OpFMul %float %float_3 %2482 + %2484 = OpFSub %float %2445 %2364 + %2485 = OpFDiv %float %2483 %2484 + %2486 = OpConvertFToS %int %2485 + %2487 = OpConvertSToF %float %2486 + %2488 = OpFSub %float %2485 %2487 + %2489 = OpAccessChain %_ptr_Function_float %548 %2486 
+ %2490 = OpLoad %float %2489 + %2491 = OpIAdd %int %2486 %int_1 + %2492 = OpAccessChain %_ptr_Function_float %548 %2491 + %2493 = OpLoad %float %2492 + %2494 = OpIAdd %int %2486 %int_2 + %2495 = OpAccessChain %_ptr_Function_float %548 %2494 + %2496 = OpLoad %float %2495 + %2497 = OpCompositeConstruct %v3float %2490 %2493 %2496 + %2498 = OpFMul %float %2488 %2488 + %2499 = OpCompositeConstruct %v3float %2498 %2488 %float_1 + %2500 = OpMatrixTimesVector %v3float %466 %2497 + %2501 = OpDot %float %2499 %2500 + OpBranch %2448 + %2448 = OpLabel + %2502 = OpPhi %float %2481 %2456 %2501 %2449 + OpBranch %2440 + %2441 = OpLabel + %2503 = OpExtInst %float %1 Log %float_9_99999975en05 + %2504 = OpFDiv %float %2503 %1091 + OpBranch %2440 + %2440 = OpLabel + %2505 = OpPhi %float %2502 %2448 %2504 %2441 + %2506 = OpExtInst %float %1 Pow %float_10 %2505 + %2507 = OpCompositeInsert %v3float %2506 %2433 1 + %2508 = OpCompositeExtract %float %2352 2 + OpStore %550 %499 + OpStore %549 %500 + %2509 = OpFOrdLessThanEqual %bool %2508 %float_0 + %2510 = OpSelect %float %2509 %2359 %2508 + %2511 = OpExtInst %float %1 Log %2510 + %2512 = OpFDiv %float %2511 %1091 + %2513 = OpFOrdLessThanEqual %bool %2512 %2364 + OpSelectionMerge %2514 None + OpBranchConditional %2513 %2515 %2516 + %2516 = OpLabel + %2517 = OpFOrdGreaterThan %bool %2512 %2364 + %2518 = OpExtInst %float %1 Log %float_0_180000007 + %2519 = OpFDiv %float %2518 %1091 + %2520 = OpFOrdLessThan %bool %2512 %2519 + %2521 = OpLogicalAnd %bool %2517 %2520 + OpSelectionMerge %2522 None + OpBranchConditional %2521 %2523 %2524 + %2524 = OpLabel + %2525 = OpFOrdGreaterThanEqual %bool %2512 %2519 + %2526 = OpExtInst %float %1 Log %2357 + %2527 = OpFDiv %float %2526 %1091 + %2528 = OpFOrdLessThan %bool %2512 %2527 + %2529 = OpLogicalAnd %bool %2525 %2528 + OpSelectionMerge %2530 None + OpBranchConditional %2529 %2531 %2532 + %2532 = OpLabel + %2533 = OpExtInst %float %1 Log %float_10000 + %2534 = OpFDiv %float %2533 %1091 + OpBranch 
%2530 + %2531 = OpLabel + %2535 = OpFSub %float %2512 %2519 + %2536 = OpFMul %float %float_3 %2535 + %2537 = OpFSub %float %2527 %2519 + %2538 = OpFDiv %float %2536 %2537 + %2539 = OpConvertFToS %int %2538 + %2540 = OpConvertSToF %float %2539 + %2541 = OpFSub %float %2538 %2540 + %2542 = OpAccessChain %_ptr_Function_float %549 %2539 + %2543 = OpLoad %float %2542 + %2544 = OpIAdd %int %2539 %int_1 + %2545 = OpAccessChain %_ptr_Function_float %549 %2544 + %2546 = OpLoad %float %2545 + %2547 = OpIAdd %int %2539 %int_2 + %2548 = OpAccessChain %_ptr_Function_float %549 %2547 + %2549 = OpLoad %float %2548 + %2550 = OpCompositeConstruct %v3float %2543 %2546 %2549 + %2551 = OpFMul %float %2541 %2541 + %2552 = OpCompositeConstruct %v3float %2551 %2541 %float_1 + %2553 = OpMatrixTimesVector %v3float %466 %2550 + %2554 = OpDot %float %2552 %2553 + OpBranch %2530 + %2530 = OpLabel + %2555 = OpPhi %float %2534 %2532 %2554 %2531 + OpBranch %2522 + %2523 = OpLabel + %2556 = OpFSub %float %2512 %2364 + %2557 = OpFMul %float %float_3 %2556 + %2558 = OpFSub %float %2519 %2364 + %2559 = OpFDiv %float %2557 %2558 + %2560 = OpConvertFToS %int %2559 + %2561 = OpConvertSToF %float %2560 + %2562 = OpFSub %float %2559 %2561 + %2563 = OpAccessChain %_ptr_Function_float %550 %2560 + %2564 = OpLoad %float %2563 + %2565 = OpIAdd %int %2560 %int_1 + %2566 = OpAccessChain %_ptr_Function_float %550 %2565 + %2567 = OpLoad %float %2566 + %2568 = OpIAdd %int %2560 %int_2 + %2569 = OpAccessChain %_ptr_Function_float %550 %2568 + %2570 = OpLoad %float %2569 + %2571 = OpCompositeConstruct %v3float %2564 %2567 %2570 + %2572 = OpFMul %float %2562 %2562 + %2573 = OpCompositeConstruct %v3float %2572 %2562 %float_1 + %2574 = OpMatrixTimesVector %v3float %466 %2571 + %2575 = OpDot %float %2573 %2574 + OpBranch %2522 + %2522 = OpLabel + %2576 = OpPhi %float %2555 %2530 %2575 %2523 + OpBranch %2514 + %2515 = OpLabel + %2577 = OpExtInst %float %1 Log %float_9_99999975en05 + %2578 = OpFDiv %float %2577 %1091 + 
OpBranch %2514 + %2514 = OpLabel + %2579 = OpPhi %float %2576 %2522 %2578 %2515 + %2580 = OpExtInst %float %1 Pow %float_10 %2579 + %2581 = OpCompositeInsert %v3float %2580 %2507 2 + %2582 = OpVectorTimesMatrix %v3float %2581 %438 + %2583 = OpVectorTimesMatrix %v3float %2582 %434 + %2584 = OpExtInst %float %1 Pow %float_2 %float_n12 + %2585 = OpFMul %float %float_0_179999992 %2584 + OpStore %558 %499 + OpStore %557 %500 + %2586 = OpFOrdLessThanEqual %bool %2585 %float_0 + %2587 = OpSelect %float %2586 %2359 %2585 + %2588 = OpExtInst %float %1 Log %2587 + %2589 = OpFDiv %float %2588 %1091 + %2590 = OpFOrdLessThanEqual %bool %2589 %2364 + OpSelectionMerge %2591 None + OpBranchConditional %2590 %2592 %2593 + %2593 = OpLabel + %2594 = OpFOrdGreaterThan %bool %2589 %2364 + %2595 = OpExtInst %float %1 Log %float_0_180000007 + %2596 = OpFDiv %float %2595 %1091 + %2597 = OpFOrdLessThan %bool %2589 %2596 + %2598 = OpLogicalAnd %bool %2594 %2597 + OpSelectionMerge %2599 None + OpBranchConditional %2598 %2600 %2601 + %2601 = OpLabel + %2602 = OpFOrdGreaterThanEqual %bool %2589 %2596 + %2603 = OpExtInst %float %1 Log %2357 + %2604 = OpFDiv %float %2603 %1091 + %2605 = OpFOrdLessThan %bool %2589 %2604 + %2606 = OpLogicalAnd %bool %2602 %2605 + OpSelectionMerge %2607 None + OpBranchConditional %2606 %2608 %2609 + %2609 = OpLabel + %2610 = OpExtInst %float %1 Log %float_10000 + %2611 = OpFDiv %float %2610 %1091 + OpBranch %2607 + %2608 = OpLabel + %2612 = OpFSub %float %2589 %2596 + %2613 = OpFMul %float %float_3 %2612 + %2614 = OpFSub %float %2604 %2596 + %2615 = OpFDiv %float %2613 %2614 + %2616 = OpConvertFToS %int %2615 + %2617 = OpConvertSToF %float %2616 + %2618 = OpFSub %float %2615 %2617 + %2619 = OpAccessChain %_ptr_Function_float %557 %2616 + %2620 = OpLoad %float %2619 + %2621 = OpIAdd %int %2616 %int_1 + %2622 = OpAccessChain %_ptr_Function_float %557 %2621 + %2623 = OpLoad %float %2622 + %2624 = OpIAdd %int %2616 %int_2 + %2625 = OpAccessChain %_ptr_Function_float 
%557 %2624 + %2626 = OpLoad %float %2625 + %2627 = OpCompositeConstruct %v3float %2620 %2623 %2626 + %2628 = OpFMul %float %2618 %2618 + %2629 = OpCompositeConstruct %v3float %2628 %2618 %float_1 + %2630 = OpMatrixTimesVector %v3float %466 %2627 + %2631 = OpDot %float %2629 %2630 + OpBranch %2607 + %2607 = OpLabel + %2632 = OpPhi %float %2611 %2609 %2631 %2608 + OpBranch %2599 + %2600 = OpLabel + %2633 = OpFSub %float %2589 %2364 + %2634 = OpFMul %float %float_3 %2633 + %2635 = OpFSub %float %2596 %2364 + %2636 = OpFDiv %float %2634 %2635 + %2637 = OpConvertFToS %int %2636 + %2638 = OpConvertSToF %float %2637 + %2639 = OpFSub %float %2636 %2638 + %2640 = OpAccessChain %_ptr_Function_float %558 %2637 + %2641 = OpLoad %float %2640 + %2642 = OpIAdd %int %2637 %int_1 + %2643 = OpAccessChain %_ptr_Function_float %558 %2642 + %2644 = OpLoad %float %2643 + %2645 = OpIAdd %int %2637 %int_2 + %2646 = OpAccessChain %_ptr_Function_float %558 %2645 + %2647 = OpLoad %float %2646 + %2648 = OpCompositeConstruct %v3float %2641 %2644 %2647 + %2649 = OpFMul %float %2639 %2639 + %2650 = OpCompositeConstruct %v3float %2649 %2639 %float_1 + %2651 = OpMatrixTimesVector %v3float %466 %2648 + %2652 = OpDot %float %2650 %2651 + OpBranch %2599 + %2599 = OpLabel + %2653 = OpPhi %float %2632 %2607 %2652 %2600 + OpBranch %2591 + %2592 = OpLabel + %2654 = OpExtInst %float %1 Log %float_9_99999975en05 + %2655 = OpFDiv %float %2654 %1091 + OpBranch %2591 + %2591 = OpLabel + %2656 = OpPhi %float %2653 %2599 %2655 %2592 + %2657 = OpExtInst %float %1 Pow %float_10 %2656 + OpStore %560 %499 + OpStore %559 %500 + %2658 = OpExtInst %float %1 Log %float_0_180000007 + %2659 = OpFDiv %float %2658 %1091 + %2660 = OpFOrdLessThanEqual %bool %2659 %2364 + OpSelectionMerge %2661 None + OpBranchConditional %2660 %2662 %2663 + %2663 = OpLabel + %2664 = OpFOrdGreaterThan %bool %2659 %2364 + %2665 = OpFOrdLessThan %bool %2659 %2659 + %2666 = OpLogicalAnd %bool %2664 %2665 + OpSelectionMerge %2667 None + 
OpBranchConditional %2666 %2668 %2669 + %2669 = OpLabel + %2670 = OpFOrdGreaterThanEqual %bool %2659 %2659 + %2671 = OpExtInst %float %1 Log %2357 + %2672 = OpFDiv %float %2671 %1091 + %2673 = OpFOrdLessThan %bool %2659 %2672 + %2674 = OpLogicalAnd %bool %2670 %2673 + OpSelectionMerge %2675 None + OpBranchConditional %2674 %2676 %2677 + %2677 = OpLabel + %2678 = OpExtInst %float %1 Log %float_10000 + %2679 = OpFDiv %float %2678 %1091 + OpBranch %2675 + %2676 = OpLabel + %2680 = OpFSub %float %2659 %2659 + %2681 = OpFMul %float %float_3 %2680 + %2682 = OpFSub %float %2672 %2659 + %2683 = OpFDiv %float %2681 %2682 + %2684 = OpConvertFToS %int %2683 + %2685 = OpConvertSToF %float %2684 + %2686 = OpFSub %float %2683 %2685 + %2687 = OpAccessChain %_ptr_Function_float %559 %2684 + %2688 = OpLoad %float %2687 + %2689 = OpIAdd %int %2684 %int_1 + %2690 = OpAccessChain %_ptr_Function_float %559 %2689 + %2691 = OpLoad %float %2690 + %2692 = OpIAdd %int %2684 %int_2 + %2693 = OpAccessChain %_ptr_Function_float %559 %2692 + %2694 = OpLoad %float %2693 + %2695 = OpCompositeConstruct %v3float %2688 %2691 %2694 + %2696 = OpFMul %float %2686 %2686 + %2697 = OpCompositeConstruct %v3float %2696 %2686 %float_1 + %2698 = OpMatrixTimesVector %v3float %466 %2695 + %2699 = OpDot %float %2697 %2698 + OpBranch %2675 + %2675 = OpLabel + %2700 = OpPhi %float %2679 %2677 %2699 %2676 + OpBranch %2667 + %2668 = OpLabel + %2701 = OpAccessChain %_ptr_Function_float %560 %int_3 + %2702 = OpLoad %float %2701 + %2703 = OpAccessChain %_ptr_Function_float %560 %int_4 + %2704 = OpLoad %float %2703 + %2705 = OpAccessChain %_ptr_Function_float %560 %int_5 + %2706 = OpLoad %float %2705 + %2707 = OpCompositeConstruct %v3float %2702 %2704 %2706 + %2708 = OpMatrixTimesVector %v3float %466 %2707 + %2709 = OpCompositeExtract %float %2708 2 + OpBranch %2667 + %2667 = OpLabel + %2710 = OpPhi %float %2700 %2675 %2709 %2668 + OpBranch %2661 + %2662 = OpLabel + %2711 = OpExtInst %float %1 Log %float_9_99999975en05 
+ %2712 = OpFDiv %float %2711 %1091 + OpBranch %2661 + %2661 = OpLabel + %2713 = OpPhi %float %2710 %2667 %2712 %2662 + %2714 = OpExtInst %float %1 Pow %float_10 %2713 + %2715 = OpExtInst %float %1 Pow %float_2 %float_10 + %2716 = OpFMul %float %float_0_179999992 %2715 + OpStore %562 %499 + OpStore %561 %500 + %2717 = OpFOrdLessThanEqual %bool %2716 %float_0 + %2718 = OpSelect %float %2717 %2359 %2716 + %2719 = OpExtInst %float %1 Log %2718 + %2720 = OpFDiv %float %2719 %1091 + %2721 = OpFOrdLessThanEqual %bool %2720 %2364 + OpSelectionMerge %2722 None + OpBranchConditional %2721 %2723 %2724 + %2724 = OpLabel + %2725 = OpFOrdGreaterThan %bool %2720 %2364 + %2726 = OpFOrdLessThan %bool %2720 %2659 + %2727 = OpLogicalAnd %bool %2725 %2726 + OpSelectionMerge %2728 None + OpBranchConditional %2727 %2729 %2730 + %2730 = OpLabel + %2731 = OpFOrdGreaterThanEqual %bool %2720 %2659 + %2732 = OpExtInst %float %1 Log %2357 + %2733 = OpFDiv %float %2732 %1091 + %2734 = OpFOrdLessThan %bool %2720 %2733 + %2735 = OpLogicalAnd %bool %2731 %2734 + OpSelectionMerge %2736 None + OpBranchConditional %2735 %2737 %2738 + %2738 = OpLabel + %2739 = OpExtInst %float %1 Log %float_10000 + %2740 = OpFDiv %float %2739 %1091 + OpBranch %2736 + %2737 = OpLabel + %2741 = OpFSub %float %2720 %2659 + %2742 = OpFMul %float %float_3 %2741 + %2743 = OpFSub %float %2733 %2659 + %2744 = OpFDiv %float %2742 %2743 + %2745 = OpConvertFToS %int %2744 + %2746 = OpConvertSToF %float %2745 + %2747 = OpFSub %float %2744 %2746 + %2748 = OpAccessChain %_ptr_Function_float %561 %2745 + %2749 = OpLoad %float %2748 + %2750 = OpIAdd %int %2745 %int_1 + %2751 = OpAccessChain %_ptr_Function_float %561 %2750 + %2752 = OpLoad %float %2751 + %2753 = OpIAdd %int %2745 %int_2 + %2754 = OpAccessChain %_ptr_Function_float %561 %2753 + %2755 = OpLoad %float %2754 + %2756 = OpCompositeConstruct %v3float %2749 %2752 %2755 + %2757 = OpFMul %float %2747 %2747 + %2758 = OpCompositeConstruct %v3float %2757 %2747 %float_1 + %2759 = 
OpMatrixTimesVector %v3float %466 %2756 + %2760 = OpDot %float %2758 %2759 + OpBranch %2736 + %2736 = OpLabel + %2761 = OpPhi %float %2740 %2738 %2760 %2737 + OpBranch %2728 + %2729 = OpLabel + %2762 = OpFSub %float %2720 %2364 + %2763 = OpFMul %float %float_3 %2762 + %2764 = OpFSub %float %2659 %2364 + %2765 = OpFDiv %float %2763 %2764 + %2766 = OpConvertFToS %int %2765 + %2767 = OpConvertSToF %float %2766 + %2768 = OpFSub %float %2765 %2767 + %2769 = OpAccessChain %_ptr_Function_float %562 %2766 + %2770 = OpLoad %float %2769 + %2771 = OpIAdd %int %2766 %int_1 + %2772 = OpAccessChain %_ptr_Function_float %562 %2771 + %2773 = OpLoad %float %2772 + %2774 = OpIAdd %int %2766 %int_2 + %2775 = OpAccessChain %_ptr_Function_float %562 %2774 + %2776 = OpLoad %float %2775 + %2777 = OpCompositeConstruct %v3float %2770 %2773 %2776 + %2778 = OpFMul %float %2768 %2768 + %2779 = OpCompositeConstruct %v3float %2778 %2768 %float_1 + %2780 = OpMatrixTimesVector %v3float %466 %2777 + %2781 = OpDot %float %2779 %2780 + OpBranch %2728 + %2728 = OpLabel + %2782 = OpPhi %float %2761 %2736 %2781 %2729 + OpBranch %2722 + %2723 = OpLabel + %2783 = OpExtInst %float %1 Log %float_9_99999975en05 + %2784 = OpFDiv %float %2783 %1091 + OpBranch %2722 + %2722 = OpLabel + %2785 = OpPhi %float %2782 %2728 %2784 %2723 + %2786 = OpExtInst %float %1 Pow %float_10 %2785 + %2787 = OpCompositeExtract %float %2583 0 + OpStore %556 %503 + OpStore %555 %504 + %2788 = OpFOrdLessThanEqual %bool %2787 %float_0 + %2789 = OpSelect %float %2788 %float_9_99999975en05 %2787 + %2790 = OpExtInst %float %1 Log %2789 + %2791 = OpFDiv %float %2790 %1091 + %2792 = OpExtInst %float %1 Log %2657 + %2793 = OpFDiv %float %2792 %1091 + %2794 = OpFOrdLessThanEqual %bool %2791 %2793 + OpSelectionMerge %2795 None + OpBranchConditional %2794 %2796 %2797 + %2797 = OpLabel + %2798 = OpFOrdGreaterThan %bool %2791 %2793 + %2799 = OpExtInst %float %1 Log %2714 + %2800 = OpFDiv %float %2799 %1091 + %2801 = OpFOrdLessThan %bool %2791 
%2800 + %2802 = OpLogicalAnd %bool %2798 %2801 + OpSelectionMerge %2803 None + OpBranchConditional %2802 %2804 %2805 + %2805 = OpLabel + %2806 = OpFOrdGreaterThanEqual %bool %2791 %2800 + %2807 = OpExtInst %float %1 Log %2786 + %2808 = OpFDiv %float %2807 %1091 + %2809 = OpFOrdLessThan %bool %2791 %2808 + %2810 = OpLogicalAnd %bool %2806 %2809 + OpSelectionMerge %2811 None + OpBranchConditional %2810 %2812 %2813 + %2813 = OpLabel + %2814 = OpFMul %float %2791 %float_0_0599999987 + %2815 = OpExtInst %float %1 Log %float_1000 + %2816 = OpFDiv %float %2815 %1091 + %2817 = OpFMul %float %float_0_0599999987 %2807 + %2818 = OpFDiv %float %2817 %1091 + %2819 = OpFSub %float %2816 %2818 + %2820 = OpFAdd %float %2814 %2819 + OpBranch %2811 + %2812 = OpLabel + %2821 = OpFSub %float %2791 %2800 + %2822 = OpFMul %float %float_7 %2821 + %2823 = OpFSub %float %2808 %2800 + %2824 = OpFDiv %float %2822 %2823 + %2825 = OpConvertFToS %int %2824 + %2826 = OpConvertSToF %float %2825 + %2827 = OpFSub %float %2824 %2826 + %2828 = OpAccessChain %_ptr_Function_float %555 %2825 + %2829 = OpLoad %float %2828 + %2830 = OpIAdd %int %2825 %int_1 + %2831 = OpAccessChain %_ptr_Function_float %555 %2830 + %2832 = OpLoad %float %2831 + %2833 = OpIAdd %int %2825 %int_2 + %2834 = OpAccessChain %_ptr_Function_float %555 %2833 + %2835 = OpLoad %float %2834 + %2836 = OpCompositeConstruct %v3float %2829 %2832 %2835 + %2837 = OpFMul %float %2827 %2827 + %2838 = OpCompositeConstruct %v3float %2837 %2827 %float_1 + %2839 = OpMatrixTimesVector %v3float %466 %2836 + %2840 = OpDot %float %2838 %2839 + OpBranch %2811 + %2811 = OpLabel + %2841 = OpPhi %float %2820 %2813 %2840 %2812 + OpBranch %2803 + %2804 = OpLabel + %2842 = OpFSub %float %2791 %2793 + %2843 = OpFMul %float %float_7 %2842 + %2844 = OpFSub %float %2800 %2793 + %2845 = OpFDiv %float %2843 %2844 + %2846 = OpConvertFToS %int %2845 + %2847 = OpConvertSToF %float %2846 + %2848 = OpFSub %float %2845 %2847 + %2849 = OpAccessChain %_ptr_Function_float 
%556 %2846 + %2850 = OpLoad %float %2849 + %2851 = OpIAdd %int %2846 %int_1 + %2852 = OpAccessChain %_ptr_Function_float %556 %2851 + %2853 = OpLoad %float %2852 + %2854 = OpIAdd %int %2846 %int_2 + %2855 = OpAccessChain %_ptr_Function_float %556 %2854 + %2856 = OpLoad %float %2855 + %2857 = OpCompositeConstruct %v3float %2850 %2853 %2856 + %2858 = OpFMul %float %2848 %2848 + %2859 = OpCompositeConstruct %v3float %2858 %2848 %float_1 + %2860 = OpMatrixTimesVector %v3float %466 %2857 + %2861 = OpDot %float %2859 %2860 + OpBranch %2803 + %2803 = OpLabel + %2862 = OpPhi %float %2841 %2811 %2861 %2804 + OpBranch %2795 + %2796 = OpLabel + %2863 = OpFMul %float %2791 %float_3 + %2864 = OpExtInst %float %1 Log %float_9_99999975en05 + %2865 = OpFDiv %float %2864 %1091 + %2866 = OpFMul %float %float_3 %2792 + %2867 = OpFDiv %float %2866 %1091 + %2868 = OpFSub %float %2865 %2867 + %2869 = OpFAdd %float %2863 %2868 + OpBranch %2795 + %2795 = OpLabel + %2870 = OpPhi %float %2862 %2803 %2869 %2796 + %2871 = OpExtInst %float %1 Pow %float_10 %2870 + %2872 = OpCompositeInsert %v3float %2871 %523 0 + %2873 = OpCompositeExtract %float %2583 1 + OpStore %554 %503 + OpStore %553 %504 + %2874 = OpFOrdLessThanEqual %bool %2873 %float_0 + %2875 = OpSelect %float %2874 %float_9_99999975en05 %2873 + %2876 = OpExtInst %float %1 Log %2875 + %2877 = OpFDiv %float %2876 %1091 + %2878 = OpFOrdLessThanEqual %bool %2877 %2793 + OpSelectionMerge %2879 None + OpBranchConditional %2878 %2880 %2881 + %2881 = OpLabel + %2882 = OpFOrdGreaterThan %bool %2877 %2793 + %2883 = OpExtInst %float %1 Log %2714 + %2884 = OpFDiv %float %2883 %1091 + %2885 = OpFOrdLessThan %bool %2877 %2884 + %2886 = OpLogicalAnd %bool %2882 %2885 + OpSelectionMerge %2887 None + OpBranchConditional %2886 %2888 %2889 + %2889 = OpLabel + %2890 = OpFOrdGreaterThanEqual %bool %2877 %2884 + %2891 = OpExtInst %float %1 Log %2786 + %2892 = OpFDiv %float %2891 %1091 + %2893 = OpFOrdLessThan %bool %2877 %2892 + %2894 = OpLogicalAnd %bool 
%2890 %2893 + OpSelectionMerge %2895 None + OpBranchConditional %2894 %2896 %2897 + %2897 = OpLabel + %2898 = OpFMul %float %2877 %float_0_0599999987 + %2899 = OpExtInst %float %1 Log %float_1000 + %2900 = OpFDiv %float %2899 %1091 + %2901 = OpFMul %float %float_0_0599999987 %2891 + %2902 = OpFDiv %float %2901 %1091 + %2903 = OpFSub %float %2900 %2902 + %2904 = OpFAdd %float %2898 %2903 + OpBranch %2895 + %2896 = OpLabel + %2905 = OpFSub %float %2877 %2884 + %2906 = OpFMul %float %float_7 %2905 + %2907 = OpFSub %float %2892 %2884 + %2908 = OpFDiv %float %2906 %2907 + %2909 = OpConvertFToS %int %2908 + %2910 = OpConvertSToF %float %2909 + %2911 = OpFSub %float %2908 %2910 + %2912 = OpAccessChain %_ptr_Function_float %553 %2909 + %2913 = OpLoad %float %2912 + %2914 = OpIAdd %int %2909 %int_1 + %2915 = OpAccessChain %_ptr_Function_float %553 %2914 + %2916 = OpLoad %float %2915 + %2917 = OpIAdd %int %2909 %int_2 + %2918 = OpAccessChain %_ptr_Function_float %553 %2917 + %2919 = OpLoad %float %2918 + %2920 = OpCompositeConstruct %v3float %2913 %2916 %2919 + %2921 = OpFMul %float %2911 %2911 + %2922 = OpCompositeConstruct %v3float %2921 %2911 %float_1 + %2923 = OpMatrixTimesVector %v3float %466 %2920 + %2924 = OpDot %float %2922 %2923 + OpBranch %2895 + %2895 = OpLabel + %2925 = OpPhi %float %2904 %2897 %2924 %2896 + OpBranch %2887 + %2888 = OpLabel + %2926 = OpFSub %float %2877 %2793 + %2927 = OpFMul %float %float_7 %2926 + %2928 = OpFSub %float %2884 %2793 + %2929 = OpFDiv %float %2927 %2928 + %2930 = OpConvertFToS %int %2929 + %2931 = OpConvertSToF %float %2930 + %2932 = OpFSub %float %2929 %2931 + %2933 = OpAccessChain %_ptr_Function_float %554 %2930 + %2934 = OpLoad %float %2933 + %2935 = OpIAdd %int %2930 %int_1 + %2936 = OpAccessChain %_ptr_Function_float %554 %2935 + %2937 = OpLoad %float %2936 + %2938 = OpIAdd %int %2930 %int_2 + %2939 = OpAccessChain %_ptr_Function_float %554 %2938 + %2940 = OpLoad %float %2939 + %2941 = OpCompositeConstruct %v3float %2934 %2937 
%2940 + %2942 = OpFMul %float %2932 %2932 + %2943 = OpCompositeConstruct %v3float %2942 %2932 %float_1 + %2944 = OpMatrixTimesVector %v3float %466 %2941 + %2945 = OpDot %float %2943 %2944 + OpBranch %2887 + %2887 = OpLabel + %2946 = OpPhi %float %2925 %2895 %2945 %2888 + OpBranch %2879 + %2880 = OpLabel + %2947 = OpFMul %float %2877 %float_3 + %2948 = OpExtInst %float %1 Log %float_9_99999975en05 + %2949 = OpFDiv %float %2948 %1091 + %2950 = OpFMul %float %float_3 %2792 + %2951 = OpFDiv %float %2950 %1091 + %2952 = OpFSub %float %2949 %2951 + %2953 = OpFAdd %float %2947 %2952 + OpBranch %2879 + %2879 = OpLabel + %2954 = OpPhi %float %2946 %2887 %2953 %2880 + %2955 = OpExtInst %float %1 Pow %float_10 %2954 + %2956 = OpCompositeInsert %v3float %2955 %2872 1 + %2957 = OpCompositeExtract %float %2583 2 + OpStore %552 %503 + OpStore %551 %504 + %2958 = OpFOrdLessThanEqual %bool %2957 %float_0 + %2959 = OpSelect %float %2958 %float_9_99999975en05 %2957 + %2960 = OpExtInst %float %1 Log %2959 + %2961 = OpFDiv %float %2960 %1091 + %2962 = OpFOrdLessThanEqual %bool %2961 %2793 + OpSelectionMerge %2963 None + OpBranchConditional %2962 %2964 %2965 + %2965 = OpLabel + %2966 = OpFOrdGreaterThan %bool %2961 %2793 + %2967 = OpExtInst %float %1 Log %2714 + %2968 = OpFDiv %float %2967 %1091 + %2969 = OpFOrdLessThan %bool %2961 %2968 + %2970 = OpLogicalAnd %bool %2966 %2969 + OpSelectionMerge %2971 None + OpBranchConditional %2970 %2972 %2973 + %2973 = OpLabel + %2974 = OpFOrdGreaterThanEqual %bool %2961 %2968 + %2975 = OpExtInst %float %1 Log %2786 + %2976 = OpFDiv %float %2975 %1091 + %2977 = OpFOrdLessThan %bool %2961 %2976 + %2978 = OpLogicalAnd %bool %2974 %2977 + OpSelectionMerge %2979 None + OpBranchConditional %2978 %2980 %2981 + %2981 = OpLabel + %2982 = OpFMul %float %2961 %float_0_0599999987 + %2983 = OpExtInst %float %1 Log %float_1000 + %2984 = OpFDiv %float %2983 %1091 + %2985 = OpFMul %float %float_0_0599999987 %2975 + %2986 = OpFDiv %float %2985 %1091 + %2987 = 
OpFSub %float %2984 %2986 + %2988 = OpFAdd %float %2982 %2987 + OpBranch %2979 + %2980 = OpLabel + %2989 = OpFSub %float %2961 %2968 + %2990 = OpFMul %float %float_7 %2989 + %2991 = OpFSub %float %2976 %2968 + %2992 = OpFDiv %float %2990 %2991 + %2993 = OpConvertFToS %int %2992 + %2994 = OpConvertSToF %float %2993 + %2995 = OpFSub %float %2992 %2994 + %2996 = OpAccessChain %_ptr_Function_float %551 %2993 + %2997 = OpLoad %float %2996 + %2998 = OpIAdd %int %2993 %int_1 + %2999 = OpAccessChain %_ptr_Function_float %551 %2998 + %3000 = OpLoad %float %2999 + %3001 = OpIAdd %int %2993 %int_2 + %3002 = OpAccessChain %_ptr_Function_float %551 %3001 + %3003 = OpLoad %float %3002 + %3004 = OpCompositeConstruct %v3float %2997 %3000 %3003 + %3005 = OpFMul %float %2995 %2995 + %3006 = OpCompositeConstruct %v3float %3005 %2995 %float_1 + %3007 = OpMatrixTimesVector %v3float %466 %3004 + %3008 = OpDot %float %3006 %3007 + OpBranch %2979 + %2979 = OpLabel + %3009 = OpPhi %float %2988 %2981 %3008 %2980 + OpBranch %2971 + %2972 = OpLabel + %3010 = OpFSub %float %2961 %2793 + %3011 = OpFMul %float %float_7 %3010 + %3012 = OpFSub %float %2968 %2793 + %3013 = OpFDiv %float %3011 %3012 + %3014 = OpConvertFToS %int %3013 + %3015 = OpConvertSToF %float %3014 + %3016 = OpFSub %float %3013 %3015 + %3017 = OpAccessChain %_ptr_Function_float %552 %3014 + %3018 = OpLoad %float %3017 + %3019 = OpIAdd %int %3014 %int_1 + %3020 = OpAccessChain %_ptr_Function_float %552 %3019 + %3021 = OpLoad %float %3020 + %3022 = OpIAdd %int %3014 %int_2 + %3023 = OpAccessChain %_ptr_Function_float %552 %3022 + %3024 = OpLoad %float %3023 + %3025 = OpCompositeConstruct %v3float %3018 %3021 %3024 + %3026 = OpFMul %float %3016 %3016 + %3027 = OpCompositeConstruct %v3float %3026 %3016 %float_1 + %3028 = OpMatrixTimesVector %v3float %466 %3025 + %3029 = OpDot %float %3027 %3028 + OpBranch %2971 + %2971 = OpLabel + %3030 = OpPhi %float %3009 %2979 %3029 %2972 + OpBranch %2963 + %2964 = OpLabel + %3031 = OpFMul 
%float %2961 %float_3 + %3032 = OpExtInst %float %1 Log %float_9_99999975en05 + %3033 = OpFDiv %float %3032 %1091 + %3034 = OpFMul %float %float_3 %2792 + %3035 = OpFDiv %float %3034 %1091 + %3036 = OpFSub %float %3033 %3035 + %3037 = OpFAdd %float %3031 %3036 + OpBranch %2963 + %2963 = OpLabel + %3038 = OpPhi %float %3030 %2971 %3037 %2964 + %3039 = OpExtInst %float %1 Pow %float_10 %3038 + %3040 = OpCompositeInsert %v3float %3039 %2956 2 + %3041 = OpFSub %v3float %3040 %361 + %3042 = OpVectorTimesMatrix %v3float %3041 %602 + %3043 = OpFMul %v3float %3042 %519 + %3044 = OpExtInst %v3float %1 Pow %3043 %286 + %3045 = OpFMul %v3float %196 %3044 + %3046 = OpFAdd %v3float %195 %3045 + %3047 = OpFMul %v3float %197 %3044 + %3048 = OpFAdd %v3float %141 %3047 + %3049 = OpFDiv %v3float %141 %3048 + %3050 = OpFMul %v3float %3046 %3049 + %3051 = OpExtInst %v3float %1 Pow %3050 %287 + OpBranch %1336 + %1336 = OpLabel + %3052 = OpPhi %v3float %2201 %1342 %3051 %2963 + OpBranch %1330 + %1331 = OpLabel + %3053 = OpVectorTimesMatrix %v3float %1324 %573 + %3054 = OpVectorTimesMatrix %v3float %3053 %602 + %3055 = OpExtInst %v3float %1 FMax %263 %3054 + %3056 = OpFMul %v3float %3055 %275 + %3057 = OpExtInst %v3float %1 FMax %3055 %277 + %3058 = OpExtInst %v3float %1 Pow %3057 %279 + %3059 = OpFMul %v3float %3058 %281 + %3060 = OpFSub %v3float %3059 %283 + %3061 = OpExtInst %v3float %1 FMin %3056 %3060 + OpBranch %1330 + %1330 = OpLabel + %3062 = OpPhi %v3float %3052 %1336 %3061 %1331 + OpBranch %1326 + %1327 = OpLabel + %3063 = OpCompositeExtract %float %1324 0 + OpBranch %3064 + %3064 = OpLabel + OpLoopMerge %3065 %3066 None + OpBranch %3067 + %3067 = OpLabel + %3068 = OpFOrdLessThan %bool %3063 %float_0_00313066994 + OpSelectionMerge %3069 None + OpBranchConditional %3068 %3070 %3069 + %3070 = OpLabel + %3071 = OpFMul %float %3063 %float_12_9200001 + OpBranch %3065 + %3069 = OpLabel + %3072 = OpExtInst %float %1 Pow %3063 %float_0_416666657 + %3073 = OpFMul %float %3072 
%float_1_05499995 + %3074 = OpFSub %float %3073 %float_0_0549999997 + OpBranch %3065 + %3066 = OpLabel + OpBranch %3064 + %3065 = OpLabel + %3075 = OpPhi %float %3071 %3070 %3074 %3069 + %3076 = OpCompositeExtract %float %1324 1 + OpBranch %3077 + %3077 = OpLabel + OpLoopMerge %3078 %3079 None + OpBranch %3080 + %3080 = OpLabel + %3081 = OpFOrdLessThan %bool %3076 %float_0_00313066994 + OpSelectionMerge %3082 None + OpBranchConditional %3081 %3083 %3082 + %3083 = OpLabel + %3084 = OpFMul %float %3076 %float_12_9200001 + OpBranch %3078 + %3082 = OpLabel + %3085 = OpExtInst %float %1 Pow %3076 %float_0_416666657 + %3086 = OpFMul %float %3085 %float_1_05499995 + %3087 = OpFSub %float %3086 %float_0_0549999997 + OpBranch %3078 + %3079 = OpLabel + OpBranch %3077 + %3078 = OpLabel + %3088 = OpPhi %float %3084 %3083 %3087 %3082 + %3089 = OpCompositeExtract %float %1324 2 + OpBranch %3090 + %3090 = OpLabel + OpLoopMerge %3091 %3092 None + OpBranch %3093 + %3093 = OpLabel + %3094 = OpFOrdLessThan %bool %3089 %float_0_00313066994 + OpSelectionMerge %3095 None + OpBranchConditional %3094 %3096 %3095 + %3096 = OpLabel + %3097 = OpFMul %float %3089 %float_12_9200001 + OpBranch %3091 + %3095 = OpLabel + %3098 = OpExtInst %float %1 Pow %3089 %float_0_416666657 + %3099 = OpFMul %float %3098 %float_1_05499995 + %3100 = OpFSub %float %3099 %float_0_0549999997 + OpBranch %3091 + %3092 = OpLabel + OpBranch %3090 + %3091 = OpLabel + %3101 = OpPhi %float %3097 %3096 %3100 %3095 + %3102 = OpCompositeConstruct %v3float %3075 %3088 %3101 + OpBranch %1326 + %1326 = OpLabel + %3103 = OpPhi %v3float %3062 %1330 %3102 %3091 + %3104 = OpFMul %v3float %3103 %522 + %3105 = OpVectorShuffle %v4float %135 %3104 4 5 6 3 + %3106 = OpCompositeInsert %v4float %float_0 %3105 3 + OpStore %out_var_SV_Target0 %3106 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag b/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag new file mode 100644 index 
00000000000..097eb6354d5 --- /dev/null +++ b/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag @@ -0,0 +1,1230 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 271 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %MainPS "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_PRIMITIVE_ID %in_var_TEXCOORD7 %gl_FragCoord %gl_FrontFacing %gl_SampleMask %out_var_SV_Target0 %gl_SampleMask_0 + OpExecutionMode %MainPS OriginUpperLeft + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + 
OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 
"View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 
"View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName 
%type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 
"View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_3d_image "type.3d.image" + OpName %type_sampler "type.sampler" + OpName %View_SharedBilinearClampedSampler "View_SharedBilinearClampedSampler" + OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float" + OpName %View_PrimitiveSceneData "View_PrimitiveSceneData" + OpName %type_TranslucentBasePass "type.TranslucentBasePass" + OpMemberName %type_TranslucentBasePass 0 "TranslucentBasePass_Shared_Forward_NumLocalLights" + OpMemberName %type_TranslucentBasePass 1 "TranslucentBasePass_Shared_Forward_NumReflectionCaptures" + OpMemberName %type_TranslucentBasePass 2 "TranslucentBasePass_Shared_Forward_HasDirectionalLight" + OpMemberName %type_TranslucentBasePass 3 "TranslucentBasePass_Shared_Forward_NumGridCells" + OpMemberName %type_TranslucentBasePass 4 "TranslucentBasePass_Shared_Forward_CulledGridSize" + OpMemberName %type_TranslucentBasePass 5 "TranslucentBasePass_Shared_Forward_MaxCulledLightsPerCell" + OpMemberName %type_TranslucentBasePass 6 "TranslucentBasePass_Shared_Forward_LightGridPixelSizeShift" + OpMemberName %type_TranslucentBasePass 7 "PrePadding_TranslucentBasePass_Shared_Forward_36" + OpMemberName %type_TranslucentBasePass 8 "PrePadding_TranslucentBasePass_Shared_Forward_40" + OpMemberName %type_TranslucentBasePass 9 "PrePadding_TranslucentBasePass_Shared_Forward_44" + OpMemberName %type_TranslucentBasePass 10 
"TranslucentBasePass_Shared_Forward_LightGridZParams" + OpMemberName %type_TranslucentBasePass 11 "PrePadding_TranslucentBasePass_Shared_Forward_60" + OpMemberName %type_TranslucentBasePass 12 "TranslucentBasePass_Shared_Forward_DirectionalLightDirection" + OpMemberName %type_TranslucentBasePass 13 "PrePadding_TranslucentBasePass_Shared_Forward_76" + OpMemberName %type_TranslucentBasePass 14 "TranslucentBasePass_Shared_Forward_DirectionalLightColor" + OpMemberName %type_TranslucentBasePass 15 "TranslucentBasePass_Shared_Forward_DirectionalLightVolumetricScatteringIntensity" + OpMemberName %type_TranslucentBasePass 16 "TranslucentBasePass_Shared_Forward_DirectionalLightShadowMapChannelMask" + OpMemberName %type_TranslucentBasePass 17 "PrePadding_TranslucentBasePass_Shared_Forward_100" + OpMemberName %type_TranslucentBasePass 18 "TranslucentBasePass_Shared_Forward_DirectionalLightDistanceFadeMAD" + OpMemberName %type_TranslucentBasePass 19 "TranslucentBasePass_Shared_Forward_NumDirectionalLightCascades" + OpMemberName %type_TranslucentBasePass 20 "PrePadding_TranslucentBasePass_Shared_Forward_116" + OpMemberName %type_TranslucentBasePass 21 "PrePadding_TranslucentBasePass_Shared_Forward_120" + OpMemberName %type_TranslucentBasePass 22 "PrePadding_TranslucentBasePass_Shared_Forward_124" + OpMemberName %type_TranslucentBasePass 23 "TranslucentBasePass_Shared_Forward_CascadeEndDepths" + OpMemberName %type_TranslucentBasePass 24 "TranslucentBasePass_Shared_Forward_DirectionalLightWorldToShadowMatrix" + OpMemberName %type_TranslucentBasePass 25 "TranslucentBasePass_Shared_Forward_DirectionalLightShadowmapMinMax" + OpMemberName %type_TranslucentBasePass 26 "TranslucentBasePass_Shared_Forward_DirectionalLightShadowmapAtlasBufferSize" + OpMemberName %type_TranslucentBasePass 27 "TranslucentBasePass_Shared_Forward_DirectionalLightDepthBias" + OpMemberName %type_TranslucentBasePass 28 "TranslucentBasePass_Shared_Forward_DirectionalLightUseStaticShadowing" + OpMemberName 
%type_TranslucentBasePass 29 "PrePadding_TranslucentBasePass_Shared_Forward_488" + OpMemberName %type_TranslucentBasePass 30 "PrePadding_TranslucentBasePass_Shared_Forward_492" + OpMemberName %type_TranslucentBasePass 31 "TranslucentBasePass_Shared_Forward_DirectionalLightStaticShadowBufferSize" + OpMemberName %type_TranslucentBasePass 32 "TranslucentBasePass_Shared_Forward_DirectionalLightWorldToStaticShadow" + OpMemberName %type_TranslucentBasePass 33 "PrePadding_TranslucentBasePass_Shared_ForwardISR_576" + OpMemberName %type_TranslucentBasePass 34 "PrePadding_TranslucentBasePass_Shared_ForwardISR_580" + OpMemberName %type_TranslucentBasePass 35 "PrePadding_TranslucentBasePass_Shared_ForwardISR_584" + OpMemberName %type_TranslucentBasePass 36 "PrePadding_TranslucentBasePass_Shared_ForwardISR_588" + OpMemberName %type_TranslucentBasePass 37 "PrePadding_TranslucentBasePass_Shared_ForwardISR_592" + OpMemberName %type_TranslucentBasePass 38 "PrePadding_TranslucentBasePass_Shared_ForwardISR_596" + OpMemberName %type_TranslucentBasePass 39 "PrePadding_TranslucentBasePass_Shared_ForwardISR_600" + OpMemberName %type_TranslucentBasePass 40 "PrePadding_TranslucentBasePass_Shared_ForwardISR_604" + OpMemberName %type_TranslucentBasePass 41 "PrePadding_TranslucentBasePass_Shared_ForwardISR_608" + OpMemberName %type_TranslucentBasePass 42 "PrePadding_TranslucentBasePass_Shared_ForwardISR_612" + OpMemberName %type_TranslucentBasePass 43 "PrePadding_TranslucentBasePass_Shared_ForwardISR_616" + OpMemberName %type_TranslucentBasePass 44 "PrePadding_TranslucentBasePass_Shared_ForwardISR_620" + OpMemberName %type_TranslucentBasePass 45 "PrePadding_TranslucentBasePass_Shared_ForwardISR_624" + OpMemberName %type_TranslucentBasePass 46 "PrePadding_TranslucentBasePass_Shared_ForwardISR_628" + OpMemberName %type_TranslucentBasePass 47 "PrePadding_TranslucentBasePass_Shared_ForwardISR_632" + OpMemberName %type_TranslucentBasePass 48 "PrePadding_TranslucentBasePass_Shared_ForwardISR_636" + 
OpMemberName %type_TranslucentBasePass 49 "TranslucentBasePass_Shared_ForwardISR_NumLocalLights" + OpMemberName %type_TranslucentBasePass 50 "TranslucentBasePass_Shared_ForwardISR_NumReflectionCaptures" + OpMemberName %type_TranslucentBasePass 51 "TranslucentBasePass_Shared_ForwardISR_HasDirectionalLight" + OpMemberName %type_TranslucentBasePass 52 "TranslucentBasePass_Shared_ForwardISR_NumGridCells" + OpMemberName %type_TranslucentBasePass 53 "TranslucentBasePass_Shared_ForwardISR_CulledGridSize" + OpMemberName %type_TranslucentBasePass 54 "TranslucentBasePass_Shared_ForwardISR_MaxCulledLightsPerCell" + OpMemberName %type_TranslucentBasePass 55 "TranslucentBasePass_Shared_ForwardISR_LightGridPixelSizeShift" + OpMemberName %type_TranslucentBasePass 56 "PrePadding_TranslucentBasePass_Shared_ForwardISR_676" + OpMemberName %type_TranslucentBasePass 57 "PrePadding_TranslucentBasePass_Shared_ForwardISR_680" + OpMemberName %type_TranslucentBasePass 58 "PrePadding_TranslucentBasePass_Shared_ForwardISR_684" + OpMemberName %type_TranslucentBasePass 59 "TranslucentBasePass_Shared_ForwardISR_LightGridZParams" + OpMemberName %type_TranslucentBasePass 60 "PrePadding_TranslucentBasePass_Shared_ForwardISR_700" + OpMemberName %type_TranslucentBasePass 61 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightDirection" + OpMemberName %type_TranslucentBasePass 62 "PrePadding_TranslucentBasePass_Shared_ForwardISR_716" + OpMemberName %type_TranslucentBasePass 63 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightColor" + OpMemberName %type_TranslucentBasePass 64 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightVolumetricScatteringIntensity" + OpMemberName %type_TranslucentBasePass 65 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowMapChannelMask" + OpMemberName %type_TranslucentBasePass 66 "PrePadding_TranslucentBasePass_Shared_ForwardISR_740" + OpMemberName %type_TranslucentBasePass 67 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightDistanceFadeMAD" + 
OpMemberName %type_TranslucentBasePass 68 "TranslucentBasePass_Shared_ForwardISR_NumDirectionalLightCascades" + OpMemberName %type_TranslucentBasePass 69 "PrePadding_TranslucentBasePass_Shared_ForwardISR_756" + OpMemberName %type_TranslucentBasePass 70 "PrePadding_TranslucentBasePass_Shared_ForwardISR_760" + OpMemberName %type_TranslucentBasePass 71 "PrePadding_TranslucentBasePass_Shared_ForwardISR_764" + OpMemberName %type_TranslucentBasePass 72 "TranslucentBasePass_Shared_ForwardISR_CascadeEndDepths" + OpMemberName %type_TranslucentBasePass 73 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightWorldToShadowMatrix" + OpMemberName %type_TranslucentBasePass 74 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowmapMinMax" + OpMemberName %type_TranslucentBasePass 75 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowmapAtlasBufferSize" + OpMemberName %type_TranslucentBasePass 76 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightDepthBias" + OpMemberName %type_TranslucentBasePass 77 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightUseStaticShadowing" + OpMemberName %type_TranslucentBasePass 78 "PrePadding_TranslucentBasePass_Shared_ForwardISR_1128" + OpMemberName %type_TranslucentBasePass 79 "PrePadding_TranslucentBasePass_Shared_ForwardISR_1132" + OpMemberName %type_TranslucentBasePass 80 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightStaticShadowBufferSize" + OpMemberName %type_TranslucentBasePass 81 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightWorldToStaticShadow" + OpMemberName %type_TranslucentBasePass 82 "PrePadding_TranslucentBasePass_Shared_Reflection_1216" + OpMemberName %type_TranslucentBasePass 83 "PrePadding_TranslucentBasePass_Shared_Reflection_1220" + OpMemberName %type_TranslucentBasePass 84 "PrePadding_TranslucentBasePass_Shared_Reflection_1224" + OpMemberName %type_TranslucentBasePass 85 "PrePadding_TranslucentBasePass_Shared_Reflection_1228" + OpMemberName %type_TranslucentBasePass 86 
"PrePadding_TranslucentBasePass_Shared_Reflection_1232" + OpMemberName %type_TranslucentBasePass 87 "PrePadding_TranslucentBasePass_Shared_Reflection_1236" + OpMemberName %type_TranslucentBasePass 88 "PrePadding_TranslucentBasePass_Shared_Reflection_1240" + OpMemberName %type_TranslucentBasePass 89 "PrePadding_TranslucentBasePass_Shared_Reflection_1244" + OpMemberName %type_TranslucentBasePass 90 "PrePadding_TranslucentBasePass_Shared_Reflection_1248" + OpMemberName %type_TranslucentBasePass 91 "PrePadding_TranslucentBasePass_Shared_Reflection_1252" + OpMemberName %type_TranslucentBasePass 92 "PrePadding_TranslucentBasePass_Shared_Reflection_1256" + OpMemberName %type_TranslucentBasePass 93 "PrePadding_TranslucentBasePass_Shared_Reflection_1260" + OpMemberName %type_TranslucentBasePass 94 "PrePadding_TranslucentBasePass_Shared_Reflection_1264" + OpMemberName %type_TranslucentBasePass 95 "PrePadding_TranslucentBasePass_Shared_Reflection_1268" + OpMemberName %type_TranslucentBasePass 96 "PrePadding_TranslucentBasePass_Shared_Reflection_1272" + OpMemberName %type_TranslucentBasePass 97 "PrePadding_TranslucentBasePass_Shared_Reflection_1276" + OpMemberName %type_TranslucentBasePass 98 "TranslucentBasePass_Shared_Reflection_SkyLightParameters" + OpMemberName %type_TranslucentBasePass 99 "TranslucentBasePass_Shared_Reflection_SkyLightCubemapBrightness" + OpMemberName %type_TranslucentBasePass 100 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1300" + OpMemberName %type_TranslucentBasePass 101 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1304" + OpMemberName %type_TranslucentBasePass 102 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1308" + OpMemberName %type_TranslucentBasePass 103 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1312" + OpMemberName %type_TranslucentBasePass 104 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1316" + OpMemberName %type_TranslucentBasePass 105 
"PrePadding_TranslucentBasePass_Shared_PlanarReflection_1320" + OpMemberName %type_TranslucentBasePass 106 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1324" + OpMemberName %type_TranslucentBasePass 107 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1328" + OpMemberName %type_TranslucentBasePass 108 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1332" + OpMemberName %type_TranslucentBasePass 109 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1336" + OpMemberName %type_TranslucentBasePass 110 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1340" + OpMemberName %type_TranslucentBasePass 111 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1344" + OpMemberName %type_TranslucentBasePass 112 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1348" + OpMemberName %type_TranslucentBasePass 113 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1352" + OpMemberName %type_TranslucentBasePass 114 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1356" + OpMemberName %type_TranslucentBasePass 115 "TranslucentBasePass_Shared_PlanarReflection_ReflectionPlane" + OpMemberName %type_TranslucentBasePass 116 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionOrigin" + OpMemberName %type_TranslucentBasePass 117 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionXAxis" + OpMemberName %type_TranslucentBasePass 118 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionYAxis" + OpMemberName %type_TranslucentBasePass 119 "TranslucentBasePass_Shared_PlanarReflection_InverseTransposeMirrorMatrix" + OpMemberName %type_TranslucentBasePass 120 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionParameters" + OpMemberName %type_TranslucentBasePass 121 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1484" + OpMemberName %type_TranslucentBasePass 122 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionParameters2" + OpMemberName %type_TranslucentBasePass 123 
"PrePadding_TranslucentBasePass_Shared_PlanarReflection_1496" + OpMemberName %type_TranslucentBasePass 124 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1500" + OpMemberName %type_TranslucentBasePass 125 "TranslucentBasePass_Shared_PlanarReflection_ProjectionWithExtraFOV" + OpMemberName %type_TranslucentBasePass 126 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionScreenScaleBias" + OpMemberName %type_TranslucentBasePass 127 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionScreenBound" + OpMemberName %type_TranslucentBasePass 128 "TranslucentBasePass_Shared_PlanarReflection_bIsStereo" + OpMemberName %type_TranslucentBasePass 129 "PrePadding_TranslucentBasePass_Shared_Fog_1676" + OpMemberName %type_TranslucentBasePass 130 "PrePadding_TranslucentBasePass_Shared_Fog_1680" + OpMemberName %type_TranslucentBasePass 131 "PrePadding_TranslucentBasePass_Shared_Fog_1684" + OpMemberName %type_TranslucentBasePass 132 "PrePadding_TranslucentBasePass_Shared_Fog_1688" + OpMemberName %type_TranslucentBasePass 133 "PrePadding_TranslucentBasePass_Shared_Fog_1692" + OpMemberName %type_TranslucentBasePass 134 "TranslucentBasePass_Shared_Fog_ExponentialFogParameters" + OpMemberName %type_TranslucentBasePass 135 "TranslucentBasePass_Shared_Fog_ExponentialFogParameters2" + OpMemberName %type_TranslucentBasePass 136 "TranslucentBasePass_Shared_Fog_ExponentialFogColorParameter" + OpMemberName %type_TranslucentBasePass 137 "TranslucentBasePass_Shared_Fog_ExponentialFogParameters3" + OpMemberName %type_TranslucentBasePass 138 "TranslucentBasePass_Shared_Fog_InscatteringLightDirection" + OpMemberName %type_TranslucentBasePass 139 "TranslucentBasePass_Shared_Fog_DirectionalInscatteringColor" + OpMemberName %type_TranslucentBasePass 140 "TranslucentBasePass_Shared_Fog_SinCosInscatteringColorCubemapRotation" + OpMemberName %type_TranslucentBasePass 141 "PrePadding_TranslucentBasePass_Shared_Fog_1800" + OpMemberName %type_TranslucentBasePass 142 
"PrePadding_TranslucentBasePass_Shared_Fog_1804" + OpMemberName %type_TranslucentBasePass 143 "TranslucentBasePass_Shared_Fog_FogInscatteringTextureParameters" + OpMemberName %type_TranslucentBasePass 144 "TranslucentBasePass_Shared_Fog_ApplyVolumetricFog" + OpMemberName %type_TranslucentBasePass 145 "PrePadding_TranslucentBasePass_1824" + OpMemberName %type_TranslucentBasePass 146 "PrePadding_TranslucentBasePass_1828" + OpMemberName %type_TranslucentBasePass 147 "PrePadding_TranslucentBasePass_1832" + OpMemberName %type_TranslucentBasePass 148 "PrePadding_TranslucentBasePass_1836" + OpMemberName %type_TranslucentBasePass 149 "PrePadding_TranslucentBasePass_1840" + OpMemberName %type_TranslucentBasePass 150 "PrePadding_TranslucentBasePass_1844" + OpMemberName %type_TranslucentBasePass 151 "PrePadding_TranslucentBasePass_1848" + OpMemberName %type_TranslucentBasePass 152 "PrePadding_TranslucentBasePass_1852" + OpMemberName %type_TranslucentBasePass 153 "PrePadding_TranslucentBasePass_1856" + OpMemberName %type_TranslucentBasePass 154 "PrePadding_TranslucentBasePass_1860" + OpMemberName %type_TranslucentBasePass 155 "PrePadding_TranslucentBasePass_1864" + OpMemberName %type_TranslucentBasePass 156 "PrePadding_TranslucentBasePass_1868" + OpMemberName %type_TranslucentBasePass 157 "PrePadding_TranslucentBasePass_1872" + OpMemberName %type_TranslucentBasePass 158 "PrePadding_TranslucentBasePass_1876" + OpMemberName %type_TranslucentBasePass 159 "PrePadding_TranslucentBasePass_1880" + OpMemberName %type_TranslucentBasePass 160 "PrePadding_TranslucentBasePass_1884" + OpMemberName %type_TranslucentBasePass 161 "PrePadding_TranslucentBasePass_1888" + OpMemberName %type_TranslucentBasePass 162 "PrePadding_TranslucentBasePass_1892" + OpMemberName %type_TranslucentBasePass 163 "PrePadding_TranslucentBasePass_1896" + OpMemberName %type_TranslucentBasePass 164 "PrePadding_TranslucentBasePass_1900" + OpMemberName %type_TranslucentBasePass 165 "PrePadding_TranslucentBasePass_1904" 
+ OpMemberName %type_TranslucentBasePass 166 "PrePadding_TranslucentBasePass_1908" + OpMemberName %type_TranslucentBasePass 167 "PrePadding_TranslucentBasePass_1912" + OpMemberName %type_TranslucentBasePass 168 "PrePadding_TranslucentBasePass_1916" + OpMemberName %type_TranslucentBasePass 169 "PrePadding_TranslucentBasePass_1920" + OpMemberName %type_TranslucentBasePass 170 "PrePadding_TranslucentBasePass_1924" + OpMemberName %type_TranslucentBasePass 171 "PrePadding_TranslucentBasePass_1928" + OpMemberName %type_TranslucentBasePass 172 "PrePadding_TranslucentBasePass_1932" + OpMemberName %type_TranslucentBasePass 173 "PrePadding_TranslucentBasePass_1936" + OpMemberName %type_TranslucentBasePass 174 "PrePadding_TranslucentBasePass_1940" + OpMemberName %type_TranslucentBasePass 175 "PrePadding_TranslucentBasePass_1944" + OpMemberName %type_TranslucentBasePass 176 "PrePadding_TranslucentBasePass_1948" + OpMemberName %type_TranslucentBasePass 177 "PrePadding_TranslucentBasePass_1952" + OpMemberName %type_TranslucentBasePass 178 "PrePadding_TranslucentBasePass_1956" + OpMemberName %type_TranslucentBasePass 179 "PrePadding_TranslucentBasePass_1960" + OpMemberName %type_TranslucentBasePass 180 "PrePadding_TranslucentBasePass_1964" + OpMemberName %type_TranslucentBasePass 181 "PrePadding_TranslucentBasePass_1968" + OpMemberName %type_TranslucentBasePass 182 "PrePadding_TranslucentBasePass_1972" + OpMemberName %type_TranslucentBasePass 183 "PrePadding_TranslucentBasePass_1976" + OpMemberName %type_TranslucentBasePass 184 "PrePadding_TranslucentBasePass_1980" + OpMemberName %type_TranslucentBasePass 185 "PrePadding_TranslucentBasePass_1984" + OpMemberName %type_TranslucentBasePass 186 "PrePadding_TranslucentBasePass_1988" + OpMemberName %type_TranslucentBasePass 187 "PrePadding_TranslucentBasePass_1992" + OpMemberName %type_TranslucentBasePass 188 "PrePadding_TranslucentBasePass_1996" + OpMemberName %type_TranslucentBasePass 189 "PrePadding_TranslucentBasePass_2000" + 
OpMemberName %type_TranslucentBasePass 190 "PrePadding_TranslucentBasePass_2004" + OpMemberName %type_TranslucentBasePass 191 "PrePadding_TranslucentBasePass_2008" + OpMemberName %type_TranslucentBasePass 192 "PrePadding_TranslucentBasePass_2012" + OpMemberName %type_TranslucentBasePass 193 "PrePadding_TranslucentBasePass_2016" + OpMemberName %type_TranslucentBasePass 194 "PrePadding_TranslucentBasePass_2020" + OpMemberName %type_TranslucentBasePass 195 "PrePadding_TranslucentBasePass_2024" + OpMemberName %type_TranslucentBasePass 196 "PrePadding_TranslucentBasePass_2028" + OpMemberName %type_TranslucentBasePass 197 "PrePadding_TranslucentBasePass_2032" + OpMemberName %type_TranslucentBasePass 198 "PrePadding_TranslucentBasePass_2036" + OpMemberName %type_TranslucentBasePass 199 "PrePadding_TranslucentBasePass_2040" + OpMemberName %type_TranslucentBasePass 200 "PrePadding_TranslucentBasePass_2044" + OpMemberName %type_TranslucentBasePass 201 "PrePadding_TranslucentBasePass_2048" + OpMemberName %type_TranslucentBasePass 202 "PrePadding_TranslucentBasePass_2052" + OpMemberName %type_TranslucentBasePass 203 "PrePadding_TranslucentBasePass_2056" + OpMemberName %type_TranslucentBasePass 204 "PrePadding_TranslucentBasePass_2060" + OpMemberName %type_TranslucentBasePass 205 "PrePadding_TranslucentBasePass_2064" + OpMemberName %type_TranslucentBasePass 206 "PrePadding_TranslucentBasePass_2068" + OpMemberName %type_TranslucentBasePass 207 "PrePadding_TranslucentBasePass_2072" + OpMemberName %type_TranslucentBasePass 208 "PrePadding_TranslucentBasePass_2076" + OpMemberName %type_TranslucentBasePass 209 "PrePadding_TranslucentBasePass_2080" + OpMemberName %type_TranslucentBasePass 210 "PrePadding_TranslucentBasePass_2084" + OpMemberName %type_TranslucentBasePass 211 "PrePadding_TranslucentBasePass_2088" + OpMemberName %type_TranslucentBasePass 212 "PrePadding_TranslucentBasePass_2092" + OpMemberName %type_TranslucentBasePass 213 "PrePadding_TranslucentBasePass_2096" + 
OpMemberName %type_TranslucentBasePass 214 "PrePadding_TranslucentBasePass_2100" + OpMemberName %type_TranslucentBasePass 215 "PrePadding_TranslucentBasePass_2104" + OpMemberName %type_TranslucentBasePass 216 "PrePadding_TranslucentBasePass_2108" + OpMemberName %type_TranslucentBasePass 217 "PrePadding_TranslucentBasePass_2112" + OpMemberName %type_TranslucentBasePass 218 "PrePadding_TranslucentBasePass_2116" + OpMemberName %type_TranslucentBasePass 219 "PrePadding_TranslucentBasePass_2120" + OpMemberName %type_TranslucentBasePass 220 "PrePadding_TranslucentBasePass_2124" + OpMemberName %type_TranslucentBasePass 221 "PrePadding_TranslucentBasePass_2128" + OpMemberName %type_TranslucentBasePass 222 "PrePadding_TranslucentBasePass_2132" + OpMemberName %type_TranslucentBasePass 223 "PrePadding_TranslucentBasePass_2136" + OpMemberName %type_TranslucentBasePass 224 "PrePadding_TranslucentBasePass_2140" + OpMemberName %type_TranslucentBasePass 225 "TranslucentBasePass_HZBUvFactorAndInvFactor" + OpMemberName %type_TranslucentBasePass 226 "TranslucentBasePass_PrevScreenPositionScaleBias" + OpMemberName %type_TranslucentBasePass 227 "TranslucentBasePass_PrevSceneColorPreExposureInv" + OpName %TranslucentBasePass "TranslucentBasePass" + OpName %TranslucentBasePass_Shared_Fog_IntegratedLightScattering "TranslucentBasePass_Shared_Fog_IntegratedLightScattering" + OpName %type_Material "type.Material" + OpMemberName %type_Material 0 "Material_VectorExpressions" + OpMemberName %type_Material 1 "Material_ScalarExpressions" + OpName %Material "Material" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_PRIMITIVE_ID "in.var.PRIMITIVE_ID" + OpName %in_var_TEXCOORD7 "in.var.TEXCOORD7" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %MainPS "MainPS" + OpName %type_sampled_image "type.sampled.image" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic 
"TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorate %in_var_PRIMITIVE_ID Flat + OpDecorateString %in_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_Position" + OpDecorate %gl_FrontFacing BuiltIn FrontFacing + OpDecorateString %gl_FrontFacing UserSemantic "SV_IsFrontFace" + OpDecorate %gl_FrontFacing Flat + OpDecorate %gl_SampleMask BuiltIn SampleMask + OpDecorateString %gl_SampleMask UserSemantic "SV_Coverage" + OpDecorate %gl_SampleMask Flat + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %gl_SampleMask_0 BuiltIn SampleMask + OpDecorateString %gl_SampleMask_0 UserSemantic "SV_Coverage" + OpDecorate %in_var_TEXCOORD10_centroid Location 0 + OpDecorate %in_var_TEXCOORD11_centroid Location 1 + OpDecorate %in_var_PRIMITIVE_ID Location 2 + OpDecorate %in_var_TEXCOORD7 Location 3 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 1 + OpDecorate %View_SharedBilinearClampedSampler DescriptorSet 0 + OpDecorate %View_SharedBilinearClampedSampler Binding 0 + OpDecorate %View_PrimitiveSceneData DescriptorSet 0 + OpDecorate %View_PrimitiveSceneData Binding 0 + OpDecorate %TranslucentBasePass DescriptorSet 0 + OpDecorate %TranslucentBasePass Binding 2 + OpDecorate %TranslucentBasePass_Shared_Fog_IntegratedLightScattering DescriptorSet 0 + OpDecorate %TranslucentBasePass_Shared_Fog_IntegratedLightScattering Binding 0 + OpDecorate %Material DescriptorSet 0 + OpDecorate %Material Binding 3 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate 
%type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 
22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + 
OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate 
%type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + 
OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate 
%type_StructuredBuffer_v4float 0 Offset 0 + OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable + OpDecorate %type_StructuredBuffer_v4float BufferBlock + OpDecorate %_arr_mat4v4float_uint_4 ArrayStride 64 + OpDecorate %_arr_mat4v4float_uint_2 ArrayStride 64 + OpMemberDecorate %type_TranslucentBasePass 0 Offset 0 + OpMemberDecorate %type_TranslucentBasePass 1 Offset 4 + OpMemberDecorate %type_TranslucentBasePass 2 Offset 8 + OpMemberDecorate %type_TranslucentBasePass 3 Offset 12 + OpMemberDecorate %type_TranslucentBasePass 4 Offset 16 + OpMemberDecorate %type_TranslucentBasePass 5 Offset 28 + OpMemberDecorate %type_TranslucentBasePass 6 Offset 32 + OpMemberDecorate %type_TranslucentBasePass 7 Offset 36 + OpMemberDecorate %type_TranslucentBasePass 8 Offset 40 + OpMemberDecorate %type_TranslucentBasePass 9 Offset 44 + OpMemberDecorate %type_TranslucentBasePass 10 Offset 48 + OpMemberDecorate %type_TranslucentBasePass 11 Offset 60 + OpMemberDecorate %type_TranslucentBasePass 12 Offset 64 + OpMemberDecorate %type_TranslucentBasePass 13 Offset 76 + OpMemberDecorate %type_TranslucentBasePass 14 Offset 80 + OpMemberDecorate %type_TranslucentBasePass 15 Offset 92 + OpMemberDecorate %type_TranslucentBasePass 16 Offset 96 + OpMemberDecorate %type_TranslucentBasePass 17 Offset 100 + OpMemberDecorate %type_TranslucentBasePass 18 Offset 104 + OpMemberDecorate %type_TranslucentBasePass 19 Offset 112 + OpMemberDecorate %type_TranslucentBasePass 20 Offset 116 + OpMemberDecorate %type_TranslucentBasePass 21 Offset 120 + OpMemberDecorate %type_TranslucentBasePass 22 Offset 124 + OpMemberDecorate %type_TranslucentBasePass 23 Offset 128 + OpMemberDecorate %type_TranslucentBasePass 24 Offset 144 + OpMemberDecorate %type_TranslucentBasePass 24 MatrixStride 16 + OpMemberDecorate %type_TranslucentBasePass 24 ColMajor + OpMemberDecorate %type_TranslucentBasePass 25 Offset 400 + OpMemberDecorate %type_TranslucentBasePass 26 Offset 464 + OpMemberDecorate %type_TranslucentBasePass 27 
Offset 480 + OpMemberDecorate %type_TranslucentBasePass 28 Offset 484 + OpMemberDecorate %type_TranslucentBasePass 29 Offset 488 + OpMemberDecorate %type_TranslucentBasePass 30 Offset 492 + OpMemberDecorate %type_TranslucentBasePass 31 Offset 496 + OpMemberDecorate %type_TranslucentBasePass 32 Offset 512 + OpMemberDecorate %type_TranslucentBasePass 32 MatrixStride 16 + OpMemberDecorate %type_TranslucentBasePass 32 ColMajor + OpMemberDecorate %type_TranslucentBasePass 33 Offset 576 + OpMemberDecorate %type_TranslucentBasePass 34 Offset 580 + OpMemberDecorate %type_TranslucentBasePass 35 Offset 584 + OpMemberDecorate %type_TranslucentBasePass 36 Offset 588 + OpMemberDecorate %type_TranslucentBasePass 37 Offset 592 + OpMemberDecorate %type_TranslucentBasePass 38 Offset 596 + OpMemberDecorate %type_TranslucentBasePass 39 Offset 600 + OpMemberDecorate %type_TranslucentBasePass 40 Offset 604 + OpMemberDecorate %type_TranslucentBasePass 41 Offset 608 + OpMemberDecorate %type_TranslucentBasePass 42 Offset 612 + OpMemberDecorate %type_TranslucentBasePass 43 Offset 616 + OpMemberDecorate %type_TranslucentBasePass 44 Offset 620 + OpMemberDecorate %type_TranslucentBasePass 45 Offset 624 + OpMemberDecorate %type_TranslucentBasePass 46 Offset 628 + OpMemberDecorate %type_TranslucentBasePass 47 Offset 632 + OpMemberDecorate %type_TranslucentBasePass 48 Offset 636 + OpMemberDecorate %type_TranslucentBasePass 49 Offset 640 + OpMemberDecorate %type_TranslucentBasePass 50 Offset 644 + OpMemberDecorate %type_TranslucentBasePass 51 Offset 648 + OpMemberDecorate %type_TranslucentBasePass 52 Offset 652 + OpMemberDecorate %type_TranslucentBasePass 53 Offset 656 + OpMemberDecorate %type_TranslucentBasePass 54 Offset 668 + OpMemberDecorate %type_TranslucentBasePass 55 Offset 672 + OpMemberDecorate %type_TranslucentBasePass 56 Offset 676 + OpMemberDecorate %type_TranslucentBasePass 57 Offset 680 + OpMemberDecorate %type_TranslucentBasePass 58 Offset 684 + OpMemberDecorate 
%type_TranslucentBasePass 59 Offset 688 + OpMemberDecorate %type_TranslucentBasePass 60 Offset 700 + OpMemberDecorate %type_TranslucentBasePass 61 Offset 704 + OpMemberDecorate %type_TranslucentBasePass 62 Offset 716 + OpMemberDecorate %type_TranslucentBasePass 63 Offset 720 + OpMemberDecorate %type_TranslucentBasePass 64 Offset 732 + OpMemberDecorate %type_TranslucentBasePass 65 Offset 736 + OpMemberDecorate %type_TranslucentBasePass 66 Offset 740 + OpMemberDecorate %type_TranslucentBasePass 67 Offset 744 + OpMemberDecorate %type_TranslucentBasePass 68 Offset 752 + OpMemberDecorate %type_TranslucentBasePass 69 Offset 756 + OpMemberDecorate %type_TranslucentBasePass 70 Offset 760 + OpMemberDecorate %type_TranslucentBasePass 71 Offset 764 + OpMemberDecorate %type_TranslucentBasePass 72 Offset 768 + OpMemberDecorate %type_TranslucentBasePass 73 Offset 784 + OpMemberDecorate %type_TranslucentBasePass 73 MatrixStride 16 + OpMemberDecorate %type_TranslucentBasePass 73 ColMajor + OpMemberDecorate %type_TranslucentBasePass 74 Offset 1040 + OpMemberDecorate %type_TranslucentBasePass 75 Offset 1104 + OpMemberDecorate %type_TranslucentBasePass 76 Offset 1120 + OpMemberDecorate %type_TranslucentBasePass 77 Offset 1124 + OpMemberDecorate %type_TranslucentBasePass 78 Offset 1128 + OpMemberDecorate %type_TranslucentBasePass 79 Offset 1132 + OpMemberDecorate %type_TranslucentBasePass 80 Offset 1136 + OpMemberDecorate %type_TranslucentBasePass 81 Offset 1152 + OpMemberDecorate %type_TranslucentBasePass 81 MatrixStride 16 + OpMemberDecorate %type_TranslucentBasePass 81 ColMajor + OpMemberDecorate %type_TranslucentBasePass 82 Offset 1216 + OpMemberDecorate %type_TranslucentBasePass 83 Offset 1220 + OpMemberDecorate %type_TranslucentBasePass 84 Offset 1224 + OpMemberDecorate %type_TranslucentBasePass 85 Offset 1228 + OpMemberDecorate %type_TranslucentBasePass 86 Offset 1232 + OpMemberDecorate %type_TranslucentBasePass 87 Offset 1236 + OpMemberDecorate %type_TranslucentBasePass 88 
Offset 1240 + OpMemberDecorate %type_TranslucentBasePass 89 Offset 1244 + OpMemberDecorate %type_TranslucentBasePass 90 Offset 1248 + OpMemberDecorate %type_TranslucentBasePass 91 Offset 1252 + OpMemberDecorate %type_TranslucentBasePass 92 Offset 1256 + OpMemberDecorate %type_TranslucentBasePass 93 Offset 1260 + OpMemberDecorate %type_TranslucentBasePass 94 Offset 1264 + OpMemberDecorate %type_TranslucentBasePass 95 Offset 1268 + OpMemberDecorate %type_TranslucentBasePass 96 Offset 1272 + OpMemberDecorate %type_TranslucentBasePass 97 Offset 1276 + OpMemberDecorate %type_TranslucentBasePass 98 Offset 1280 + OpMemberDecorate %type_TranslucentBasePass 99 Offset 1296 + OpMemberDecorate %type_TranslucentBasePass 100 Offset 1300 + OpMemberDecorate %type_TranslucentBasePass 101 Offset 1304 + OpMemberDecorate %type_TranslucentBasePass 102 Offset 1308 + OpMemberDecorate %type_TranslucentBasePass 103 Offset 1312 + OpMemberDecorate %type_TranslucentBasePass 104 Offset 1316 + OpMemberDecorate %type_TranslucentBasePass 105 Offset 1320 + OpMemberDecorate %type_TranslucentBasePass 106 Offset 1324 + OpMemberDecorate %type_TranslucentBasePass 107 Offset 1328 + OpMemberDecorate %type_TranslucentBasePass 108 Offset 1332 + OpMemberDecorate %type_TranslucentBasePass 109 Offset 1336 + OpMemberDecorate %type_TranslucentBasePass 110 Offset 1340 + OpMemberDecorate %type_TranslucentBasePass 111 Offset 1344 + OpMemberDecorate %type_TranslucentBasePass 112 Offset 1348 + OpMemberDecorate %type_TranslucentBasePass 113 Offset 1352 + OpMemberDecorate %type_TranslucentBasePass 114 Offset 1356 + OpMemberDecorate %type_TranslucentBasePass 115 Offset 1360 + OpMemberDecorate %type_TranslucentBasePass 116 Offset 1376 + OpMemberDecorate %type_TranslucentBasePass 117 Offset 1392 + OpMemberDecorate %type_TranslucentBasePass 118 Offset 1408 + OpMemberDecorate %type_TranslucentBasePass 119 Offset 1424 + OpMemberDecorate %type_TranslucentBasePass 119 MatrixStride 16 + OpMemberDecorate 
%type_TranslucentBasePass 119 ColMajor + OpMemberDecorate %type_TranslucentBasePass 120 Offset 1472 + OpMemberDecorate %type_TranslucentBasePass 121 Offset 1484 + OpMemberDecorate %type_TranslucentBasePass 122 Offset 1488 + OpMemberDecorate %type_TranslucentBasePass 123 Offset 1496 + OpMemberDecorate %type_TranslucentBasePass 124 Offset 1500 + OpMemberDecorate %type_TranslucentBasePass 125 Offset 1504 + OpMemberDecorate %type_TranslucentBasePass 125 MatrixStride 16 + OpMemberDecorate %type_TranslucentBasePass 125 ColMajor + OpMemberDecorate %type_TranslucentBasePass 126 Offset 1632 + OpMemberDecorate %type_TranslucentBasePass 127 Offset 1664 + OpMemberDecorate %type_TranslucentBasePass 128 Offset 1672 + OpMemberDecorate %type_TranslucentBasePass 129 Offset 1676 + OpMemberDecorate %type_TranslucentBasePass 130 Offset 1680 + OpMemberDecorate %type_TranslucentBasePass 131 Offset 1684 + OpMemberDecorate %type_TranslucentBasePass 132 Offset 1688 + OpMemberDecorate %type_TranslucentBasePass 133 Offset 1692 + OpMemberDecorate %type_TranslucentBasePass 134 Offset 1696 + OpMemberDecorate %type_TranslucentBasePass 135 Offset 1712 + OpMemberDecorate %type_TranslucentBasePass 136 Offset 1728 + OpMemberDecorate %type_TranslucentBasePass 137 Offset 1744 + OpMemberDecorate %type_TranslucentBasePass 138 Offset 1760 + OpMemberDecorate %type_TranslucentBasePass 139 Offset 1776 + OpMemberDecorate %type_TranslucentBasePass 140 Offset 1792 + OpMemberDecorate %type_TranslucentBasePass 141 Offset 1800 + OpMemberDecorate %type_TranslucentBasePass 142 Offset 1804 + OpMemberDecorate %type_TranslucentBasePass 143 Offset 1808 + OpMemberDecorate %type_TranslucentBasePass 144 Offset 1820 + OpMemberDecorate %type_TranslucentBasePass 145 Offset 1824 + OpMemberDecorate %type_TranslucentBasePass 146 Offset 1828 + OpMemberDecorate %type_TranslucentBasePass 147 Offset 1832 + OpMemberDecorate %type_TranslucentBasePass 148 Offset 1836 + OpMemberDecorate %type_TranslucentBasePass 149 Offset 1840 + 
OpMemberDecorate %type_TranslucentBasePass 150 Offset 1844 + OpMemberDecorate %type_TranslucentBasePass 151 Offset 1848 + OpMemberDecorate %type_TranslucentBasePass 152 Offset 1852 + OpMemberDecorate %type_TranslucentBasePass 153 Offset 1856 + OpMemberDecorate %type_TranslucentBasePass 154 Offset 1860 + OpMemberDecorate %type_TranslucentBasePass 155 Offset 1864 + OpMemberDecorate %type_TranslucentBasePass 156 Offset 1868 + OpMemberDecorate %type_TranslucentBasePass 157 Offset 1872 + OpMemberDecorate %type_TranslucentBasePass 158 Offset 1876 + OpMemberDecorate %type_TranslucentBasePass 159 Offset 1880 + OpMemberDecorate %type_TranslucentBasePass 160 Offset 1884 + OpMemberDecorate %type_TranslucentBasePass 161 Offset 1888 + OpMemberDecorate %type_TranslucentBasePass 162 Offset 1892 + OpMemberDecorate %type_TranslucentBasePass 163 Offset 1896 + OpMemberDecorate %type_TranslucentBasePass 164 Offset 1900 + OpMemberDecorate %type_TranslucentBasePass 165 Offset 1904 + OpMemberDecorate %type_TranslucentBasePass 166 Offset 1908 + OpMemberDecorate %type_TranslucentBasePass 167 Offset 1912 + OpMemberDecorate %type_TranslucentBasePass 168 Offset 1916 + OpMemberDecorate %type_TranslucentBasePass 169 Offset 1920 + OpMemberDecorate %type_TranslucentBasePass 170 Offset 1924 + OpMemberDecorate %type_TranslucentBasePass 171 Offset 1928 + OpMemberDecorate %type_TranslucentBasePass 172 Offset 1932 + OpMemberDecorate %type_TranslucentBasePass 173 Offset 1936 + OpMemberDecorate %type_TranslucentBasePass 174 Offset 1940 + OpMemberDecorate %type_TranslucentBasePass 175 Offset 1944 + OpMemberDecorate %type_TranslucentBasePass 176 Offset 1948 + OpMemberDecorate %type_TranslucentBasePass 177 Offset 1952 + OpMemberDecorate %type_TranslucentBasePass 178 Offset 1956 + OpMemberDecorate %type_TranslucentBasePass 179 Offset 1960 + OpMemberDecorate %type_TranslucentBasePass 180 Offset 1964 + OpMemberDecorate %type_TranslucentBasePass 181 Offset 1968 + OpMemberDecorate %type_TranslucentBasePass 182 
Offset 1972 + OpMemberDecorate %type_TranslucentBasePass 183 Offset 1976 + OpMemberDecorate %type_TranslucentBasePass 184 Offset 1980 + OpMemberDecorate %type_TranslucentBasePass 185 Offset 1984 + OpMemberDecorate %type_TranslucentBasePass 186 Offset 1988 + OpMemberDecorate %type_TranslucentBasePass 187 Offset 1992 + OpMemberDecorate %type_TranslucentBasePass 188 Offset 1996 + OpMemberDecorate %type_TranslucentBasePass 189 Offset 2000 + OpMemberDecorate %type_TranslucentBasePass 190 Offset 2004 + OpMemberDecorate %type_TranslucentBasePass 191 Offset 2008 + OpMemberDecorate %type_TranslucentBasePass 192 Offset 2012 + OpMemberDecorate %type_TranslucentBasePass 193 Offset 2016 + OpMemberDecorate %type_TranslucentBasePass 194 Offset 2020 + OpMemberDecorate %type_TranslucentBasePass 195 Offset 2024 + OpMemberDecorate %type_TranslucentBasePass 196 Offset 2028 + OpMemberDecorate %type_TranslucentBasePass 197 Offset 2032 + OpMemberDecorate %type_TranslucentBasePass 198 Offset 2036 + OpMemberDecorate %type_TranslucentBasePass 199 Offset 2040 + OpMemberDecorate %type_TranslucentBasePass 200 Offset 2044 + OpMemberDecorate %type_TranslucentBasePass 201 Offset 2048 + OpMemberDecorate %type_TranslucentBasePass 202 Offset 2052 + OpMemberDecorate %type_TranslucentBasePass 203 Offset 2056 + OpMemberDecorate %type_TranslucentBasePass 204 Offset 2060 + OpMemberDecorate %type_TranslucentBasePass 205 Offset 2064 + OpMemberDecorate %type_TranslucentBasePass 206 Offset 2068 + OpMemberDecorate %type_TranslucentBasePass 207 Offset 2072 + OpMemberDecorate %type_TranslucentBasePass 208 Offset 2076 + OpMemberDecorate %type_TranslucentBasePass 209 Offset 2080 + OpMemberDecorate %type_TranslucentBasePass 210 Offset 2084 + OpMemberDecorate %type_TranslucentBasePass 211 Offset 2088 + OpMemberDecorate %type_TranslucentBasePass 212 Offset 2092 + OpMemberDecorate %type_TranslucentBasePass 213 Offset 2096 + OpMemberDecorate %type_TranslucentBasePass 214 Offset 2100 + OpMemberDecorate 
%type_TranslucentBasePass 215 Offset 2104 + OpMemberDecorate %type_TranslucentBasePass 216 Offset 2108 + OpMemberDecorate %type_TranslucentBasePass 217 Offset 2112 + OpMemberDecorate %type_TranslucentBasePass 218 Offset 2116 + OpMemberDecorate %type_TranslucentBasePass 219 Offset 2120 + OpMemberDecorate %type_TranslucentBasePass 220 Offset 2124 + OpMemberDecorate %type_TranslucentBasePass 221 Offset 2128 + OpMemberDecorate %type_TranslucentBasePass 222 Offset 2132 + OpMemberDecorate %type_TranslucentBasePass 223 Offset 2136 + OpMemberDecorate %type_TranslucentBasePass 224 Offset 2140 + OpMemberDecorate %type_TranslucentBasePass 225 Offset 2144 + OpMemberDecorate %type_TranslucentBasePass 226 Offset 2160 + OpMemberDecorate %type_TranslucentBasePass 227 Offset 2176 + OpDecorate %type_TranslucentBasePass Block + OpDecorate %_arr_v4float_uint_1 ArrayStride 16 + OpMemberDecorate %type_Material 0 Offset 0 + OpMemberDecorate %type_Material 1 Offset 32 + OpDecorate %type_Material Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 + %float_0 = OpConstant %float 0 + %48 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %int_10 = OpConstant %int 10 + %int_144 = OpConstant %int 144 + %int_70 = OpConstant %int 70 + %float_1 = OpConstant %float 1 + %53 = OpConstantComposite %v3float %float_1 %float_1 %float_1 +%float_0_577000022 = OpConstant %float 0.577000022 + %55 = OpConstantComposite %v3float %float_0_577000022 %float_0_577000022 %float_0_577000022 + %56 = OpConstantComposite %v3float %float_1 %float_1 %float_0 + %57 = OpConstantComposite %v3float %float_0 %float_1 
%float_1 + %float_0_5 = OpConstant %float 0.5 + %59 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 + %int_60 = OpConstant %int 60 + %61 = OpConstantComposite %v2float %float_0_5 %float_0_5 + %uint_26 = OpConstant %uint 26 + %uint_1 = OpConstant %uint 1 + %uint_5 = OpConstant %uint 5 + %uint_19 = OpConstant %uint 19 + %float_n0_5 = OpConstant %float -0.5 + %67 = OpConstantComposite %v2float %float_0_5 %float_n0_5 + %68 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %float_0_25 = OpConstant %float 0.25 + %int_31 = OpConstant %int 31 + %int_66 = OpConstant %int 66 + %int_153 = OpConstant %int 153 + %int_155 = OpConstant %int 155 +%mat3v3float = OpTypeMatrix %v3float 3 + %75 = OpConstantComposite %v3float %float_0 %float_0 %float_1 + %float_n1 = OpConstant %float -1 +%float_0_200000003 = OpConstant %float 0.200000003 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float 
%float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_3d_image = OpTypeImage %float 3D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%_runtimearr_v4float = OpTypeRuntimeArray %v4float +%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float + %v3int = OpTypeVector %int 3 +%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4 +%mat3v4float = OpTypeMatrix %v4float 3 +%_arr_mat4v4float_uint_2 = OpTypeArray %mat4v4float %uint_2 +%type_TranslucentBasePass = OpTypeStruct %uint %uint %uint %uint %v3int %uint %uint %uint %uint %uint %v3float %float %v3float %float %v3float %float %uint %uint %v2float %uint %uint %uint %uint %v4float %_arr_mat4v4float_uint_4 %_arr_v4float_uint_4 %v4float %float %uint %uint %uint %v4float %mat4v4float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %v3int %uint %uint %uint %uint %uint %v3float %float %v3float %float %v3float %float %uint %uint %v2float %uint %uint %uint %uint %v4float %_arr_mat4v4float_uint_4 %_arr_v4float_uint_4 %v4float %float %uint %uint %uint %v4float %mat4v4float %float %float %float %float %float %float 
%float %float %float %float %float %float %float %float %float %float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %v4float %v4float %v4float %mat3v4float %v3float %float %v2float %float %float %_arr_mat4v4float_uint_2 %_arr_v4float_uint_2 %v2float %uint %float %float %float %float %float %v4float %v4float %v4float %v4float %v4float %v4float %v2float %float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %v4float %float +%_ptr_Uniform_type_TranslucentBasePass = OpTypePointer Uniform %type_TranslucentBasePass +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%type_Material = OpTypeStruct %_arr_v4float_uint_2 %_arr_v4float_uint_1 +%_ptr_Uniform_type_Material = OpTypePointer Uniform %type_Material +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_bool = OpTypePointer Input %bool +%_arr_uint_uint_1 = OpTypeArray %uint %uint_1 +%_ptr_Input__arr_uint_uint_1 = OpTypePointer Input %_arr_uint_uint_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output__arr_uint_uint_1 = OpTypePointer Output %_arr_uint_uint_1 + %void = OpTypeVoid + %94 = OpTypeFunction %void +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_int = OpTypePointer 
Uniform %int +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%type_sampled_image = OpTypeSampledImage %type_3d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform +%View_SharedBilinearClampedSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%View_PrimitiveSceneData = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform +%TranslucentBasePass = OpVariable %_ptr_Uniform_type_TranslucentBasePass Uniform +%TranslucentBasePass_Shared_Fog_IntegratedLightScattering = OpVariable %_ptr_UniformConstant_type_3d_image UniformConstant + %Material = OpVariable %_ptr_Uniform_type_Material Uniform +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input_v4float Input +%in_var_PRIMITIVE_ID = OpVariable %_ptr_Input_uint Input +%in_var_TEXCOORD7 = OpVariable %_ptr_Input_v4float Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%gl_FrontFacing = OpVariable %_ptr_Input_bool Input +%gl_SampleMask = OpVariable %_ptr_Input__arr_uint_uint_1 Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output +%gl_SampleMask_0 = OpVariable %_ptr_Output__arr_uint_uint_1 Output + %102 = OpConstantNull %v4float + %float_n1_5 = OpConstant %float -1.5 + %float_3 = OpConstant %float 3 + %105 = OpConstantComposite %v3float %float_n1 %float_n1_5 %float_3 +%float_12_25 = OpConstant %float 12.25 +%float_0_00200000009 = OpConstant %float 0.00200000009 + %108 = OpUndef %float + %uint_15 = OpConstant %uint 15 + %MainPS = OpFunction %void None %94 + %110 = OpLabel + %111 = OpLoad %v4float %in_var_TEXCOORD10_centroid + %112 = OpLoad %v4float %in_var_TEXCOORD11_centroid + %113 = OpLoad %uint %in_var_PRIMITIVE_ID + %114 = OpLoad %v4float %in_var_TEXCOORD7 + %115 = OpLoad %v4float %gl_FragCoord + %116 = OpLoad %_arr_uint_uint_1 %gl_SampleMask + %117 = OpCompositeExtract %uint %116 0 + %118 = OpAccessChain 
%_ptr_Uniform_mat4v4float %View %int_1 + %119 = OpLoad %mat4v4float %118 + %120 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_10 + %121 = OpLoad %mat4v4float %120 + %122 = OpAccessChain %_ptr_Uniform_v3float %View %int_31 + %123 = OpLoad %v3float %122 + %124 = OpAccessChain %_ptr_Uniform_v4float %View %int_66 + %125 = OpLoad %v4float %124 + %126 = OpVectorShuffle %v3float %111 %111 0 1 2 + %127 = OpVectorShuffle %v3float %112 %112 0 1 2 + %128 = OpExtInst %v3float %1 Cross %127 %126 + %129 = OpCompositeExtract %float %112 3 + %130 = OpCompositeConstruct %v3float %129 %129 %129 + %131 = OpFMul %v3float %128 %130 + %132 = OpCompositeConstruct %mat3v3float %126 %131 %127 + %133 = OpCompositeExtract %float %115 0 + %134 = OpCompositeExtract %float %115 1 + %135 = OpCompositeExtract %float %115 2 + %136 = OpCompositeConstruct %v4float %133 %134 %135 %float_1 + %137 = OpMatrixTimesVector %v4float %121 %136 + %138 = OpVectorShuffle %v3float %137 %137 0 1 2 + %139 = OpCompositeExtract %float %137 3 + %140 = OpCompositeConstruct %v3float %139 %139 %139 + %141 = OpFDiv %v3float %138 %140 + %142 = OpFSub %v3float %141 %123 + %143 = OpVectorShuffle %v3float %125 %125 0 1 2 + %144 = OpCompositeExtract %float %125 3 + %145 = OpCompositeConstruct %v3float %144 %144 %144 + %146 = OpFMul %v3float %75 %145 + %147 = OpFAdd %v3float %146 %143 + %148 = OpExtInst %v3float %1 Normalize %147 + %149 = OpMatrixTimesVector %v3float %132 %148 + %150 = OpExtInst %v3float %1 Normalize %149 + %151 = OpExtInst %float %1 Sqrt %float_12_25 + %152 = OpCompositeConstruct %v3float %151 %151 %151 + %153 = OpFDiv %v3float %105 %152 + %154 = OpDot %float %153 %150 + %155 = OpFAdd %float %float_1 %154 + %156 = OpFMul %float %155 %float_0_5 + %157 = OpFAdd %float %156 %float_0_200000003 + %158 = OpAccessChain %_ptr_Uniform_v4float %Material %int_0 %int_1 + %159 = OpLoad %v4float %158 + %160 = OpVectorShuffle %v3float %159 %159 0 1 2 + %161 = OpCompositeConstruct %v3float %157 %157 %157 + %162 = 
OpFMul %v3float %160 %161 + %163 = OpAccessChain %_ptr_Uniform_float %TranslucentBasePass %int_144 + %164 = OpLoad %float %163 + %165 = OpFOrdGreaterThan %bool %164 %float_0 + OpSelectionMerge %166 None + OpBranchConditional %165 %167 %166 + %167 = OpLabel + %168 = OpCompositeExtract %float %142 0 + %169 = OpCompositeExtract %float %142 1 + %170 = OpCompositeExtract %float %142 2 + %171 = OpCompositeConstruct %v4float %168 %169 %170 %float_1 + %172 = OpMatrixTimesVector %v4float %119 %171 + %173 = OpCompositeExtract %float %172 3 + %174 = OpCompositeConstruct %v2float %173 %173 + %175 = OpVectorShuffle %v2float %172 %172 0 1 + %176 = OpFDiv %v2float %175 %174 + %177 = OpVectorShuffle %v2float %176 %102 0 1 + %178 = OpFMul %v2float %177 %67 + %179 = OpFAdd %v2float %178 %61 + %180 = OpCompositeExtract %float %179 0 + %181 = OpCompositeExtract %float %179 1 + %182 = OpAccessChain %_ptr_Uniform_float %View %int_155 %int_0 + %183 = OpLoad %float %182 + %184 = OpFMul %float %173 %183 + %185 = OpAccessChain %_ptr_Uniform_float %View %int_155 %int_1 + %186 = OpLoad %float %185 + %187 = OpFAdd %float %184 %186 + %188 = OpExtInst %float %1 Log2 %187 + %189 = OpAccessChain %_ptr_Uniform_float %View %int_155 %int_2 + %190 = OpLoad %float %189 + %191 = OpFMul %float %188 %190 + %192 = OpAccessChain %_ptr_Uniform_float %View %int_153 %int_2 + %193 = OpLoad %float %192 + %194 = OpFMul %float %191 %193 + %195 = OpCompositeConstruct %v3float %180 %181 %194 + OpSelectionMerge %196 None + OpBranchConditional %165 %197 %196 + %197 = OpLabel + %198 = OpLoad %type_3d_image %TranslucentBasePass_Shared_Fog_IntegratedLightScattering + %199 = OpLoad %type_sampler %View_SharedBilinearClampedSampler + %200 = OpSampledImage %type_sampled_image %198 %199 + %201 = OpImageSampleExplicitLod %v4float %200 %195 Lod %float_0 + OpBranch %196 + %196 = OpLabel + %202 = OpPhi %v4float %68 %167 %201 %197 + %203 = OpVectorShuffle %v3float %202 %202 0 1 2 + %204 = OpVectorShuffle %v3float %114 %114 0 1 2 + 
%205 = OpCompositeExtract %float %202 3 + %206 = OpCompositeConstruct %v3float %205 %205 %205 + %207 = OpFMul %v3float %204 %206 + %208 = OpFAdd %v3float %203 %207 + %209 = OpCompositeExtract %float %208 0 + %210 = OpCompositeExtract %float %208 1 + %211 = OpCompositeExtract %float %208 2 + %212 = OpCompositeExtract %float %114 3 + %213 = OpFMul %float %205 %212 + %214 = OpCompositeConstruct %v4float %209 %210 %211 %213 + OpBranch %166 + %166 = OpLabel + %215 = OpPhi %v4float %114 %110 %214 %196 + %216 = OpExtInst %v3float %1 FMax %162 %48 + %217 = OpAccessChain %_ptr_Uniform_float %View %int_70 + %218 = OpLoad %float %217 + %219 = OpFOrdGreaterThan %bool %218 %float_0 + OpSelectionMerge %220 DontFlatten + OpBranchConditional %219 %221 %220 + %221 = OpLabel + %222 = OpIMul %uint %113 %uint_26 + %223 = OpIAdd %uint %222 %uint_5 + %224 = OpAccessChain %_ptr_Uniform_v4float %View_PrimitiveSceneData %int_0 %223 + %225 = OpLoad %v4float %224 + %226 = OpVectorShuffle %v3float %225 %225 0 1 2 + %227 = OpFSub %v3float %142 %226 + %228 = OpExtInst %v3float %1 FAbs %227 + %229 = OpIAdd %uint %222 %uint_19 + %230 = OpAccessChain %_ptr_Uniform_v4float %View_PrimitiveSceneData %int_0 %229 + %231 = OpLoad %v4float %230 + %232 = OpVectorShuffle %v3float %231 %231 0 1 2 + %233 = OpFAdd %v3float %232 %53 + %234 = OpFOrdGreaterThan %v3bool %228 %233 + %235 = OpAny %bool %234 + OpSelectionMerge %236 None + OpBranchConditional %235 %237 %236 + %237 = OpLabel + %238 = OpDot %float %142 %55 + %239 = OpFMul %float %238 %float_0_00200000009 + %240 = OpExtInst %float %1 Fract %239 + %241 = OpCompositeConstruct %v3float %240 %240 %240 + %242 = OpFOrdGreaterThan %v3bool %241 %59 + %243 = OpSelect %v3float %242 %53 %48 + %244 = OpExtInst %v3float %1 FMix %56 %57 %243 + OpBranch %236 + %236 = OpLabel + %245 = OpPhi %v3float %216 %221 %244 %237 + OpBranch %220 + %220 = OpLabel + %246 = OpPhi %v3float %216 %166 %245 %236 + %247 = OpCompositeExtract %float %215 3 + %248 = OpCompositeConstruct 
%v3float %247 %247 %247 + %249 = OpFMul %v3float %246 %248 + %250 = OpVectorShuffle %v3float %215 %215 0 1 2 + %251 = OpFAdd %v3float %249 %250 + %252 = OpCompositeExtract %float %251 0 + %253 = OpCompositeExtract %float %251 1 + %254 = OpCompositeExtract %float %251 2 + %255 = OpCompositeConstruct %v4float %252 %253 %254 %108 + %256 = OpCompositeInsert %v4float %float_1 %255 3 + %257 = OpAccessChain %_ptr_Uniform_int %View %int_60 + %258 = OpLoad %int %257 + %259 = OpSGreaterThan %bool %258 %int_1 + OpSelectionMerge %260 None + OpBranchConditional %259 %261 %262 + %262 = OpLabel + OpBranch %260 + %261 = OpLabel + %263 = OpConvertSToF %float %258 + %264 = OpFMul %float %263 %float_0_25 + %265 = OpCompositeConstruct %v4float %264 %264 %264 %264 + %266 = OpFMul %v4float %256 %265 + %267 = OpBitwiseAnd %uint %117 %uint_15 + OpBranch %260 + %260 = OpLabel + %268 = OpPhi %v4float %266 %261 %256 %262 + %269 = OpPhi %uint %267 %261 %117 %262 + OpStore %out_var_SV_Target0 %268 + %270 = OpAccessChain %_ptr_Output_uint %gl_SampleMask_0 %uint_0 + OpStore %270 %269 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag b/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag new file mode 100644 index 00000000000..e6565873a01 --- /dev/null +++ b/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag @@ -0,0 +1,589 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 130 +; Schema: 0 + OpCapability Shader + OpCapability InputAttachment + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %Main "main" %gl_FragCoord %out_var_SV_Target0 + OpExecutionMode %Main OriginUpperLeft + OpSource HLSL 600 + OpName %type_subpass_image "type.subpass.image" + OpName %gl_LastFragData "gl_LastFragData" + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 
"View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 
"View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 
"PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 
"View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName %type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + 
OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_sampler "type.sampler" + OpName %type_2d_image "type.2d.image" + OpName %ShadowDepthTexture "ShadowDepthTexture" + OpName 
%ShadowDepthTextureSampler "ShadowDepthTextureSampler" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "SoftTransitionScale" + OpMemberName %type__Globals 1 "ShadowBufferSize" + OpMemberName %type__Globals 2 "ShadowFadeFraction" + OpMemberName %type__Globals 3 "ShadowSharpen" + OpMemberName %type__Globals 4 "LightPositionAndInvRadius" + OpMemberName %type__Globals 5 "ScreenToShadowMatrix" + OpMemberName %type__Globals 6 "ProjectionDepthBiasParameters" + OpMemberName %type__Globals 7 "ModulatedShadowColor" + OpMemberName %type__Globals 8 "ShadowTileOffsetAndSize" + OpName %_Globals "$Globals" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %Main "Main" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_LastFragData InputAttachmentIndex 0 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %gl_LastFragData DescriptorSet 0 + OpDecorate %gl_LastFragData Binding 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %ShadowDepthTexture DescriptorSet 0 + OpDecorate %ShadowDepthTexture Binding 0 + OpDecorate %ShadowDepthTextureSampler DescriptorSet 0 + OpDecorate %ShadowDepthTextureSampler Binding 0 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 1 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate 
%type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 
28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 
1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + 
OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 
134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 32 + OpMemberDecorate %type__Globals 3 Offset 36 + OpMemberDecorate %type__Globals 4 Offset 48 + OpMemberDecorate %type__Globals 5 Offset 64 + OpMemberDecorate %type__Globals 5 MatrixStride 16 + 
OpMemberDecorate %type__Globals 5 ColMajor + OpMemberDecorate %type__Globals 6 Offset 128 + OpMemberDecorate %type__Globals 7 Offset 144 + OpMemberDecorate %type__Globals 8 Offset 160 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %float_1 = OpConstant %float 1 + %int_58 = OpConstant %int 58 + %int_24 = OpConstant %int 24 + %int_5 = OpConstant %int 5 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %int_8 = OpConstant %int 8 +%float_0_999989986 = OpConstant %float 0.999989986 + %int_0 = OpConstant %int 0 + %float_0 = OpConstant %float 0 + %int_7 = OpConstant %int 7 + %float_0_5 = OpConstant %float 0.5 + %41 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %v2int = OpTypeVector %int 2 + %43 = OpConstantComposite %v2int %int_0 %int_0 + %44 = OpConstantComposite %v3float %float_0 %float_0 %float_0 +%type_subpass_image = OpTypeImage %float SubpassData 2 0 0 2 Unknown +%_ptr_UniformConstant_type_subpass_image = OpTypePointer UniformConstant %type_subpass_image +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float 
%mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type__Globals = OpTypeStruct %v3float %v4float %float %float %v4float %mat4v4float %v2float %v4float %v4float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %53 = OpTypeFunction %void +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%type_sampled_image = OpTypeSampledImage %type_2d_image +%gl_LastFragData = OpVariable %_ptr_UniformConstant_type_subpass_image UniformConstant + %View = OpVariable %_ptr_Uniform_type_View 
Uniform +%ShadowDepthTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%ShadowDepthTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %57 = OpConstantNull %v4float + %58 = OpUndef %v4float + %Main = OpFunction %void None %53 + %59 = OpLabel + %60 = OpLoad %v4float %gl_FragCoord + %61 = OpVectorShuffle %v2float %60 %60 0 1 + %62 = OpAccessChain %_ptr_Uniform_v4float %View %int_58 + %63 = OpLoad %v4float %62 + %64 = OpVectorShuffle %v2float %63 %63 2 3 + %65 = OpFMul %v2float %61 %64 + %66 = OpLoad %type_subpass_image %gl_LastFragData + %67 = OpImageRead %v4float %66 %43 None + %68 = OpCompositeExtract %float %67 3 + %69 = OpAccessChain %_ptr_Uniform_v4float %View %int_24 + %70 = OpLoad %v4float %69 + %71 = OpVectorShuffle %v2float %70 %70 3 2 + %72 = OpFSub %v2float %65 %71 + %73 = OpVectorShuffle %v2float %70 %70 0 1 + %74 = OpFDiv %v2float %72 %73 + %75 = OpCompositeConstruct %v2float %68 %68 + %76 = OpFMul %v2float %74 %75 + %77 = OpCompositeExtract %float %76 0 + %78 = OpCompositeExtract %float %76 1 + %79 = OpCompositeConstruct %v4float %77 %78 %68 %float_1 + %80 = OpAccessChain %_ptr_Uniform_mat4v4float %_Globals %int_5 + %81 = OpLoad %mat4v4float %80 + %82 = OpMatrixTimesVector %v4float %81 %79 + %83 = OpCompositeExtract %float %82 2 + %84 = OpCompositeExtract %float %82 3 + %85 = OpCompositeConstruct %v3float %84 %84 %84 + %86 = OpVectorShuffle %v3float %82 %82 0 1 2 + %87 = OpFDiv %v3float %86 %85 + %88 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_8 + %89 = OpLoad %v4float %88 + %90 = OpVectorShuffle %v2float %89 %89 2 3 + %91 = OpVectorShuffle %v2float %87 %57 0 1 + %92 = OpFMul %v2float %91 %90 + %93 = OpVectorShuffle %v2float %89 %89 0 1 + %94 = OpVectorShuffle %v2float %92 %57 0 1 + %95 = OpFAdd %v2float %94 %93 + %96 
= OpExtInst %float %1 FMin %83 %float_0_999989986 + %97 = OpLoad %type_2d_image %ShadowDepthTexture + %98 = OpLoad %type_sampler %ShadowDepthTextureSampler + %99 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_2 + %100 = OpLoad %float %99 + %101 = OpVectorShuffle %v2float %95 %57 0 1 + %102 = OpSampledImage %type_sampled_image %97 %98 + %103 = OpImageSampleExplicitLod %v4float %102 %101 Lod %float_0 + %104 = OpVectorShuffle %v3float %103 %103 0 0 0 + %105 = OpFMul %float %96 %100 + %106 = OpFSub %float %105 %float_1 + %107 = OpCompositeConstruct %v3float %100 %100 %100 + %108 = OpFMul %v3float %104 %107 + %109 = OpCompositeConstruct %v3float %106 %106 %106 + %110 = OpFSub %v3float %108 %109 + %111 = OpExtInst %v3float %1 FClamp %110 %44 %41 + %112 = OpCompositeExtract %float %111 0 + %113 = OpFSub %float %112 %float_0_5 + %114 = OpAccessChain %_ptr_Uniform_float %_Globals %int_3 + %115 = OpLoad %float %114 + %116 = OpFMul %float %113 %115 + %117 = OpFAdd %float %116 %float_0_5 + %118 = OpExtInst %float %1 FClamp %117 %float_0 %float_1 + %119 = OpFMul %float %118 %118 + %120 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 + %121 = OpLoad %float %120 + %122 = OpExtInst %float %1 FMix %float_1 %119 %121 + %123 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_7 + %124 = OpLoad %v4float %123 + %125 = OpVectorShuffle %v3float %124 %124 0 1 2 + %126 = OpCompositeConstruct %v3float %122 %122 %122 + %127 = OpExtInst %v3float %1 FMix %125 %41 %126 + %128 = OpVectorShuffle %v4float %58 %127 4 5 6 3 + %129 = OpCompositeInsert %v4float %float_0 %128 3 + OpStore %out_var_SV_Target0 %129 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag b/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag new file mode 100644 index 00000000000..e6565873a01 --- /dev/null +++ b/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag @@ -0,0 +1,589 @@ +; SPIR-V +; Version: 1.0 +; Generator: 
Google spiregg; 0 +; Bound: 130 +; Schema: 0 + OpCapability Shader + OpCapability InputAttachment + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %Main "main" %gl_FragCoord %out_var_SV_Target0 + OpExecutionMode %Main OriginUpperLeft + OpSource HLSL 600 + OpName %type_subpass_image "type.subpass.image" + OpName %gl_LastFragData "gl_LastFragData" + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName 
%type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + 
OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName 
%type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName %type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 
"View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName 
%type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_sampler "type.sampler" + OpName %type_2d_image "type.2d.image" + OpName %ShadowDepthTexture "ShadowDepthTexture" + OpName %ShadowDepthTextureSampler "ShadowDepthTextureSampler" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "SoftTransitionScale" + OpMemberName %type__Globals 1 "ShadowBufferSize" + OpMemberName %type__Globals 2 "ShadowFadeFraction" + OpMemberName %type__Globals 3 "ShadowSharpen" + OpMemberName %type__Globals 4 "LightPositionAndInvRadius" + OpMemberName %type__Globals 5 "ScreenToShadowMatrix" + OpMemberName %type__Globals 6 "ProjectionDepthBiasParameters" + OpMemberName %type__Globals 7 "ModulatedShadowColor" + OpMemberName %type__Globals 8 "ShadowTileOffsetAndSize" + OpName %_Globals "$Globals" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %Main "Main" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_LastFragData InputAttachmentIndex 0 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %gl_LastFragData DescriptorSet 0 + OpDecorate %gl_LastFragData Binding 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %ShadowDepthTexture DescriptorSet 0 + OpDecorate %ShadowDepthTexture Binding 0 + OpDecorate %ShadowDepthTextureSampler DescriptorSet 0 + OpDecorate %ShadowDepthTextureSampler Binding 0 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 1 + OpDecorate %_arr_v4float_uint_2 
ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 
Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + 
OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + 
OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 
2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + 
OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 32 + OpMemberDecorate %type__Globals 3 Offset 36 + OpMemberDecorate %type__Globals 4 Offset 48 + OpMemberDecorate %type__Globals 5 Offset 64 + OpMemberDecorate %type__Globals 5 MatrixStride 16 + OpMemberDecorate %type__Globals 5 ColMajor + OpMemberDecorate %type__Globals 6 Offset 128 + OpMemberDecorate %type__Globals 7 Offset 144 + OpMemberDecorate %type__Globals 8 Offset 160 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %float_1 = OpConstant %float 1 + %int_58 = OpConstant %int 58 + %int_24 = OpConstant %int 24 + %int_5 = OpConstant %int 5 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %int_8 = OpConstant %int 8 +%float_0_999989986 = OpConstant %float 0.999989986 + %int_0 = OpConstant %int 0 + %float_0 = OpConstant %float 0 + %int_7 = OpConstant %int 7 + %float_0_5 = OpConstant %float 0.5 + %41 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %v2int = OpTypeVector %int 2 + %43 = OpConstantComposite %v2int %int_0 %int_0 + %44 = OpConstantComposite %v3float %float_0 %float_0 %float_0 +%type_subpass_image = OpTypeImage %float SubpassData 2 0 0 2 Unknown +%_ptr_UniformConstant_type_subpass_image = OpTypePointer UniformConstant %type_subpass_image +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + 
%type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type__Globals = OpTypeStruct %v3float %v4float %float %float %v4float %mat4v4float %v2float %v4float %v4float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform 
%type__Globals +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %53 = OpTypeFunction %void +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%type_sampled_image = OpTypeSampledImage %type_2d_image +%gl_LastFragData = OpVariable %_ptr_UniformConstant_type_subpass_image UniformConstant + %View = OpVariable %_ptr_Uniform_type_View Uniform +%ShadowDepthTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%ShadowDepthTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %57 = OpConstantNull %v4float + %58 = OpUndef %v4float + %Main = OpFunction %void None %53 + %59 = OpLabel + %60 = OpLoad %v4float %gl_FragCoord + %61 = OpVectorShuffle %v2float %60 %60 0 1 + %62 = OpAccessChain %_ptr_Uniform_v4float %View %int_58 + %63 = OpLoad %v4float %62 + %64 = OpVectorShuffle %v2float %63 %63 2 3 + %65 = OpFMul %v2float %61 %64 + %66 = OpLoad %type_subpass_image %gl_LastFragData + %67 = OpImageRead %v4float %66 %43 None + %68 = OpCompositeExtract %float %67 3 + %69 = OpAccessChain %_ptr_Uniform_v4float %View %int_24 + %70 = OpLoad %v4float %69 + %71 = OpVectorShuffle %v2float %70 %70 3 2 + %72 = OpFSub %v2float %65 %71 + %73 = OpVectorShuffle %v2float %70 %70 0 1 + %74 = OpFDiv %v2float %72 %73 + %75 = OpCompositeConstruct %v2float %68 %68 + %76 = OpFMul %v2float %74 %75 + %77 = OpCompositeExtract %float %76 0 + %78 = OpCompositeExtract %float %76 1 + %79 = OpCompositeConstruct %v4float %77 %78 %68 %float_1 + %80 = OpAccessChain %_ptr_Uniform_mat4v4float %_Globals %int_5 + %81 = OpLoad %mat4v4float %80 + %82 = OpMatrixTimesVector %v4float %81 %79 + %83 = 
OpCompositeExtract %float %82 2 + %84 = OpCompositeExtract %float %82 3 + %85 = OpCompositeConstruct %v3float %84 %84 %84 + %86 = OpVectorShuffle %v3float %82 %82 0 1 2 + %87 = OpFDiv %v3float %86 %85 + %88 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_8 + %89 = OpLoad %v4float %88 + %90 = OpVectorShuffle %v2float %89 %89 2 3 + %91 = OpVectorShuffle %v2float %87 %57 0 1 + %92 = OpFMul %v2float %91 %90 + %93 = OpVectorShuffle %v2float %89 %89 0 1 + %94 = OpVectorShuffle %v2float %92 %57 0 1 + %95 = OpFAdd %v2float %94 %93 + %96 = OpExtInst %float %1 FMin %83 %float_0_999989986 + %97 = OpLoad %type_2d_image %ShadowDepthTexture + %98 = OpLoad %type_sampler %ShadowDepthTextureSampler + %99 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_2 + %100 = OpLoad %float %99 + %101 = OpVectorShuffle %v2float %95 %57 0 1 + %102 = OpSampledImage %type_sampled_image %97 %98 + %103 = OpImageSampleExplicitLod %v4float %102 %101 Lod %float_0 + %104 = OpVectorShuffle %v3float %103 %103 0 0 0 + %105 = OpFMul %float %96 %100 + %106 = OpFSub %float %105 %float_1 + %107 = OpCompositeConstruct %v3float %100 %100 %100 + %108 = OpFMul %v3float %104 %107 + %109 = OpCompositeConstruct %v3float %106 %106 %106 + %110 = OpFSub %v3float %108 %109 + %111 = OpExtInst %v3float %1 FClamp %110 %44 %41 + %112 = OpCompositeExtract %float %111 0 + %113 = OpFSub %float %112 %float_0_5 + %114 = OpAccessChain %_ptr_Uniform_float %_Globals %int_3 + %115 = OpLoad %float %114 + %116 = OpFMul %float %113 %115 + %117 = OpFAdd %float %116 %float_0_5 + %118 = OpExtInst %float %1 FClamp %117 %float_0 %float_1 + %119 = OpFMul %float %118 %118 + %120 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 + %121 = OpLoad %float %120 + %122 = OpExtInst %float %1 FMix %float_1 %119 %121 + %123 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_7 + %124 = OpLoad %v4float %123 + %125 = OpVectorShuffle %v3float %124 %124 0 1 2 + %126 = OpCompositeConstruct %v3float %122 %122 %122 + %127 = OpExtInst %v3float 
%1 FMix %125 %41 %126 + %128 = OpVectorShuffle %v4float %58 %127 4 5 6 3 + %129 = OpCompositeInsert %v4float %float_0 %128 3 + OpStore %out_var_SV_Target0 %129 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag b/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag new file mode 100644 index 00000000000..270a1978fcf --- /dev/null +++ b/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag @@ -0,0 +1,242 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 180 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %ShadowObjectCullPS "main" %in_var_TEXCOORD0 %gl_FragCoord %out_var_SV_Target0 + OpExecutionMode %ShadowObjectCullPS OriginUpperLeft + OpSource HLSL 600 + OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float" + OpName %CulledObjectBoxBounds "CulledObjectBoxBounds" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "ShadowTileListGroupSize" + OpName %_Globals "$Globals" + OpName %type_buffer_image "type.buffer.image" + OpName %RWShadowTileNumCulledObjects "RWShadowTileNumCulledObjects" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %ShadowObjectCullPS "ShadowObjectCullPS" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorate %in_var_TEXCOORD0 Flat + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %CulledObjectBoxBounds DescriptorSet 0 + OpDecorate %CulledObjectBoxBounds Binding 1 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 2 + OpDecorate 
%RWShadowTileNumCulledObjects DescriptorSet 0 + OpDecorate %RWShadowTileNumCulledObjects Binding 0 + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0 + OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable + OpDecorate %type_StructuredBuffer_v4float BufferBlock + OpMemberDecorate %type__Globals 0 Offset 0 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 + %float_0 = OpConstant %float 0 + %22 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v2float %float_2 %float_2 + %float_1 = OpConstant %float 1 + %29 = OpConstantComposite %v2float %float_1 %float_1 +%float_n1000 = OpConstant %float -1000 + %int_2 = OpConstant %int 2 + %float_0_5 = OpConstant %float 0.5 + %33 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 +%float_500000 = OpConstant %float 500000 + %35 = OpConstantComposite %v3float %float_500000 %float_500000 %float_500000 +%float_n500000 = OpConstant %float -500000 + %37 = OpConstantComposite %v3float %float_n500000 %float_n500000 %float_n500000 + %int_3 = OpConstant %int 3 + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %int_8 = OpConstant %int 8 + %44 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %float_n1 = OpConstant %float -1 + %46 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1 + %uint_5 = OpConstant %uint 5 + %uint_0 = OpConstant %uint 0 + %uint_3 = OpConstant %uint 3 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float +%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float 
+%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float + %v2uint = OpTypeVector %uint 2 +%type__Globals = OpTypeStruct %v2uint +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_buffer_image = OpTypeImage %uint Buffer 2 0 0 2 R32ui +%_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %58 = OpTypeFunction %void +%_ptr_Function_v3float = OpTypePointer Function %v3float + %uint_8 = OpConstant %uint 8 +%_arr_v3float_uint_8 = OpTypeArray %v3float %uint_8 +%_ptr_Function__arr_v3float_uint_8 = OpTypePointer Function %_arr_v3float_uint_8 +%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool + %v2bool = OpTypeVector %bool 2 + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Image_uint = OpTypePointer Image %uint +%CulledObjectBoxBounds = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%RWShadowTileNumCulledObjects = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_uint Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %70 = OpUndef %v3float + %71 = OpConstantNull %v3float +%ShadowObjectCullPS = OpFunction %void None %58 + %72 = OpLabel + %73 = OpVariable %_ptr_Function__arr_v3float_uint_8 Function + %74 = OpLoad %uint %in_var_TEXCOORD0 + %75 = OpLoad %v4float %gl_FragCoord + %76 = OpVectorShuffle %v2float %75 %75 0 1 + %77 = OpConvertFToU %v2uint %76 + %78 = OpCompositeExtract %uint %77 1 + %79 = OpAccessChain %_ptr_Uniform_v2uint %_Globals %int_0 + %80 = OpAccessChain 
%_ptr_Uniform_uint %_Globals %int_0 %int_0 + %81 = OpLoad %uint %80 + %82 = OpIMul %uint %78 %81 + %83 = OpCompositeExtract %uint %77 0 + %84 = OpIAdd %uint %82 %83 + %85 = OpConvertUToF %float %83 + %86 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_1 + %87 = OpLoad %uint %86 + %88 = OpISub %uint %87 %uint_1 + %89 = OpISub %uint %88 %78 + %90 = OpConvertUToF %float %89 + %91 = OpCompositeConstruct %v2float %85 %90 + %92 = OpLoad %v2uint %79 + %93 = OpConvertUToF %v2float %92 + %94 = OpFDiv %v2float %91 %93 + %95 = OpFMul %v2float %94 %27 + %96 = OpFSub %v2float %95 %29 + %97 = OpFAdd %v2float %91 %29 + %98 = OpFDiv %v2float %97 %93 + %99 = OpFMul %v2float %98 %27 + %100 = OpFSub %v2float %99 %29 + %101 = OpVectorShuffle %v3float %70 %100 3 4 2 + %102 = OpCompositeInsert %v3float %float_1 %101 2 + %103 = OpIMul %uint %74 %uint_5 + %104 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %103 + %105 = OpLoad %v4float %104 + %106 = OpVectorShuffle %v3float %105 %105 0 1 2 + %107 = OpIAdd %uint %103 %uint_1 + %108 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %107 + %109 = OpLoad %v4float %108 + %110 = OpVectorShuffle %v3float %109 %109 0 1 2 + %111 = OpVectorShuffle %v2float %109 %71 0 1 + %112 = OpVectorShuffle %v2float %96 %71 0 1 + %113 = OpFOrdGreaterThan %v2bool %111 %112 + %114 = OpAll %bool %113 + %115 = OpFOrdLessThan %v3bool %106 %102 + %116 = OpAll %bool %115 + %117 = OpLogicalAnd %bool %114 %116 + OpSelectionMerge %118 DontFlatten + OpBranchConditional %117 %119 %118 + %119 = OpLabel + %120 = OpFAdd %v3float %106 %110 + %121 = OpFMul %v3float %33 %120 + %122 = OpCompositeExtract %float %96 0 + %123 = OpCompositeExtract %float %96 1 + %124 = OpCompositeConstruct %v3float %122 %123 %float_n1000 + %125 = OpAccessChain %_ptr_Function_v3float %73 %int_0 + OpStore %125 %124 + %126 = OpCompositeExtract %float %100 0 + %127 = OpCompositeConstruct %v3float %126 %123 %float_n1000 + %128 = OpAccessChain 
%_ptr_Function_v3float %73 %int_1 + OpStore %128 %127 + %129 = OpCompositeExtract %float %100 1 + %130 = OpCompositeConstruct %v3float %122 %129 %float_n1000 + %131 = OpAccessChain %_ptr_Function_v3float %73 %int_2 + OpStore %131 %130 + %132 = OpCompositeConstruct %v3float %126 %129 %float_n1000 + %133 = OpAccessChain %_ptr_Function_v3float %73 %int_3 + OpStore %133 %132 + %134 = OpCompositeConstruct %v3float %122 %123 %float_1 + %135 = OpAccessChain %_ptr_Function_v3float %73 %int_4 + OpStore %135 %134 + %136 = OpCompositeConstruct %v3float %126 %123 %float_1 + %137 = OpAccessChain %_ptr_Function_v3float %73 %int_5 + OpStore %137 %136 + %138 = OpCompositeConstruct %v3float %122 %129 %float_1 + %139 = OpAccessChain %_ptr_Function_v3float %73 %int_6 + OpStore %139 %138 + %140 = OpCompositeConstruct %v3float %126 %129 %float_1 + %141 = OpAccessChain %_ptr_Function_v3float %73 %int_7 + OpStore %141 %140 + %142 = OpIAdd %uint %103 %uint_2 + %143 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %142 + %144 = OpLoad %v4float %143 + %145 = OpVectorShuffle %v3float %144 %144 0 1 2 + %146 = OpIAdd %uint %103 %uint_3 + %147 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %146 + %148 = OpLoad %v4float %147 + %149 = OpVectorShuffle %v3float %148 %148 0 1 2 + %150 = OpIAdd %uint %103 %uint_4 + %151 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %150 + %152 = OpLoad %v4float %151 + %153 = OpVectorShuffle %v3float %152 %152 0 1 2 + OpBranch %154 + %154 = OpLabel + %155 = OpPhi %v3float %37 %119 %156 %157 + %158 = OpPhi %v3float %35 %119 %159 %157 + %160 = OpPhi %int %int_0 %119 %161 %157 + %162 = OpSLessThan %bool %160 %int_8 + OpLoopMerge %163 %157 Unroll + OpBranchConditional %162 %157 %163 + %157 = OpLabel + %164 = OpAccessChain %_ptr_Function_v3float %73 %160 + %165 = OpLoad %v3float %164 + %166 = OpFSub %v3float %165 %121 + %167 = OpDot %float %166 %145 + %168 = OpDot %float %166 %149 + %169 = OpDot %float %166 %153 
+ %170 = OpCompositeConstruct %v3float %167 %168 %169 + %159 = OpExtInst %v3float %1 FMin %158 %170 + %156 = OpExtInst %v3float %1 FMax %155 %170 + %161 = OpIAdd %int %160 %int_1 + OpBranch %154 + %163 = OpLabel + %171 = OpFOrdLessThan %v3bool %158 %44 + %172 = OpAll %bool %171 + %173 = OpFOrdGreaterThan %v3bool %155 %46 + %174 = OpAll %bool %173 + %175 = OpLogicalAnd %bool %172 %174 + OpSelectionMerge %176 DontFlatten + OpBranchConditional %175 %177 %176 + %177 = OpLabel + %178 = OpImageTexelPointer %_ptr_Image_uint %RWShadowTileNumCulledObjects %84 %uint_0 + %179 = OpAtomicIAdd %uint %178 %uint_1 %uint_0 %uint_1 + OpBranch %176 + %176 = OpLabel + OpBranch %118 + %118 = OpLabel + OpStore %out_var_SV_Target0 %22 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/texture-atomics.asm.frag b/shaders-ue4/asm/frag/texture-atomics.asm.frag new file mode 100644 index 00000000000..270a1978fcf --- /dev/null +++ b/shaders-ue4/asm/frag/texture-atomics.asm.frag @@ -0,0 +1,242 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 180 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %ShadowObjectCullPS "main" %in_var_TEXCOORD0 %gl_FragCoord %out_var_SV_Target0 + OpExecutionMode %ShadowObjectCullPS OriginUpperLeft + OpSource HLSL 600 + OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float" + OpName %CulledObjectBoxBounds "CulledObjectBoxBounds" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "ShadowTileListGroupSize" + OpName %_Globals "$Globals" + OpName %type_buffer_image "type.buffer.image" + OpName %RWShadowTileNumCulledObjects "RWShadowTileNumCulledObjects" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %ShadowObjectCullPS "ShadowObjectCullPS" + OpDecorateString 
%in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorate %in_var_TEXCOORD0 Flat + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %CulledObjectBoxBounds DescriptorSet 0 + OpDecorate %CulledObjectBoxBounds Binding 1 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 2 + OpDecorate %RWShadowTileNumCulledObjects DescriptorSet 0 + OpDecorate %RWShadowTileNumCulledObjects Binding 0 + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0 + OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable + OpDecorate %type_StructuredBuffer_v4float BufferBlock + OpMemberDecorate %type__Globals 0 Offset 0 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 + %float_0 = OpConstant %float 0 + %22 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v2float %float_2 %float_2 + %float_1 = OpConstant %float 1 + %29 = OpConstantComposite %v2float %float_1 %float_1 +%float_n1000 = OpConstant %float -1000 + %int_2 = OpConstant %int 2 + %float_0_5 = OpConstant %float 0.5 + %33 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 +%float_500000 = OpConstant %float 500000 + %35 = OpConstantComposite %v3float %float_500000 %float_500000 %float_500000 +%float_n500000 = OpConstant %float -500000 + %37 = OpConstantComposite %v3float %float_n500000 %float_n500000 %float_n500000 + %int_3 = OpConstant %int 3 + %int_4 
= OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %int_8 = OpConstant %int 8 + %44 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %float_n1 = OpConstant %float -1 + %46 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1 + %uint_5 = OpConstant %uint 5 + %uint_0 = OpConstant %uint 0 + %uint_3 = OpConstant %uint 3 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float +%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float + %v2uint = OpTypeVector %uint 2 +%type__Globals = OpTypeStruct %v2uint +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_buffer_image = OpTypeImage %uint Buffer 2 0 0 2 R32ui +%_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %58 = OpTypeFunction %void +%_ptr_Function_v3float = OpTypePointer Function %v3float + %uint_8 = OpConstant %uint 8 +%_arr_v3float_uint_8 = OpTypeArray %v3float %uint_8 +%_ptr_Function__arr_v3float_uint_8 = OpTypePointer Function %_arr_v3float_uint_8 +%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool + %v2bool = OpTypeVector %bool 2 + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Image_uint = OpTypePointer Image %uint +%CulledObjectBoxBounds = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%RWShadowTileNumCulledObjects = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_uint Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input 
+%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %70 = OpUndef %v3float + %71 = OpConstantNull %v3float +%ShadowObjectCullPS = OpFunction %void None %58 + %72 = OpLabel + %73 = OpVariable %_ptr_Function__arr_v3float_uint_8 Function + %74 = OpLoad %uint %in_var_TEXCOORD0 + %75 = OpLoad %v4float %gl_FragCoord + %76 = OpVectorShuffle %v2float %75 %75 0 1 + %77 = OpConvertFToU %v2uint %76 + %78 = OpCompositeExtract %uint %77 1 + %79 = OpAccessChain %_ptr_Uniform_v2uint %_Globals %int_0 + %80 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_0 + %81 = OpLoad %uint %80 + %82 = OpIMul %uint %78 %81 + %83 = OpCompositeExtract %uint %77 0 + %84 = OpIAdd %uint %82 %83 + %85 = OpConvertUToF %float %83 + %86 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_1 + %87 = OpLoad %uint %86 + %88 = OpISub %uint %87 %uint_1 + %89 = OpISub %uint %88 %78 + %90 = OpConvertUToF %float %89 + %91 = OpCompositeConstruct %v2float %85 %90 + %92 = OpLoad %v2uint %79 + %93 = OpConvertUToF %v2float %92 + %94 = OpFDiv %v2float %91 %93 + %95 = OpFMul %v2float %94 %27 + %96 = OpFSub %v2float %95 %29 + %97 = OpFAdd %v2float %91 %29 + %98 = OpFDiv %v2float %97 %93 + %99 = OpFMul %v2float %98 %27 + %100 = OpFSub %v2float %99 %29 + %101 = OpVectorShuffle %v3float %70 %100 3 4 2 + %102 = OpCompositeInsert %v3float %float_1 %101 2 + %103 = OpIMul %uint %74 %uint_5 + %104 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %103 + %105 = OpLoad %v4float %104 + %106 = OpVectorShuffle %v3float %105 %105 0 1 2 + %107 = OpIAdd %uint %103 %uint_1 + %108 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %107 + %109 = OpLoad %v4float %108 + %110 = OpVectorShuffle %v3float %109 %109 0 1 2 + %111 = OpVectorShuffle %v2float %109 %71 0 1 + %112 = OpVectorShuffle %v2float %96 %71 0 1 + %113 = OpFOrdGreaterThan %v2bool %111 %112 + %114 = OpAll %bool %113 + %115 = OpFOrdLessThan %v3bool %106 %102 + %116 = OpAll %bool %115 + %117 = OpLogicalAnd %bool %114 %116 
+ OpSelectionMerge %118 DontFlatten + OpBranchConditional %117 %119 %118 + %119 = OpLabel + %120 = OpFAdd %v3float %106 %110 + %121 = OpFMul %v3float %33 %120 + %122 = OpCompositeExtract %float %96 0 + %123 = OpCompositeExtract %float %96 1 + %124 = OpCompositeConstruct %v3float %122 %123 %float_n1000 + %125 = OpAccessChain %_ptr_Function_v3float %73 %int_0 + OpStore %125 %124 + %126 = OpCompositeExtract %float %100 0 + %127 = OpCompositeConstruct %v3float %126 %123 %float_n1000 + %128 = OpAccessChain %_ptr_Function_v3float %73 %int_1 + OpStore %128 %127 + %129 = OpCompositeExtract %float %100 1 + %130 = OpCompositeConstruct %v3float %122 %129 %float_n1000 + %131 = OpAccessChain %_ptr_Function_v3float %73 %int_2 + OpStore %131 %130 + %132 = OpCompositeConstruct %v3float %126 %129 %float_n1000 + %133 = OpAccessChain %_ptr_Function_v3float %73 %int_3 + OpStore %133 %132 + %134 = OpCompositeConstruct %v3float %122 %123 %float_1 + %135 = OpAccessChain %_ptr_Function_v3float %73 %int_4 + OpStore %135 %134 + %136 = OpCompositeConstruct %v3float %126 %123 %float_1 + %137 = OpAccessChain %_ptr_Function_v3float %73 %int_5 + OpStore %137 %136 + %138 = OpCompositeConstruct %v3float %122 %129 %float_1 + %139 = OpAccessChain %_ptr_Function_v3float %73 %int_6 + OpStore %139 %138 + %140 = OpCompositeConstruct %v3float %126 %129 %float_1 + %141 = OpAccessChain %_ptr_Function_v3float %73 %int_7 + OpStore %141 %140 + %142 = OpIAdd %uint %103 %uint_2 + %143 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %142 + %144 = OpLoad %v4float %143 + %145 = OpVectorShuffle %v3float %144 %144 0 1 2 + %146 = OpIAdd %uint %103 %uint_3 + %147 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %146 + %148 = OpLoad %v4float %147 + %149 = OpVectorShuffle %v3float %148 %148 0 1 2 + %150 = OpIAdd %uint %103 %uint_4 + %151 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %150 + %152 = OpLoad %v4float %151 + %153 = OpVectorShuffle %v3float %152 %152 
0 1 2 + OpBranch %154 + %154 = OpLabel + %155 = OpPhi %v3float %37 %119 %156 %157 + %158 = OpPhi %v3float %35 %119 %159 %157 + %160 = OpPhi %int %int_0 %119 %161 %157 + %162 = OpSLessThan %bool %160 %int_8 + OpLoopMerge %163 %157 Unroll + OpBranchConditional %162 %157 %163 + %157 = OpLabel + %164 = OpAccessChain %_ptr_Function_v3float %73 %160 + %165 = OpLoad %v3float %164 + %166 = OpFSub %v3float %165 %121 + %167 = OpDot %float %166 %145 + %168 = OpDot %float %166 %149 + %169 = OpDot %float %166 %153 + %170 = OpCompositeConstruct %v3float %167 %168 %169 + %159 = OpExtInst %v3float %1 FMin %158 %170 + %156 = OpExtInst %v3float %1 FMax %155 %170 + %161 = OpIAdd %int %160 %int_1 + OpBranch %154 + %163 = OpLabel + %171 = OpFOrdLessThan %v3bool %158 %44 + %172 = OpAll %bool %171 + %173 = OpFOrdGreaterThan %v3bool %155 %46 + %174 = OpAll %bool %173 + %175 = OpLogicalAnd %bool %172 %174 + OpSelectionMerge %176 DontFlatten + OpBranchConditional %175 %177 %176 + %177 = OpLabel + %178 = OpImageTexelPointer %_ptr_Image_uint %RWShadowTileNumCulledObjects %84 %uint_0 + %179 = OpAtomicIAdd %uint %178 %uint_1 %uint_0 %uint_1 + OpBranch %176 + %176 = OpLabel + OpBranch %118 + %118 = OpLabel + OpStore %out_var_SV_Target0 %22 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag b/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag new file mode 100644 index 00000000000..270a1978fcf --- /dev/null +++ b/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag @@ -0,0 +1,242 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 180 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %ShadowObjectCullPS "main" %in_var_TEXCOORD0 %gl_FragCoord %out_var_SV_Target0 + OpExecutionMode %ShadowObjectCullPS 
OriginUpperLeft + OpSource HLSL 600 + OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float" + OpName %CulledObjectBoxBounds "CulledObjectBoxBounds" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "ShadowTileListGroupSize" + OpName %_Globals "$Globals" + OpName %type_buffer_image "type.buffer.image" + OpName %RWShadowTileNumCulledObjects "RWShadowTileNumCulledObjects" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %ShadowObjectCullPS "ShadowObjectCullPS" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorate %in_var_TEXCOORD0 Flat + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %CulledObjectBoxBounds DescriptorSet 0 + OpDecorate %CulledObjectBoxBounds Binding 1 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 2 + OpDecorate %RWShadowTileNumCulledObjects DescriptorSet 0 + OpDecorate %RWShadowTileNumCulledObjects Binding 0 + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0 + OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable + OpDecorate %type_StructuredBuffer_v4float BufferBlock + OpMemberDecorate %type__Globals 0 Offset 0 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 + %float_0 = OpConstant %float 0 + %22 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite 
%v2float %float_2 %float_2 + %float_1 = OpConstant %float 1 + %29 = OpConstantComposite %v2float %float_1 %float_1 +%float_n1000 = OpConstant %float -1000 + %int_2 = OpConstant %int 2 + %float_0_5 = OpConstant %float 0.5 + %33 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 +%float_500000 = OpConstant %float 500000 + %35 = OpConstantComposite %v3float %float_500000 %float_500000 %float_500000 +%float_n500000 = OpConstant %float -500000 + %37 = OpConstantComposite %v3float %float_n500000 %float_n500000 %float_n500000 + %int_3 = OpConstant %int 3 + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %int_8 = OpConstant %int 8 + %44 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %float_n1 = OpConstant %float -1 + %46 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1 + %uint_5 = OpConstant %uint 5 + %uint_0 = OpConstant %uint 0 + %uint_3 = OpConstant %uint 3 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float +%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float + %v2uint = OpTypeVector %uint 2 +%type__Globals = OpTypeStruct %v2uint +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_buffer_image = OpTypeImage %uint Buffer 2 0 0 2 R32ui +%_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %58 = OpTypeFunction %void +%_ptr_Function_v3float = OpTypePointer Function %v3float + %uint_8 = OpConstant %uint 8 +%_arr_v3float_uint_8 = OpTypeArray %v3float %uint_8 +%_ptr_Function__arr_v3float_uint_8 = OpTypePointer Function %_arr_v3float_uint_8 +%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint +%_ptr_Uniform_uint = OpTypePointer 
Uniform %uint + %bool = OpTypeBool + %v2bool = OpTypeVector %bool 2 + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Image_uint = OpTypePointer Image %uint +%CulledObjectBoxBounds = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%RWShadowTileNumCulledObjects = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_uint Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %70 = OpUndef %v3float + %71 = OpConstantNull %v3float +%ShadowObjectCullPS = OpFunction %void None %58 + %72 = OpLabel + %73 = OpVariable %_ptr_Function__arr_v3float_uint_8 Function + %74 = OpLoad %uint %in_var_TEXCOORD0 + %75 = OpLoad %v4float %gl_FragCoord + %76 = OpVectorShuffle %v2float %75 %75 0 1 + %77 = OpConvertFToU %v2uint %76 + %78 = OpCompositeExtract %uint %77 1 + %79 = OpAccessChain %_ptr_Uniform_v2uint %_Globals %int_0 + %80 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_0 + %81 = OpLoad %uint %80 + %82 = OpIMul %uint %78 %81 + %83 = OpCompositeExtract %uint %77 0 + %84 = OpIAdd %uint %82 %83 + %85 = OpConvertUToF %float %83 + %86 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_1 + %87 = OpLoad %uint %86 + %88 = OpISub %uint %87 %uint_1 + %89 = OpISub %uint %88 %78 + %90 = OpConvertUToF %float %89 + %91 = OpCompositeConstruct %v2float %85 %90 + %92 = OpLoad %v2uint %79 + %93 = OpConvertUToF %v2float %92 + %94 = OpFDiv %v2float %91 %93 + %95 = OpFMul %v2float %94 %27 + %96 = OpFSub %v2float %95 %29 + %97 = OpFAdd %v2float %91 %29 + %98 = OpFDiv %v2float %97 %93 + %99 = OpFMul %v2float %98 %27 + %100 = OpFSub %v2float %99 %29 + %101 = OpVectorShuffle %v3float %70 %100 3 4 2 + %102 = OpCompositeInsert %v3float %float_1 %101 2 + %103 = OpIMul %uint %74 %uint_5 + %104 = OpAccessChain %_ptr_Uniform_v4float 
%CulledObjectBoxBounds %int_0 %103 + %105 = OpLoad %v4float %104 + %106 = OpVectorShuffle %v3float %105 %105 0 1 2 + %107 = OpIAdd %uint %103 %uint_1 + %108 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %107 + %109 = OpLoad %v4float %108 + %110 = OpVectorShuffle %v3float %109 %109 0 1 2 + %111 = OpVectorShuffle %v2float %109 %71 0 1 + %112 = OpVectorShuffle %v2float %96 %71 0 1 + %113 = OpFOrdGreaterThan %v2bool %111 %112 + %114 = OpAll %bool %113 + %115 = OpFOrdLessThan %v3bool %106 %102 + %116 = OpAll %bool %115 + %117 = OpLogicalAnd %bool %114 %116 + OpSelectionMerge %118 DontFlatten + OpBranchConditional %117 %119 %118 + %119 = OpLabel + %120 = OpFAdd %v3float %106 %110 + %121 = OpFMul %v3float %33 %120 + %122 = OpCompositeExtract %float %96 0 + %123 = OpCompositeExtract %float %96 1 + %124 = OpCompositeConstruct %v3float %122 %123 %float_n1000 + %125 = OpAccessChain %_ptr_Function_v3float %73 %int_0 + OpStore %125 %124 + %126 = OpCompositeExtract %float %100 0 + %127 = OpCompositeConstruct %v3float %126 %123 %float_n1000 + %128 = OpAccessChain %_ptr_Function_v3float %73 %int_1 + OpStore %128 %127 + %129 = OpCompositeExtract %float %100 1 + %130 = OpCompositeConstruct %v3float %122 %129 %float_n1000 + %131 = OpAccessChain %_ptr_Function_v3float %73 %int_2 + OpStore %131 %130 + %132 = OpCompositeConstruct %v3float %126 %129 %float_n1000 + %133 = OpAccessChain %_ptr_Function_v3float %73 %int_3 + OpStore %133 %132 + %134 = OpCompositeConstruct %v3float %122 %123 %float_1 + %135 = OpAccessChain %_ptr_Function_v3float %73 %int_4 + OpStore %135 %134 + %136 = OpCompositeConstruct %v3float %126 %123 %float_1 + %137 = OpAccessChain %_ptr_Function_v3float %73 %int_5 + OpStore %137 %136 + %138 = OpCompositeConstruct %v3float %122 %129 %float_1 + %139 = OpAccessChain %_ptr_Function_v3float %73 %int_6 + OpStore %139 %138 + %140 = OpCompositeConstruct %v3float %126 %129 %float_1 + %141 = OpAccessChain %_ptr_Function_v3float %73 %int_7 + OpStore %141 
%140 + %142 = OpIAdd %uint %103 %uint_2 + %143 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %142 + %144 = OpLoad %v4float %143 + %145 = OpVectorShuffle %v3float %144 %144 0 1 2 + %146 = OpIAdd %uint %103 %uint_3 + %147 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %146 + %148 = OpLoad %v4float %147 + %149 = OpVectorShuffle %v3float %148 %148 0 1 2 + %150 = OpIAdd %uint %103 %uint_4 + %151 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %150 + %152 = OpLoad %v4float %151 + %153 = OpVectorShuffle %v3float %152 %152 0 1 2 + OpBranch %154 + %154 = OpLabel + %155 = OpPhi %v3float %37 %119 %156 %157 + %158 = OpPhi %v3float %35 %119 %159 %157 + %160 = OpPhi %int %int_0 %119 %161 %157 + %162 = OpSLessThan %bool %160 %int_8 + OpLoopMerge %163 %157 Unroll + OpBranchConditional %162 %157 %163 + %157 = OpLabel + %164 = OpAccessChain %_ptr_Function_v3float %73 %160 + %165 = OpLoad %v3float %164 + %166 = OpFSub %v3float %165 %121 + %167 = OpDot %float %166 %145 + %168 = OpDot %float %166 %149 + %169 = OpDot %float %166 %153 + %170 = OpCompositeConstruct %v3float %167 %168 %169 + %159 = OpExtInst %v3float %1 FMin %158 %170 + %156 = OpExtInst %v3float %1 FMax %155 %170 + %161 = OpIAdd %int %160 %int_1 + OpBranch %154 + %163 = OpLabel + %171 = OpFOrdLessThan %v3bool %158 %44 + %172 = OpAll %bool %171 + %173 = OpFOrdGreaterThan %v3bool %155 %46 + %174 = OpAll %bool %173 + %175 = OpLogicalAnd %bool %172 %174 + OpSelectionMerge %176 DontFlatten + OpBranchConditional %175 %177 %176 + %177 = OpLabel + %178 = OpImageTexelPointer %_ptr_Image_uint %RWShadowTileNumCulledObjects %84 %uint_0 + %179 = OpAtomicIAdd %uint %178 %uint_1 %uint_0 %uint_1 + OpBranch %176 + %176 = OpLabel + OpBranch %118 + %118 = OpLabel + OpStore %out_var_SV_Target0 %22 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc b/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc new file mode 100644 
index 00000000000..4c70e14cb72 --- /dev/null +++ b/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc @@ -0,0 +1,1158 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 598 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpCapability StorageImageExtendedFormats + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %MainHull "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_COLOR0 %in_var_TEXCOORD0 %in_var_TEXCOORD4 %in_var_PRIMITIVE_ID %in_var_LIGHTMAP_ID %in_var_VS_To_DS_Position %gl_InvocationID %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_COLOR0 %out_var_TEXCOORD0 %out_var_TEXCOORD4 %out_var_PRIMITIVE_ID %out_var_LIGHTMAP_ID %out_var_VS_To_DS_Position %out_var_PN_POSITION %out_var_PN_DisplacementScales %out_var_PN_TessellationMultiplier %out_var_PN_WorldDisplacementMultiplier %gl_TessLevelOuter %gl_TessLevelInner %out_var_PN_POSITION9 + OpExecutionMode %MainHull Triangles + OpExecutionMode %MainHull SpacingFractionalOdd + OpExecutionMode %MainHull VertexOrderCw + OpExecutionMode %MainHull OutputVertices 3 + OpSource HLSL 600 + OpName %FPNTessellationHSToDS "FPNTessellationHSToDS" + OpMemberName %FPNTessellationHSToDS 0 "PassSpecificData" + OpMemberName %FPNTessellationHSToDS 1 "WorldPosition" + OpMemberName %FPNTessellationHSToDS 2 "DisplacementScale" + OpMemberName %FPNTessellationHSToDS 3 "TessellationMultiplier" + OpMemberName %FPNTessellationHSToDS 4 "WorldDisplacementMultiplier" + OpName %FBasePassVSToDS "FBasePassVSToDS" + OpMemberName %FBasePassVSToDS 0 "FactoryInterpolants" + OpMemberName %FBasePassVSToDS 1 "BasePassInterpolants" + OpMemberName %FBasePassVSToDS 2 "Position" + OpName %FVertexFactoryInterpolantsVSToDS "FVertexFactoryInterpolantsVSToDS" + OpMemberName %FVertexFactoryInterpolantsVSToDS 0 "InterpolantsVSToPS" + OpName %FVertexFactoryInterpolantsVSToPS 
"FVertexFactoryInterpolantsVSToPS" + OpMemberName %FVertexFactoryInterpolantsVSToPS 0 "TangentToWorld0" + OpMemberName %FVertexFactoryInterpolantsVSToPS 1 "TangentToWorld2" + OpMemberName %FVertexFactoryInterpolantsVSToPS 2 "Color" + OpMemberName %FVertexFactoryInterpolantsVSToPS 3 "TexCoords" + OpMemberName %FVertexFactoryInterpolantsVSToPS 4 "LightMapCoordinate" + OpMemberName %FVertexFactoryInterpolantsVSToPS 5 "PrimitiveId" + OpMemberName %FVertexFactoryInterpolantsVSToPS 6 "LightmapDataIndex" + OpName %FBasePassInterpolantsVSToDS "FBasePassInterpolantsVSToDS" + OpName %FSharedBasePassInterpolants "FSharedBasePassInterpolants" + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 
"View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + 
OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 
"View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName 
%type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 
"View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float" + OpName %View_PrimitiveSceneData "View_PrimitiveSceneData" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_COLOR0 "in.var.COLOR0" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %in_var_TEXCOORD4 "in.var.TEXCOORD4" + OpName %in_var_PRIMITIVE_ID "in.var.PRIMITIVE_ID" + OpName %in_var_LIGHTMAP_ID "in.var.LIGHTMAP_ID" + OpName %in_var_VS_To_DS_Position "in.var.VS_To_DS_Position" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_COLOR0 "out.var.COLOR0" + OpName %out_var_TEXCOORD0 "out.var.TEXCOORD0" + OpName %out_var_TEXCOORD4 "out.var.TEXCOORD4" + OpName %out_var_PRIMITIVE_ID "out.var.PRIMITIVE_ID" + OpName %out_var_LIGHTMAP_ID "out.var.LIGHTMAP_ID" + OpName %out_var_VS_To_DS_Position "out.var.VS_To_DS_Position" + OpName %out_var_PN_POSITION "out.var.PN_POSITION" + OpName %out_var_PN_DisplacementScales "out.var.PN_DisplacementScales" + OpName %out_var_PN_TessellationMultiplier "out.var.PN_TessellationMultiplier" + OpName %out_var_PN_WorldDisplacementMultiplier "out.var.PN_WorldDisplacementMultiplier" + OpName %out_var_PN_POSITION9 "out.var.PN_POSITION9" + OpName %MainHull 
"MainHull" + OpName %param_var_I "param.var.I" + OpName %temp_var_hullMainRetVal "temp.var.hullMainRetVal" + OpName %if_merge "if.merge" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_COLOR0 UserSemantic "COLOR0" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_TEXCOORD4 UserSemantic "TEXCOORD4" + OpDecorateString %in_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorateString %in_var_LIGHTMAP_ID UserSemantic "LIGHTMAP_ID" + OpDecorateString %in_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorateString %gl_InvocationID UserSemantic "SV_OutputControlPointID" + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_COLOR0 UserSemantic "COLOR0" + OpDecorateString %out_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_TEXCOORD4 UserSemantic "TEXCOORD4" + OpDecorateString %out_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorateString %out_var_LIGHTMAP_ID UserSemantic "LIGHTMAP_ID" + OpDecorateString %out_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %out_var_PN_POSITION UserSemantic "PN_POSITION" + OpDecorateString %out_var_PN_DisplacementScales UserSemantic "PN_DisplacementScales" + OpDecorateString %out_var_PN_TessellationMultiplier UserSemantic "PN_TessellationMultiplier" + OpDecorateString %out_var_PN_WorldDisplacementMultiplier UserSemantic "PN_WorldDisplacementMultiplier" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner 
UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %out_var_PN_POSITION9 UserSemantic "PN_POSITION9" + OpDecorate %out_var_PN_POSITION9 Patch + OpDecorate %in_var_TEXCOORD10_centroid Location 0 + OpDecorate %in_var_TEXCOORD11_centroid Location 1 + OpDecorate %in_var_COLOR0 Location 2 + OpDecorate %in_var_TEXCOORD0 Location 3 + OpDecorate %in_var_TEXCOORD4 Location 4 + OpDecorate %in_var_PRIMITIVE_ID Location 5 + OpDecorate %in_var_LIGHTMAP_ID Location 6 + OpDecorate %in_var_VS_To_DS_Position Location 7 + OpDecorate %out_var_COLOR0 Location 0 + OpDecorate %out_var_LIGHTMAP_ID Location 1 + OpDecorate %out_var_PN_DisplacementScales Location 2 + OpDecorate %out_var_PN_POSITION Location 3 + OpDecorate %out_var_PN_POSITION9 Location 6 + OpDecorate %out_var_PN_TessellationMultiplier Location 7 + OpDecorate %out_var_PN_WorldDisplacementMultiplier Location 8 + OpDecorate %out_var_PRIMITIVE_ID Location 9 + OpDecorate %out_var_TEXCOORD0 Location 10 + OpDecorate %out_var_TEXCOORD10_centroid Location 11 + OpDecorate %out_var_TEXCOORD11_centroid Location 12 + OpDecorate %out_var_TEXCOORD4 Location 13 + OpDecorate %out_var_VS_To_DS_Position Location 14 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 1 + OpDecorate %View_PrimitiveSceneData DescriptorSet 0 + OpDecorate %View_PrimitiveSceneData Binding 0 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + 
OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + 
OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + 
OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate 
%type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + 
OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0 + OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable + OpDecorate %type_StructuredBuffer_v4float BufferBlock + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int 
= OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 + %float_2 = OpConstant %float 2 + %62 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %float_0_5 = OpConstant %float 0.5 + %int_3 = OpConstant %int 3 +%float_0_333000004 = OpConstant %float 0.333000004 + %float_1 = OpConstant %float 1 + %67 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_15 = OpConstant %float 15 + %69 = OpConstantComposite %v4float %float_15 %float_15 %float_15 %float_15 +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%FVertexFactoryInterpolantsVSToPS = OpTypeStruct %v4float %v4float %v4float %_arr_v4float_uint_1 %v4float %uint %uint +%FVertexFactoryInterpolantsVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToPS +%FSharedBasePassInterpolants = OpTypeStruct +%FBasePassInterpolantsVSToDS = OpTypeStruct %FSharedBasePassInterpolants +%FBasePassVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToDS %FBasePassInterpolantsVSToDS %v4float +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%FPNTessellationHSToDS = OpTypeStruct %FBasePassVSToDS %_arr_v4float_uint_3 %v3float %float %float + %v3int = OpTypeVector %int 3 + %73 = OpConstantComposite %v3int %int_0 %int_0 %int_0 + %74 = OpConstantComposite %v3int %int_3 %int_3 %int_3 + %float_0 = OpConstant %float 0 + %76 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %77 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 + %int_77 = OpConstant %int 77 + %int_6 = OpConstant %int 6 + %int_27 = OpConstant %int 27 + %81 = OpConstantComposite %v3int %int_1 %int_1 %int_1 + %82 = OpConstantComposite %v3int %int_2 %int_2 %int_2 + %uint_26 = OpConstant %uint 26 + 
%uint_12 = OpConstant %uint 12 + %uint_22 = OpConstant %uint 22 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%_runtimearr_v4float = OpTypeRuntimeArray %v4float +%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform 
%type_StructuredBuffer_v4float +%_arr_v4float_uint_12 = OpTypeArray %v4float %uint_12 +%_ptr_Input__arr_v4float_uint_12 = OpTypePointer Input %_arr_v4float_uint_12 +%_arr__arr_v4float_uint_1_uint_12 = OpTypeArray %_arr_v4float_uint_1 %uint_12 +%_ptr_Input__arr__arr_v4float_uint_1_uint_12 = OpTypePointer Input %_arr__arr_v4float_uint_1_uint_12 +%_arr_uint_uint_12 = OpTypeArray %uint %uint_12 +%_ptr_Input__arr_uint_uint_12 = OpTypePointer Input %_arr_uint_uint_12 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_arr__arr_v4float_uint_1_uint_3 = OpTypeArray %_arr_v4float_uint_1 %uint_3 +%_ptr_Output__arr__arr_v4float_uint_1_uint_3 = OpTypePointer Output %_arr__arr_v4float_uint_1_uint_3 +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 +%_ptr_Output__arr_uint_uint_3 = OpTypePointer Output %_arr_uint_uint_3 +%_arr__arr_v4float_uint_3_uint_3 = OpTypeArray %_arr_v4float_uint_3 %uint_3 +%_ptr_Output__arr__arr_v4float_uint_3_uint_3 = OpTypePointer Output %_arr__arr_v4float_uint_3_uint_3 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Output__arr_v3float_uint_3 = OpTypePointer Output %_arr_v3float_uint_3 +%_ptr_Output__arr_float_uint_3 = OpTypePointer Output %_arr_float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %111 = OpTypeFunction %void +%_arr_FBasePassVSToDS_uint_12 = OpTypeArray %FBasePassVSToDS %uint_12 +%_ptr_Function__arr_FBasePassVSToDS_uint_12 = OpTypePointer Function %_arr_FBasePassVSToDS_uint_12 +%_arr_FPNTessellationHSToDS_uint_3 = OpTypeArray %FPNTessellationHSToDS %uint_3 +%_ptr_Workgroup__arr_FPNTessellationHSToDS_uint_3 = OpTypePointer Workgroup %_arr_FPNTessellationHSToDS_uint_3 
+%_ptr_Output__arr_v4float_uint_1 = OpTypePointer Output %_arr_v4float_uint_1 +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Output_v3float = OpTypePointer Output %v3float +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Workgroup_FPNTessellationHSToDS = OpTypePointer Workgroup %FPNTessellationHSToDS + %bool = OpTypeBool +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Workgroup_float = OpTypePointer Workgroup %float +%mat3v3float = OpTypeMatrix %v3float 3 +%_ptr_Function_FVertexFactoryInterpolantsVSToDS = OpTypePointer Function %FVertexFactoryInterpolantsVSToDS +%_ptr_Function_FVertexFactoryInterpolantsVSToPS = OpTypePointer Function %FVertexFactoryInterpolantsVSToPS +%_ptr_Function_FBasePassVSToDS = OpTypePointer Function %FBasePassVSToDS + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %View = OpVariable %_ptr_Uniform_type_View Uniform +%View_PrimitiveSceneData = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_COLOR0 = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr__arr_v4float_uint_1_uint_12 Input +%in_var_TEXCOORD4 = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_PRIMITIVE_ID = OpVariable %_ptr_Input__arr_uint_uint_12 Input +%in_var_LIGHTMAP_ID = OpVariable %_ptr_Input__arr_uint_uint_12 Input +%in_var_VS_To_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%gl_InvocationID = OpVariable %_ptr_Input_uint Input +%out_var_TEXCOORD10_centroid = 
OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_COLOR0 = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_TEXCOORD0 = OpVariable %_ptr_Output__arr__arr_v4float_uint_1_uint_3 Output +%out_var_TEXCOORD4 = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_PRIMITIVE_ID = OpVariable %_ptr_Output__arr_uint_uint_3 Output +%out_var_LIGHTMAP_ID = OpVariable %_ptr_Output__arr_uint_uint_3 Output +%out_var_VS_To_DS_Position = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_PN_POSITION = OpVariable %_ptr_Output__arr__arr_v4float_uint_3_uint_3 Output +%out_var_PN_DisplacementScales = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%out_var_PN_TessellationMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%out_var_PN_WorldDisplacementMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output +%out_var_PN_POSITION9 = OpVariable %_ptr_Output_v4float Output + %133 = OpConstantNull %FSharedBasePassInterpolants + %134 = OpConstantComposite %FBasePassInterpolantsVSToDS %133 +%float_0_333333343 = OpConstant %float 0.333333343 + %136 = OpConstantComposite %v4float %float_0_333333343 %float_0_333333343 %float_0_333333343 %float_0_333333343 + %137 = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5 +%float_0_166666672 = OpConstant %float 0.166666672 + %139 = OpConstantComposite %v4float %float_0_166666672 %float_0_166666672 %float_0_166666672 %float_0_166666672 + %140 = OpUndef %v4float + +; XXX: Original asm used Function here, which is wrong. +; This patches the SPIR-V to be correct. 
+%temp_var_hullMainRetVal = OpVariable %_ptr_Workgroup__arr_FPNTessellationHSToDS_uint_3 Workgroup + + %MainHull = OpFunction %void None %111 + %141 = OpLabel +%param_var_I = OpVariable %_ptr_Function__arr_FBasePassVSToDS_uint_12 Function + %142 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD10_centroid + %143 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD11_centroid + %144 = OpLoad %_arr_v4float_uint_12 %in_var_COLOR0 + %145 = OpLoad %_arr__arr_v4float_uint_1_uint_12 %in_var_TEXCOORD0 + %146 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD4 + %147 = OpLoad %_arr_uint_uint_12 %in_var_PRIMITIVE_ID + %148 = OpLoad %_arr_uint_uint_12 %in_var_LIGHTMAP_ID + %149 = OpCompositeExtract %v4float %142 0 + %150 = OpCompositeExtract %v4float %143 0 + %151 = OpCompositeExtract %v4float %144 0 + %152 = OpCompositeExtract %_arr_v4float_uint_1 %145 0 + %153 = OpCompositeExtract %v4float %146 0 + %154 = OpCompositeExtract %uint %147 0 + %155 = OpCompositeExtract %uint %148 0 + %156 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %149 %150 %151 %152 %153 %154 %155 + %157 = OpCompositeExtract %v4float %142 1 + %158 = OpCompositeExtract %v4float %143 1 + %159 = OpCompositeExtract %v4float %144 1 + %160 = OpCompositeExtract %_arr_v4float_uint_1 %145 1 + %161 = OpCompositeExtract %v4float %146 1 + %162 = OpCompositeExtract %uint %147 1 + %163 = OpCompositeExtract %uint %148 1 + %164 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %157 %158 %159 %160 %161 %162 %163 + %165 = OpCompositeExtract %v4float %142 2 + %166 = OpCompositeExtract %v4float %143 2 + %167 = OpCompositeExtract %v4float %144 2 + %168 = OpCompositeExtract %_arr_v4float_uint_1 %145 2 + %169 = OpCompositeExtract %v4float %146 2 + %170 = OpCompositeExtract %uint %147 2 + %171 = OpCompositeExtract %uint %148 2 + %172 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %165 %166 %167 %168 %169 %170 %171 + %173 = OpCompositeExtract %v4float %142 3 + %174 = OpCompositeExtract %v4float %143 3 + %175 = 
OpCompositeExtract %v4float %144 3 + %176 = OpCompositeExtract %_arr_v4float_uint_1 %145 3 + %177 = OpCompositeExtract %v4float %146 3 + %178 = OpCompositeExtract %uint %147 3 + %179 = OpCompositeExtract %uint %148 3 + %180 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %173 %174 %175 %176 %177 %178 %179 + %181 = OpCompositeExtract %v4float %142 4 + %182 = OpCompositeExtract %v4float %143 4 + %183 = OpCompositeExtract %v4float %144 4 + %184 = OpCompositeExtract %_arr_v4float_uint_1 %145 4 + %185 = OpCompositeExtract %v4float %146 4 + %186 = OpCompositeExtract %uint %147 4 + %187 = OpCompositeExtract %uint %148 4 + %188 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %181 %182 %183 %184 %185 %186 %187 + %189 = OpCompositeExtract %v4float %142 5 + %190 = OpCompositeExtract %v4float %143 5 + %191 = OpCompositeExtract %v4float %144 5 + %192 = OpCompositeExtract %_arr_v4float_uint_1 %145 5 + %193 = OpCompositeExtract %v4float %146 5 + %194 = OpCompositeExtract %uint %147 5 + %195 = OpCompositeExtract %uint %148 5 + %196 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %189 %190 %191 %192 %193 %194 %195 + %197 = OpCompositeExtract %v4float %142 6 + %198 = OpCompositeExtract %v4float %143 6 + %199 = OpCompositeExtract %v4float %144 6 + %200 = OpCompositeExtract %_arr_v4float_uint_1 %145 6 + %201 = OpCompositeExtract %v4float %146 6 + %202 = OpCompositeExtract %uint %147 6 + %203 = OpCompositeExtract %uint %148 6 + %204 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %197 %198 %199 %200 %201 %202 %203 + %205 = OpCompositeExtract %v4float %142 7 + %206 = OpCompositeExtract %v4float %143 7 + %207 = OpCompositeExtract %v4float %144 7 + %208 = OpCompositeExtract %_arr_v4float_uint_1 %145 7 + %209 = OpCompositeExtract %v4float %146 7 + %210 = OpCompositeExtract %uint %147 7 + %211 = OpCompositeExtract %uint %148 7 + %212 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %205 %206 %207 %208 %209 %210 %211 + %213 = OpCompositeExtract 
%v4float %142 8 + %214 = OpCompositeExtract %v4float %143 8 + %215 = OpCompositeExtract %v4float %144 8 + %216 = OpCompositeExtract %_arr_v4float_uint_1 %145 8 + %217 = OpCompositeExtract %v4float %146 8 + %218 = OpCompositeExtract %uint %147 8 + %219 = OpCompositeExtract %uint %148 8 + %220 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %213 %214 %215 %216 %217 %218 %219 + %221 = OpCompositeExtract %v4float %142 9 + %222 = OpCompositeExtract %v4float %143 9 + %223 = OpCompositeExtract %v4float %144 9 + %224 = OpCompositeExtract %_arr_v4float_uint_1 %145 9 + %225 = OpCompositeExtract %v4float %146 9 + %226 = OpCompositeExtract %uint %147 9 + %227 = OpCompositeExtract %uint %148 9 + %228 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %221 %222 %223 %224 %225 %226 %227 + %229 = OpCompositeExtract %v4float %142 10 + %230 = OpCompositeExtract %v4float %143 10 + %231 = OpCompositeExtract %v4float %144 10 + %232 = OpCompositeExtract %_arr_v4float_uint_1 %145 10 + %233 = OpCompositeExtract %v4float %146 10 + %234 = OpCompositeExtract %uint %147 10 + %235 = OpCompositeExtract %uint %148 10 + %236 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %229 %230 %231 %232 %233 %234 %235 + %237 = OpCompositeExtract %v4float %142 11 + %238 = OpCompositeExtract %v4float %143 11 + %239 = OpCompositeExtract %v4float %144 11 + %240 = OpCompositeExtract %_arr_v4float_uint_1 %145 11 + %241 = OpCompositeExtract %v4float %146 11 + %242 = OpCompositeExtract %uint %147 11 + %243 = OpCompositeExtract %uint %148 11 + %244 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %237 %238 %239 %240 %241 %242 %243 + %245 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %156 + %246 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %164 + %247 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %172 + %248 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %180 + %249 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %188 + %250 = 
OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %196 + %251 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %204 + %252 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %212 + %253 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %220 + %254 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %228 + %255 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %236 + %256 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %244 + %257 = OpLoad %_arr_v4float_uint_12 %in_var_VS_To_DS_Position + %258 = OpCompositeExtract %v4float %257 0 + %259 = OpCompositeConstruct %FBasePassVSToDS %245 %134 %258 + %260 = OpCompositeExtract %v4float %257 1 + %261 = OpCompositeConstruct %FBasePassVSToDS %246 %134 %260 + %262 = OpCompositeExtract %v4float %257 2 + %263 = OpCompositeConstruct %FBasePassVSToDS %247 %134 %262 + %264 = OpCompositeExtract %v4float %257 3 + %265 = OpCompositeConstruct %FBasePassVSToDS %248 %134 %264 + %266 = OpCompositeExtract %v4float %257 4 + %267 = OpCompositeConstruct %FBasePassVSToDS %249 %134 %266 + %268 = OpCompositeExtract %v4float %257 5 + %269 = OpCompositeConstruct %FBasePassVSToDS %250 %134 %268 + %270 = OpCompositeExtract %v4float %257 6 + %271 = OpCompositeConstruct %FBasePassVSToDS %251 %134 %270 + %272 = OpCompositeExtract %v4float %257 7 + %273 = OpCompositeConstruct %FBasePassVSToDS %252 %134 %272 + %274 = OpCompositeExtract %v4float %257 8 + %275 = OpCompositeConstruct %FBasePassVSToDS %253 %134 %274 + %276 = OpCompositeExtract %v4float %257 9 + %277 = OpCompositeConstruct %FBasePassVSToDS %254 %134 %276 + %278 = OpCompositeExtract %v4float %257 10 + %279 = OpCompositeConstruct %FBasePassVSToDS %255 %134 %278 + %280 = OpCompositeExtract %v4float %257 11 + %281 = OpCompositeConstruct %FBasePassVSToDS %256 %134 %280 + %282 = OpCompositeConstruct %_arr_FBasePassVSToDS_uint_12 %259 %261 %263 %265 %267 %269 %271 %273 %275 %277 %279 %281 + OpStore %param_var_I %282 + %283 = OpLoad %uint %gl_InvocationID 
+ %284 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %283 %int_0 + %285 = OpLoad %FVertexFactoryInterpolantsVSToDS %284 + %286 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %285 0 + %287 = OpCompositeExtract %v4float %286 0 + %288 = OpCompositeExtract %v4float %286 1 + %289 = OpVectorShuffle %v3float %287 %287 0 1 2 + %290 = OpVectorShuffle %v3float %288 %288 0 1 2 + %291 = OpExtInst %v3float %1 Cross %290 %289 + %292 = OpCompositeExtract %float %288 3 + %293 = OpCompositeConstruct %v3float %292 %292 %292 + %294 = OpFMul %v3float %291 %293 + %295 = OpCompositeConstruct %mat3v3float %289 %294 %290 + %296 = OpCompositeExtract %float %288 0 + %297 = OpCompositeExtract %float %288 1 + %298 = OpCompositeExtract %float %288 2 + %299 = OpCompositeConstruct %v4float %296 %297 %298 %float_0 + %300 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToPS %param_var_I %283 %int_0 %int_0 + %301 = OpLoad %FVertexFactoryInterpolantsVSToPS %300 + %302 = OpCompositeExtract %uint %301 5 + %303 = OpIMul %uint %302 %uint_26 + %304 = OpIAdd %uint %303 %uint_22 + %305 = OpAccessChain %_ptr_Uniform_v4float %View_PrimitiveSceneData %int_0 %304 + %306 = OpLoad %v4float %305 + %307 = OpVectorShuffle %v3float %306 %306 0 1 2 + %308 = OpVectorTimesMatrix %v3float %307 %295 + %309 = OpULessThan %bool %283 %uint_2 + %310 = OpIAdd %uint %283 %uint_1 + %311 = OpSelect %uint %309 %310 %uint_0 + %312 = OpIMul %uint %uint_2 %283 + %313 = OpIAdd %uint %uint_3 %312 + %314 = OpIAdd %uint %312 %uint_4 + %315 = OpAccessChain %_ptr_Function_FBasePassVSToDS %param_var_I %283 + %316 = OpLoad %FBasePassVSToDS %315 + %317 = OpAccessChain %_ptr_Function_v4float %param_var_I %283 %int_2 + %318 = OpLoad %v4float %317 + %319 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %311 %int_0 + %320 = OpLoad %FVertexFactoryInterpolantsVSToDS %319 + %321 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %320 0 + %322 = OpCompositeExtract 
%v4float %321 1 + %323 = OpCompositeExtract %float %322 0 + %324 = OpCompositeExtract %float %322 1 + %325 = OpCompositeExtract %float %322 2 + %326 = OpCompositeConstruct %v4float %323 %324 %325 %float_0 + %327 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %313 %int_0 + %328 = OpLoad %FVertexFactoryInterpolantsVSToDS %327 + %329 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %328 0 + %330 = OpCompositeExtract %v4float %329 1 + %331 = OpCompositeExtract %float %330 0 + %332 = OpCompositeExtract %float %330 1 + %333 = OpCompositeExtract %float %330 2 + %334 = OpCompositeConstruct %v4float %331 %332 %333 %float_0 + %335 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %314 %int_0 + %336 = OpLoad %FVertexFactoryInterpolantsVSToDS %335 + %337 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %336 0 + %338 = OpCompositeExtract %v4float %337 1 + %339 = OpCompositeExtract %float %338 0 + %340 = OpCompositeExtract %float %338 1 + %341 = OpCompositeExtract %float %338 2 + %342 = OpCompositeConstruct %v4float %339 %340 %341 %float_0 + %343 = OpLoad %v4float %317 + %344 = OpAccessChain %_ptr_Function_v4float %param_var_I %311 %int_2 + %345 = OpLoad %v4float %344 + %346 = OpFMul %v4float %62 %343 + %347 = OpFAdd %v4float %346 %345 + %348 = OpFSub %v4float %345 %343 + %349 = OpDot %float %348 %299 + %350 = OpCompositeConstruct %v4float %349 %349 %349 %349 + %351 = OpFMul %v4float %350 %299 + %352 = OpFSub %v4float %347 %351 + %353 = OpFMul %v4float %352 %136 + %354 = OpAccessChain %_ptr_Function_v4float %param_var_I %313 %int_2 + %355 = OpLoad %v4float %354 + %356 = OpAccessChain %_ptr_Function_v4float %param_var_I %314 %int_2 + %357 = OpLoad %v4float %356 + %358 = OpFMul %v4float %62 %355 + %359 = OpFAdd %v4float %358 %357 + %360 = OpFSub %v4float %357 %355 + %361 = OpDot %float %360 %334 + %362 = OpCompositeConstruct %v4float %361 %361 %361 %361 + %363 = OpFMul %v4float %362 %334 + %364 = OpFSub %v4float 
%359 %363 + %365 = OpFMul %v4float %364 %136 + %366 = OpFAdd %v4float %353 %365 + %367 = OpFMul %v4float %366 %137 + %368 = OpLoad %v4float %344 + %369 = OpLoad %v4float %317 + %370 = OpFMul %v4float %62 %368 + %371 = OpFAdd %v4float %370 %369 + %372 = OpFSub %v4float %369 %368 + %373 = OpDot %float %372 %326 + %374 = OpCompositeConstruct %v4float %373 %373 %373 %373 + %375 = OpFMul %v4float %374 %326 + %376 = OpFSub %v4float %371 %375 + %377 = OpFMul %v4float %376 %136 + %378 = OpLoad %v4float %356 + %379 = OpLoad %v4float %354 + %380 = OpFMul %v4float %62 %378 + %381 = OpFAdd %v4float %380 %379 + %382 = OpFSub %v4float %379 %378 + %383 = OpDot %float %382 %342 + %384 = OpCompositeConstruct %v4float %383 %383 %383 %383 + %385 = OpFMul %v4float %384 %342 + %386 = OpFSub %v4float %381 %385 + %387 = OpFMul %v4float %386 %136 + %388 = OpFAdd %v4float %377 %387 + %389 = OpFMul %v4float %388 %137 + %390 = OpCompositeConstruct %_arr_v4float_uint_3 %318 %367 %389 + %391 = OpCompositeConstruct %FPNTessellationHSToDS %316 %390 %308 %float_1 %float_1 + %392 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %316 0 + %393 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %392 0 + %394 = OpCompositeExtract %v4float %393 0 + %395 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD10_centroid %283 + OpStore %395 %394 + %396 = OpCompositeExtract %v4float %393 1 + %397 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD11_centroid %283 + OpStore %397 %396 + %398 = OpCompositeExtract %v4float %393 2 + %399 = OpAccessChain %_ptr_Output_v4float %out_var_COLOR0 %283 + OpStore %399 %398 + %400 = OpCompositeExtract %_arr_v4float_uint_1 %393 3 + %401 = OpAccessChain %_ptr_Output__arr_v4float_uint_1 %out_var_TEXCOORD0 %283 + OpStore %401 %400 + %402 = OpCompositeExtract %v4float %393 4 + %403 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD4 %283 + OpStore %403 %402 + %404 = OpCompositeExtract %uint %393 5 + %405 = OpAccessChain %_ptr_Output_uint %out_var_PRIMITIVE_ID 
%283 + OpStore %405 %404 + %406 = OpCompositeExtract %uint %393 6 + %407 = OpAccessChain %_ptr_Output_uint %out_var_LIGHTMAP_ID %283 + OpStore %407 %406 + %408 = OpCompositeExtract %v4float %316 2 + %409 = OpAccessChain %_ptr_Output_v4float %out_var_VS_To_DS_Position %283 + OpStore %409 %408 + %410 = OpAccessChain %_ptr_Output__arr_v4float_uint_3 %out_var_PN_POSITION %283 + OpStore %410 %390 + %411 = OpAccessChain %_ptr_Output_v3float %out_var_PN_DisplacementScales %283 + OpStore %411 %308 + %412 = OpAccessChain %_ptr_Output_float %out_var_PN_TessellationMultiplier %283 + OpStore %412 %float_1 + %413 = OpAccessChain %_ptr_Output_float %out_var_PN_WorldDisplacementMultiplier %283 + OpStore %413 %float_1 + %414 = OpAccessChain %_ptr_Workgroup_FPNTessellationHSToDS %temp_var_hullMainRetVal %283 + OpStore %414 %391 + OpControlBarrier %uint_2 %uint_4 %uint_0 + %415 = OpIEqual %bool %283 %uint_0 + OpSelectionMerge %if_merge None + OpBranchConditional %415 %416 %if_merge + %416 = OpLabel + %417 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_0 + %418 = OpLoad %mat4v4float %417 + %419 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_6 + %420 = OpLoad %mat4v4float %419 + %421 = OpAccessChain %_ptr_Uniform_v3float %View %int_27 + %422 = OpLoad %v3float %421 + %423 = OpAccessChain %_ptr_Uniform_float %View %int_77 + %424 = OpLoad %float %423 + %425 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_0 + %426 = OpLoad %v4float %425 + %427 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_1 + %428 = OpLoad %v4float %427 + %429 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_2 + %430 = OpLoad %v4float %429 + %431 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_0 + %432 = OpLoad %v4float %431 + %433 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_1 + %434 = OpLoad %v4float %433 + %435 = 
OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_2 + %436 = OpLoad %v4float %435 + %437 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_0 + %438 = OpLoad %v4float %437 + %439 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_1 + %440 = OpLoad %v4float %439 + %441 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_2 + %442 = OpLoad %v4float %441 + %443 = OpFAdd %v4float %428 %430 + %444 = OpFAdd %v4float %443 %434 + %445 = OpFAdd %v4float %444 %436 + %446 = OpFAdd %v4float %445 %440 + %447 = OpFAdd %v4float %446 %442 + %448 = OpFMul %v4float %447 %139 + %449 = OpFAdd %v4float %438 %432 + %450 = OpFAdd %v4float %449 %426 + %451 = OpFMul %v4float %450 %136 + %452 = OpFSub %v4float %448 %451 + %453 = OpFMul %v4float %452 %137 + %454 = OpFAdd %v4float %448 %453 + %455 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_1 %int_3 + %456 = OpLoad %float %455 + %457 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_2 %int_3 + %458 = OpLoad %float %457 + %459 = OpFAdd %float %456 %458 + %460 = OpFMul %float %float_0_5 %459 + %461 = OpCompositeInsert %v4float %460 %140 0 + %462 = OpLoad %float %457 + %463 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_0 %int_3 + %464 = OpLoad %float %463 + %465 = OpFAdd %float %462 %464 + %466 = OpFMul %float %float_0_5 %465 + %467 = OpCompositeInsert %v4float %466 %461 1 + %468 = OpLoad %float %463 + %469 = OpLoad %float %455 + %470 = OpFAdd %float %468 %469 + %471 = OpFMul %float %float_0_5 %470 + %472 = OpCompositeInsert %v4float %471 %467 2 + %473 = OpLoad %float %463 + %474 = OpLoad %float %455 + %475 = OpFAdd %float %473 %474 + %476 = OpLoad %float %457 + %477 = OpFAdd %float %475 %476 + %478 = OpFMul %float %float_0_333000004 %477 + %479 = OpCompositeInsert %v4float %478 %472 3 + %480 = OpVectorShuffle %v3float %426 %426 0 1 2 + %481 = 
OpVectorShuffle %v3float %432 %432 0 1 2 + %482 = OpVectorShuffle %v3float %438 %438 0 1 2 + OpBranch %483 + %483 = OpLabel + OpLoopMerge %484 %485 None + OpBranch %486 + %486 = OpLabel + %487 = OpMatrixTimesVector %v4float %420 %76 + %488 = OpCompositeExtract %float %426 0 + %489 = OpCompositeExtract %float %426 1 + %490 = OpCompositeExtract %float %426 2 + %491 = OpCompositeConstruct %v4float %488 %489 %490 %float_1 + %492 = OpMatrixTimesVector %v4float %418 %491 + %493 = OpVectorShuffle %v3float %492 %492 0 1 2 + %494 = OpVectorShuffle %v3float %487 %487 0 1 2 + %495 = OpFSub %v3float %493 %494 + %496 = OpCompositeExtract %float %492 3 + %497 = OpCompositeExtract %float %487 3 + %498 = OpFAdd %float %496 %497 + %499 = OpCompositeConstruct %v3float %498 %498 %498 + %500 = OpFOrdLessThan %v3bool %495 %499 + %501 = OpSelect %v3int %500 %81 %73 + %502 = OpFAdd %v3float %493 %494 + %503 = OpFNegate %float %496 + %504 = OpFSub %float %503 %497 + %505 = OpCompositeConstruct %v3float %504 %504 %504 + %506 = OpFOrdGreaterThan %v3bool %502 %505 + %507 = OpSelect %v3int %506 %81 %73 + %508 = OpIMul %v3int %82 %507 + %509 = OpIAdd %v3int %501 %508 + %510 = OpCompositeExtract %float %432 0 + %511 = OpCompositeExtract %float %432 1 + %512 = OpCompositeExtract %float %432 2 + %513 = OpCompositeConstruct %v4float %510 %511 %512 %float_1 + %514 = OpMatrixTimesVector %v4float %418 %513 + %515 = OpVectorShuffle %v3float %514 %514 0 1 2 + %516 = OpFSub %v3float %515 %494 + %517 = OpCompositeExtract %float %514 3 + %518 = OpFAdd %float %517 %497 + %519 = OpCompositeConstruct %v3float %518 %518 %518 + %520 = OpFOrdLessThan %v3bool %516 %519 + %521 = OpSelect %v3int %520 %81 %73 + %522 = OpFAdd %v3float %515 %494 + %523 = OpFNegate %float %517 + %524 = OpFSub %float %523 %497 + %525 = OpCompositeConstruct %v3float %524 %524 %524 + %526 = OpFOrdGreaterThan %v3bool %522 %525 + %527 = OpSelect %v3int %526 %81 %73 + %528 = OpIMul %v3int %82 %527 + %529 = OpIAdd %v3int %521 %528 + %530 = 
OpBitwiseOr %v3int %509 %529 + %531 = OpCompositeExtract %float %438 0 + %532 = OpCompositeExtract %float %438 1 + %533 = OpCompositeExtract %float %438 2 + %534 = OpCompositeConstruct %v4float %531 %532 %533 %float_1 + %535 = OpMatrixTimesVector %v4float %418 %534 + %536 = OpVectorShuffle %v3float %535 %535 0 1 2 + %537 = OpFSub %v3float %536 %494 + %538 = OpCompositeExtract %float %535 3 + %539 = OpFAdd %float %538 %497 + %540 = OpCompositeConstruct %v3float %539 %539 %539 + %541 = OpFOrdLessThan %v3bool %537 %540 + %542 = OpSelect %v3int %541 %81 %73 + %543 = OpFAdd %v3float %536 %494 + %544 = OpFNegate %float %538 + %545 = OpFSub %float %544 %497 + %546 = OpCompositeConstruct %v3float %545 %545 %545 + %547 = OpFOrdGreaterThan %v3bool %543 %546 + %548 = OpSelect %v3int %547 %81 %73 + %549 = OpIMul %v3int %82 %548 + %550 = OpIAdd %v3int %542 %549 + %551 = OpBitwiseOr %v3int %530 %550 + %552 = OpINotEqual %v3bool %551 %74 + %553 = OpAny %bool %552 + OpSelectionMerge %554 None + OpBranchConditional %553 %555 %554 + %555 = OpLabel + OpBranch %484 + %554 = OpLabel + %556 = OpFSub %v3float %480 %481 + %557 = OpFSub %v3float %481 %482 + %558 = OpFSub %v3float %482 %480 + %559 = OpFAdd %v3float %480 %481 + %560 = OpFMul %v3float %77 %559 + %561 = OpFSub %v3float %560 %422 + %562 = OpFAdd %v3float %481 %482 + %563 = OpFMul %v3float %77 %562 + %564 = OpFSub %v3float %563 %422 + %565 = OpFAdd %v3float %482 %480 + %566 = OpFMul %v3float %77 %565 + %567 = OpFSub %v3float %566 %422 + %568 = OpDot %float %557 %557 + %569 = OpDot %float %564 %564 + %570 = OpFDiv %float %568 %569 + %571 = OpExtInst %float %1 Sqrt %570 + %572 = OpDot %float %558 %558 + %573 = OpDot %float %567 %567 + %574 = OpFDiv %float %572 %573 + %575 = OpExtInst %float %1 Sqrt %574 + %576 = OpDot %float %556 %556 + %577 = OpDot %float %561 %561 + %578 = OpFDiv %float %576 %577 + %579 = OpExtInst %float %1 Sqrt %578 + %580 = OpCompositeConstruct %v4float %571 %575 %579 %float_1 + %581 = OpFAdd %float %571 %575 
+ %582 = OpFAdd %float %581 %579 + %583 = OpFMul %float %float_0_333000004 %582 + %584 = OpCompositeInsert %v4float %583 %580 3 + %585 = OpCompositeConstruct %v4float %424 %424 %424 %424 + %586 = OpFMul %v4float %585 %584 + OpBranch %484 + %485 = OpLabel + OpBranch %483 + %484 = OpLabel + %587 = OpPhi %v4float %76 %555 %586 %554 + %588 = OpFMul %v4float %479 %587 + %589 = OpExtInst %v4float %1 FClamp %588 %67 %69 + %590 = OpCompositeExtract %float %589 0 + %591 = OpCompositeExtract %float %589 1 + %592 = OpCompositeExtract %float %589 2 + %593 = OpCompositeExtract %float %589 3 + %594 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_0 + OpStore %594 %590 + %595 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_1 + OpStore %595 %591 + %596 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_2 + OpStore %596 %592 + %597 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %uint_0 + OpStore %597 %593 + OpStore %out_var_PN_POSITION9 %454 + OpBranch %if_merge + %if_merge = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc b/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc new file mode 100644 index 00000000000..a146896b90b --- /dev/null +++ b/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc @@ -0,0 +1,1264 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 607 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %MainHull "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_VS_To_DS_Position %in_var_VS_To_DS_VertexID %gl_InvocationID %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_VS_To_DS_Position %out_var_VS_To_DS_VertexID %out_var_PN_POSITION %out_var_PN_DisplacementScales %out_var_PN_TessellationMultiplier 
%out_var_PN_WorldDisplacementMultiplier %out_var_PN_DominantVertex %out_var_PN_DominantVertex1 %out_var_PN_DominantVertex2 %out_var_PN_DominantEdge %out_var_PN_DominantEdge1 %out_var_PN_DominantEdge2 %out_var_PN_DominantEdge3 %out_var_PN_DominantEdge4 %out_var_PN_DominantEdge5 %gl_TessLevelOuter %gl_TessLevelInner %out_var_PN_POSITION9 + OpExecutionMode %MainHull Triangles + OpExecutionMode %MainHull SpacingFractionalOdd + OpExecutionMode %MainHull VertexOrderCw + OpExecutionMode %MainHull OutputVertices 3 + OpSource HLSL 600 + OpName %FPNTessellationHSToDS "FPNTessellationHSToDS" + OpMemberName %FPNTessellationHSToDS 0 "PassSpecificData" + OpMemberName %FPNTessellationHSToDS 1 "WorldPosition" + OpMemberName %FPNTessellationHSToDS 2 "DisplacementScale" + OpMemberName %FPNTessellationHSToDS 3 "TessellationMultiplier" + OpMemberName %FPNTessellationHSToDS 4 "WorldDisplacementMultiplier" + OpMemberName %FPNTessellationHSToDS 5 "DominantVertex" + OpMemberName %FPNTessellationHSToDS 6 "DominantEdge" + OpName %FHitProxyVSToDS "FHitProxyVSToDS" + OpMemberName %FHitProxyVSToDS 0 "FactoryInterpolants" + OpMemberName %FHitProxyVSToDS 1 "Position" + OpMemberName %FHitProxyVSToDS 2 "VertexID" + OpName %FVertexFactoryInterpolantsVSToDS "FVertexFactoryInterpolantsVSToDS" + OpMemberName %FVertexFactoryInterpolantsVSToDS 0 "InterpolantsVSToPS" + OpName %FVertexFactoryInterpolantsVSToPS "FVertexFactoryInterpolantsVSToPS" + OpMemberName %FVertexFactoryInterpolantsVSToPS 0 "TangentToWorld0" + OpMemberName %FVertexFactoryInterpolantsVSToPS 1 "TangentToWorld2" + OpName %FHullShaderConstantDominantVertexData "FHullShaderConstantDominantVertexData" + OpMemberName %FHullShaderConstantDominantVertexData 0 "UV" + OpMemberName %FHullShaderConstantDominantVertexData 1 "Normal" + OpMemberName %FHullShaderConstantDominantVertexData 2 "Tangent" + OpName %FHullShaderConstantDominantEdgeData "FHullShaderConstantDominantEdgeData" + OpMemberName %FHullShaderConstantDominantEdgeData 0 "UV0" + 
OpMemberName %FHullShaderConstantDominantEdgeData 1 "UV1" + OpMemberName %FHullShaderConstantDominantEdgeData 2 "Normal0" + OpMemberName %FHullShaderConstantDominantEdgeData 3 "Normal1" + OpMemberName %FHullShaderConstantDominantEdgeData 4 "Tangent0" + OpMemberName %FHullShaderConstantDominantEdgeData 5 "Tangent1" + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 "View_ViewToClipNoAA" + OpMemberName %type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 "View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 
"View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 45 "PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName %type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 "PrePadding_View_2076" + OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName 
%type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName %type_View 81 "View_DeltaTime" + OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 
"View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 "View_AtmosphericFogSunPower" + OpMemberName %type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + OpMemberName %type_View 132 "PrePadding_View_2524" + OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName 
%type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 "View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + OpMemberName %type_View 165 "PrePadding_View_2956" + OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + 
OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_Primitive "type.Primitive" + OpMemberName %type_Primitive 0 "Primitive_LocalToWorld" + OpMemberName %type_Primitive 1 "Primitive_InvNonUniformScaleAndDeterminantSign" + OpMemberName %type_Primitive 2 "Primitive_ObjectWorldPositionAndRadius" + OpMemberName %type_Primitive 3 "Primitive_WorldToLocal" + OpMemberName %type_Primitive 4 "Primitive_PreviousLocalToWorld" + OpMemberName %type_Primitive 5 "Primitive_PreviousWorldToLocal" + OpMemberName %type_Primitive 6 "Primitive_ActorWorldPosition" + OpMemberName %type_Primitive 7 "Primitive_UseSingleSampleShadowFromStationaryLights" + OpMemberName %type_Primitive 8 "Primitive_ObjectBounds" + OpMemberName %type_Primitive 9 "Primitive_LpvBiasMultiplier" + OpMemberName %type_Primitive 10 "Primitive_DecalReceiverMask" + OpMemberName %type_Primitive 11 "Primitive_PerObjectGBufferData" + OpMemberName %type_Primitive 12 "Primitive_UseVolumetricLightmapShadowFromStationaryLights" + OpMemberName %type_Primitive 13 "Primitive_DrawsVelocity" + OpMemberName %type_Primitive 14 "Primitive_ObjectOrientation" + OpMemberName %type_Primitive 15 "Primitive_NonUniformScale" + OpMemberName %type_Primitive 16 "Primitive_LocalObjectBoundsMin" + OpMemberName %type_Primitive 17 
"Primitive_LightingChannelMask" + OpMemberName %type_Primitive 18 "Primitive_LocalObjectBoundsMax" + OpMemberName %type_Primitive 19 "Primitive_LightmapDataIndex" + OpMemberName %type_Primitive 20 "Primitive_PreSkinnedLocalBounds" + OpMemberName %type_Primitive 21 "Primitive_SingleCaptureIndex" + OpMemberName %type_Primitive 22 "Primitive_OutputVelocity" + OpMemberName %type_Primitive 23 "PrePadding_Primitive_420" + OpMemberName %type_Primitive 24 "PrePadding_Primitive_424" + OpMemberName %type_Primitive 25 "PrePadding_Primitive_428" + OpMemberName %type_Primitive 26 "Primitive_CustomPrimitiveData" + OpName %Primitive "Primitive" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_VS_To_DS_Position "in.var.VS_To_DS_Position" + OpName %in_var_VS_To_DS_VertexID "in.var.VS_To_DS_VertexID" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_VS_To_DS_Position "out.var.VS_To_DS_Position" + OpName %out_var_VS_To_DS_VertexID "out.var.VS_To_DS_VertexID" + OpName %out_var_PN_POSITION "out.var.PN_POSITION" + OpName %out_var_PN_DisplacementScales "out.var.PN_DisplacementScales" + OpName %out_var_PN_TessellationMultiplier "out.var.PN_TessellationMultiplier" + OpName %out_var_PN_WorldDisplacementMultiplier "out.var.PN_WorldDisplacementMultiplier" + OpName %out_var_PN_DominantVertex "out.var.PN_DominantVertex" + OpName %out_var_PN_DominantVertex1 "out.var.PN_DominantVertex1" + OpName %out_var_PN_DominantVertex2 "out.var.PN_DominantVertex2" + OpName %out_var_PN_DominantEdge "out.var.PN_DominantEdge" + OpName %out_var_PN_DominantEdge1 "out.var.PN_DominantEdge1" + OpName %out_var_PN_DominantEdge2 "out.var.PN_DominantEdge2" + OpName %out_var_PN_DominantEdge3 "out.var.PN_DominantEdge3" + OpName %out_var_PN_DominantEdge4 "out.var.PN_DominantEdge4" + OpName %out_var_PN_DominantEdge5 
"out.var.PN_DominantEdge5" + OpName %out_var_PN_POSITION9 "out.var.PN_POSITION9" + OpName %MainHull "MainHull" + OpName %param_var_I "param.var.I" + OpName %temp_var_hullMainRetVal "temp.var.hullMainRetVal" + OpName %if_merge "if.merge" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %in_var_VS_To_DS_VertexID UserSemantic "VS_To_DS_VertexID" + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorateString %gl_InvocationID UserSemantic "SV_OutputControlPointID" + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %out_var_VS_To_DS_VertexID UserSemantic "VS_To_DS_VertexID" + OpDecorateString %out_var_PN_POSITION UserSemantic "PN_POSITION" + OpDecorateString %out_var_PN_DisplacementScales UserSemantic "PN_DisplacementScales" + OpDecorateString %out_var_PN_TessellationMultiplier UserSemantic "PN_TessellationMultiplier" + OpDecorateString %out_var_PN_WorldDisplacementMultiplier UserSemantic "PN_WorldDisplacementMultiplier" + OpDecorateString %out_var_PN_DominantVertex UserSemantic "PN_DominantVertex" + OpDecorateString %out_var_PN_DominantVertex1 UserSemantic "PN_DominantVertex" + OpDecorateString %out_var_PN_DominantVertex2 UserSemantic "PN_DominantVertex" + OpDecorateString %out_var_PN_DominantEdge UserSemantic "PN_DominantEdge" + OpDecorateString %out_var_PN_DominantEdge1 UserSemantic "PN_DominantEdge" + OpDecorateString %out_var_PN_DominantEdge2 UserSemantic "PN_DominantEdge" + OpDecorateString %out_var_PN_DominantEdge3 UserSemantic "PN_DominantEdge" + OpDecorateString %out_var_PN_DominantEdge4 UserSemantic "PN_DominantEdge" + OpDecorateString 
%out_var_PN_DominantEdge5 UserSemantic "PN_DominantEdge" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %out_var_PN_POSITION9 UserSemantic "PN_POSITION9" + OpDecorate %out_var_PN_POSITION9 Patch + OpDecorate %in_var_TEXCOORD10_centroid Location 0 + OpDecorate %in_var_TEXCOORD11_centroid Location 1 + OpDecorate %in_var_VS_To_DS_Position Location 2 + OpDecorate %in_var_VS_To_DS_VertexID Location 3 + OpDecorate %out_var_PN_DisplacementScales Location 0 + OpDecorate %out_var_PN_DominantEdge Location 1 + OpDecorate %out_var_PN_DominantEdge1 Location 2 + OpDecorate %out_var_PN_DominantEdge2 Location 3 + OpDecorate %out_var_PN_DominantEdge3 Location 4 + OpDecorate %out_var_PN_DominantEdge4 Location 5 + OpDecorate %out_var_PN_DominantEdge5 Location 6 + OpDecorate %out_var_PN_DominantVertex Location 7 + OpDecorate %out_var_PN_DominantVertex1 Location 8 + OpDecorate %out_var_PN_DominantVertex2 Location 9 + OpDecorate %out_var_PN_POSITION Location 10 + OpDecorate %out_var_PN_POSITION9 Location 13 + OpDecorate %out_var_PN_TessellationMultiplier Location 14 + OpDecorate %out_var_PN_WorldDisplacementMultiplier Location 15 + OpDecorate %out_var_TEXCOORD10_centroid Location 16 + OpDecorate %out_var_TEXCOORD11_centroid Location 17 + OpDecorate %out_var_VS_To_DS_Position Location 18 + OpDecorate %out_var_VS_To_DS_VertexID Location 19 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %Primitive DescriptorSet 0 + OpDecorate %Primitive Binding 1 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 
MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 
928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + OpMemberDecorate %type_View 30 Offset 1040 + OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + 
OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate 
%type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate %type_View 92 Offset 2236 + OpMemberDecorate %type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + 
OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + OpMemberDecorate %type_View 136 Offset 2560 + OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate %type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate %type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate %type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate 
%type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpDecorate %type_View Block + OpMemberDecorate %type_Primitive 0 Offset 0 + OpMemberDecorate %type_Primitive 0 MatrixStride 16 + OpMemberDecorate %type_Primitive 0 ColMajor + OpMemberDecorate %type_Primitive 1 Offset 64 + OpMemberDecorate %type_Primitive 2 Offset 80 + OpMemberDecorate %type_Primitive 3 Offset 96 + OpMemberDecorate %type_Primitive 3 MatrixStride 16 + OpMemberDecorate %type_Primitive 3 ColMajor + OpMemberDecorate %type_Primitive 4 Offset 160 + OpMemberDecorate %type_Primitive 4 MatrixStride 16 + OpMemberDecorate %type_Primitive 4 ColMajor + OpMemberDecorate %type_Primitive 5 Offset 224 + OpMemberDecorate %type_Primitive 5 MatrixStride 16 + OpMemberDecorate %type_Primitive 5 ColMajor + OpMemberDecorate %type_Primitive 6 Offset 288 + OpMemberDecorate %type_Primitive 7 Offset 300 + OpMemberDecorate %type_Primitive 8 Offset 304 + OpMemberDecorate %type_Primitive 9 Offset 316 + OpMemberDecorate %type_Primitive 10 Offset 320 + OpMemberDecorate %type_Primitive 11 Offset 324 + OpMemberDecorate %type_Primitive 12 Offset 328 + OpMemberDecorate %type_Primitive 13 Offset 332 + OpMemberDecorate %type_Primitive 14 Offset 336 + OpMemberDecorate %type_Primitive 15 Offset 352 + OpMemberDecorate %type_Primitive 16 Offset 368 + OpMemberDecorate %type_Primitive 17 Offset 380 + OpMemberDecorate %type_Primitive 18 Offset 384 + OpMemberDecorate %type_Primitive 19 Offset 396 + OpMemberDecorate %type_Primitive 20 Offset 400 + OpMemberDecorate %type_Primitive 21 Offset 412 + OpMemberDecorate 
%type_Primitive 22 Offset 416 + OpMemberDecorate %type_Primitive 23 Offset 420 + OpMemberDecorate %type_Primitive 24 Offset 424 + OpMemberDecorate %type_Primitive 25 Offset 428 + OpMemberDecorate %type_Primitive 26 Offset 432 + OpDecorate %type_Primitive Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %v2int = OpTypeVector %int 2 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 + %float_2 = OpConstant %float 2 + %63 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %float_0_5 = OpConstant %float 0.5 + %int_3 = OpConstant %int 3 +%float_0_333000004 = OpConstant %float 0.333000004 + %float_1 = OpConstant %float 1 + %68 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_15 = OpConstant %float 15 + %70 = OpConstantComposite %v4float %float_15 %float_15 %float_15 %float_15 +%FVertexFactoryInterpolantsVSToPS = OpTypeStruct %v4float %v4float +%FVertexFactoryInterpolantsVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToPS +%FHitProxyVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToDS %v4float %uint +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%FHullShaderConstantDominantVertexData = OpTypeStruct %v2float %v4float %v3float +%FHullShaderConstantDominantEdgeData = OpTypeStruct %v2float %v2float %v4float %v4float %v3float %v3float +%FPNTessellationHSToDS = OpTypeStruct %FHitProxyVSToDS %_arr_v4float_uint_3 %v3float %float %float %FHullShaderConstantDominantVertexData %FHullShaderConstantDominantEdgeData + %uint_9 = OpConstant %uint 9 + %v3int = OpTypeVector %int 3 + %74 = 
OpConstantComposite %v3int %int_0 %int_0 %int_0 + %75 = OpConstantComposite %v3int %int_3 %int_3 %int_3 + %float_0 = OpConstant %float 0 + %77 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %78 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 + %int_78 = OpConstant %int 78 + %int_15 = OpConstant %int 15 + %int_7 = OpConstant %int 7 + %int_28 = OpConstant %int 28 + %83 = OpConstantComposite %v3int %int_1 %int_1 %int_1 + %84 = OpConstantComposite %v3int %int_2 %int_2 %int_2 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float 
%uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_Primitive = OpTypeStruct %mat4v4float %v4float %v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %float %float %float %float %v4float %v4float %v3float %uint %v3float %uint %v3float %int %uint %uint %uint %uint %_arr_v4float_uint_4 +%_ptr_Uniform_type_Primitive = OpTypePointer Uniform %type_Primitive + %uint_12 = OpConstant %uint 12 +%_arr_v4float_uint_12 = OpTypeArray %v4float %uint_12 +%_ptr_Input__arr_v4float_uint_12 = OpTypePointer Input %_arr_v4float_uint_12 +%_arr_uint_uint_12 = OpTypeArray %uint %uint_12 +%_ptr_Input__arr_uint_uint_12 = OpTypePointer Input %_arr_uint_uint_12 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 +%_ptr_Output__arr_uint_uint_3 = OpTypePointer Output %_arr_uint_uint_3 +%_arr__arr_v4float_uint_3_uint_3 = OpTypeArray %_arr_v4float_uint_3 %uint_3 +%_ptr_Output__arr__arr_v4float_uint_3_uint_3 = OpTypePointer Output %_arr__arr_v4float_uint_3_uint_3 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Output__arr_v3float_uint_3 = OpTypePointer Output %_arr_v3float_uint_3 +%_ptr_Output__arr_float_uint_3 = OpTypePointer Output %_arr_float_uint_3 +%_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3 +%_ptr_Output__arr_v2float_uint_3 = OpTypePointer Output %_arr_v2float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_ptr_Output_v4float = OpTypePointer Output 
%v4float + %void = OpTypeVoid + %109 = OpTypeFunction %void +%_arr_FHitProxyVSToDS_uint_12 = OpTypeArray %FHitProxyVSToDS %uint_12 +%_ptr_Function__arr_FHitProxyVSToDS_uint_12 = OpTypePointer Function %_arr_FHitProxyVSToDS_uint_12 +%_arr_FPNTessellationHSToDS_uint_3 = OpTypeArray %FPNTessellationHSToDS %uint_3 +%_ptr_Function__arr_FPNTessellationHSToDS_uint_3 = OpTypePointer Function %_arr_FPNTessellationHSToDS_uint_3 +%_ptr_Workgroup__arr_FPNTessellationHSToDS_uint_3 = OpTypePointer Workgroup %_arr_FPNTessellationHSToDS_uint_3 +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Output_v3float = OpTypePointer Output %v3float +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_ptr_Function_FPNTessellationHSToDS = OpTypePointer Function %FPNTessellationHSToDS +%_ptr_Workgroup_FPNTessellationHSToDS = OpTypePointer Workgroup %FPNTessellationHSToDS + %bool = OpTypeBool +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float +%_ptr_Workgroup_float = OpTypePointer Workgroup %float +%mat3v3float = OpTypeMatrix %v3float 3 +%_ptr_Function_FVertexFactoryInterpolantsVSToDS = OpTypePointer Function %FVertexFactoryInterpolantsVSToDS +%_ptr_Function_FHitProxyVSToDS = OpTypePointer Function %FHitProxyVSToDS + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %View = OpVariable %_ptr_Uniform_type_View Uniform + %Primitive = OpVariable %_ptr_Uniform_type_Primitive Uniform +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_VS_To_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_12 
Input +%in_var_VS_To_DS_VertexID = OpVariable %_ptr_Input__arr_uint_uint_12 Input +%gl_InvocationID = OpVariable %_ptr_Input_uint Input +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_VS_To_DS_Position = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_VS_To_DS_VertexID = OpVariable %_ptr_Output__arr_uint_uint_3 Output +%out_var_PN_POSITION = OpVariable %_ptr_Output__arr__arr_v4float_uint_3_uint_3 Output +%out_var_PN_DisplacementScales = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%out_var_PN_TessellationMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%out_var_PN_WorldDisplacementMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%out_var_PN_DominantVertex = OpVariable %_ptr_Output__arr_v2float_uint_3 Output +%out_var_PN_DominantVertex1 = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_PN_DominantVertex2 = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%out_var_PN_DominantEdge = OpVariable %_ptr_Output__arr_v2float_uint_3 Output +%out_var_PN_DominantEdge1 = OpVariable %_ptr_Output__arr_v2float_uint_3 Output +%out_var_PN_DominantEdge2 = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_PN_DominantEdge3 = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_PN_DominantEdge4 = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%out_var_PN_DominantEdge5 = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output +%out_var_PN_POSITION9 = OpVariable %_ptr_Output_v4float Output + %130 = OpConstantNull %v2float +%float_0_333333343 = OpConstant %float 0.333333343 + %132 = OpConstantComposite %v4float %float_0_333333343 %float_0_333333343 %float_0_333333343 %float_0_333333343 + %133 = OpConstantComposite %v4float %float_0_5 
%float_0_5 %float_0_5 %float_0_5 +%float_0_166666672 = OpConstant %float 0.166666672 + %135 = OpConstantComposite %v4float %float_0_166666672 %float_0_166666672 %float_0_166666672 %float_0_166666672 + %136 = OpUndef %v4float + +; XXX: Original asm used Function here, which is wrong. +; This patches the SPIR-V to be correct. +%temp_var_hullMainRetVal = OpVariable %_ptr_Workgroup__arr_FPNTessellationHSToDS_uint_3 Workgroup + + %MainHull = OpFunction %void None %109 + %137 = OpLabel +%param_var_I = OpVariable %_ptr_Function__arr_FHitProxyVSToDS_uint_12 Function + %138 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD10_centroid + %139 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD11_centroid + %140 = OpCompositeExtract %v4float %138 0 + %141 = OpCompositeExtract %v4float %139 0 + %142 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %140 %141 + %143 = OpCompositeExtract %v4float %138 1 + %144 = OpCompositeExtract %v4float %139 1 + %145 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %143 %144 + %146 = OpCompositeExtract %v4float %138 2 + %147 = OpCompositeExtract %v4float %139 2 + %148 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %146 %147 + %149 = OpCompositeExtract %v4float %138 3 + %150 = OpCompositeExtract %v4float %139 3 + %151 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %149 %150 + %152 = OpCompositeExtract %v4float %138 4 + %153 = OpCompositeExtract %v4float %139 4 + %154 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %152 %153 + %155 = OpCompositeExtract %v4float %138 5 + %156 = OpCompositeExtract %v4float %139 5 + %157 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %155 %156 + %158 = OpCompositeExtract %v4float %138 6 + %159 = OpCompositeExtract %v4float %139 6 + %160 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %158 %159 + %161 = OpCompositeExtract %v4float %138 7 + %162 = OpCompositeExtract %v4float %139 7 + %163 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %161 %162 + %164 
= OpCompositeExtract %v4float %138 8 + %165 = OpCompositeExtract %v4float %139 8 + %166 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %164 %165 + %167 = OpCompositeExtract %v4float %138 9 + %168 = OpCompositeExtract %v4float %139 9 + %169 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %167 %168 + %170 = OpCompositeExtract %v4float %138 10 + %171 = OpCompositeExtract %v4float %139 10 + %172 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %170 %171 + %173 = OpCompositeExtract %v4float %138 11 + %174 = OpCompositeExtract %v4float %139 11 + %175 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %173 %174 + %176 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %142 + %177 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %145 + %178 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %148 + %179 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %151 + %180 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %154 + %181 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %157 + %182 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %160 + %183 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %163 + %184 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %166 + %185 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %169 + %186 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %172 + %187 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %175 + %188 = OpLoad %_arr_v4float_uint_12 %in_var_VS_To_DS_Position + %189 = OpLoad %_arr_uint_uint_12 %in_var_VS_To_DS_VertexID + %190 = OpCompositeExtract %v4float %188 0 + %191 = OpCompositeExtract %uint %189 0 + %192 = OpCompositeConstruct %FHitProxyVSToDS %176 %190 %191 + %193 = OpCompositeExtract %v4float %188 1 + %194 = OpCompositeExtract %uint %189 1 + %195 = OpCompositeConstruct %FHitProxyVSToDS %177 %193 %194 + %196 = OpCompositeExtract %v4float %188 2 + %197 = OpCompositeExtract %uint %189 2 + %198 = 
OpCompositeConstruct %FHitProxyVSToDS %178 %196 %197 + %199 = OpCompositeExtract %v4float %188 3 + %200 = OpCompositeExtract %uint %189 3 + %201 = OpCompositeConstruct %FHitProxyVSToDS %179 %199 %200 + %202 = OpCompositeExtract %v4float %188 4 + %203 = OpCompositeExtract %uint %189 4 + %204 = OpCompositeConstruct %FHitProxyVSToDS %180 %202 %203 + %205 = OpCompositeExtract %v4float %188 5 + %206 = OpCompositeExtract %uint %189 5 + %207 = OpCompositeConstruct %FHitProxyVSToDS %181 %205 %206 + %208 = OpCompositeExtract %v4float %188 6 + %209 = OpCompositeExtract %uint %189 6 + %210 = OpCompositeConstruct %FHitProxyVSToDS %182 %208 %209 + %211 = OpCompositeExtract %v4float %188 7 + %212 = OpCompositeExtract %uint %189 7 + %213 = OpCompositeConstruct %FHitProxyVSToDS %183 %211 %212 + %214 = OpCompositeExtract %v4float %188 8 + %215 = OpCompositeExtract %uint %189 8 + %216 = OpCompositeConstruct %FHitProxyVSToDS %184 %214 %215 + %217 = OpCompositeExtract %v4float %188 9 + %218 = OpCompositeExtract %uint %189 9 + %219 = OpCompositeConstruct %FHitProxyVSToDS %185 %217 %218 + %220 = OpCompositeExtract %v4float %188 10 + %221 = OpCompositeExtract %uint %189 10 + %222 = OpCompositeConstruct %FHitProxyVSToDS %186 %220 %221 + %223 = OpCompositeExtract %v4float %188 11 + %224 = OpCompositeExtract %uint %189 11 + %225 = OpCompositeConstruct %FHitProxyVSToDS %187 %223 %224 + %226 = OpCompositeConstruct %_arr_FHitProxyVSToDS_uint_12 %192 %195 %198 %201 %204 %207 %210 %213 %216 %219 %222 %225 + OpStore %param_var_I %226 + %227 = OpLoad %uint %gl_InvocationID + %228 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %227 %int_0 + %229 = OpLoad %FVertexFactoryInterpolantsVSToDS %228 + %230 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %229 0 + %231 = OpCompositeExtract %v4float %230 0 + %232 = OpCompositeExtract %v4float %230 1 + %233 = OpVectorShuffle %v3float %231 %231 0 1 2 + %234 = OpVectorShuffle %v3float %232 %232 0 1 2 + %235 = OpExtInst 
%v3float %1 Cross %234 %233 + %236 = OpCompositeExtract %float %232 3 + %237 = OpCompositeConstruct %v3float %236 %236 %236 + %238 = OpFMul %v3float %235 %237 + %239 = OpCompositeConstruct %mat3v3float %233 %238 %234 + %240 = OpCompositeExtract %float %232 0 + %241 = OpCompositeExtract %float %232 1 + %242 = OpCompositeExtract %float %232 2 + %243 = OpCompositeConstruct %v4float %240 %241 %242 %float_0 + %244 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_15 + %245 = OpLoad %v4float %244 + %246 = OpVectorShuffle %v3float %245 %245 0 1 2 + %247 = OpVectorTimesMatrix %v3float %246 %239 + %248 = OpULessThan %bool %227 %uint_2 + %249 = OpIAdd %uint %227 %uint_1 + %250 = OpSelect %uint %248 %249 %uint_0 + %251 = OpIMul %uint %uint_2 %227 + %252 = OpIAdd %uint %uint_3 %251 + %253 = OpIAdd %uint %251 %uint_4 + %254 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %227 + %255 = OpLoad %FHitProxyVSToDS %254 + %256 = OpAccessChain %_ptr_Function_v4float %param_var_I %227 %int_1 + %257 = OpLoad %v4float %256 + %258 = OpULessThan %bool %250 %uint_2 + %259 = OpIAdd %uint %250 %uint_1 + %260 = OpSelect %uint %258 %259 %uint_0 + %261 = OpIMul %uint %uint_2 %250 + %262 = OpIAdd %uint %uint_3 %261 + %263 = OpIAdd %uint %261 %uint_4 + %264 = OpIAdd %uint %uint_9 %227 + %265 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %264 + %266 = OpLoad %FHitProxyVSToDS %265 + %267 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %266 0 + %268 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %267 0 + %269 = OpCompositeExtract %v4float %268 0 + %270 = OpCompositeExtract %v4float %268 1 + %271 = OpVectorShuffle %v3float %269 %269 0 1 2 + %272 = OpCompositeExtract %float %270 0 + %273 = OpCompositeExtract %float %270 1 + %274 = OpCompositeExtract %float %270 2 + %275 = OpCompositeConstruct %v4float %272 %273 %274 %float_0 + %276 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %250 + %277 = OpLoad %FHitProxyVSToDS %276 + %278 = 
OpCompositeExtract %uint %277 2 + %279 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %260 + %280 = OpLoad %FHitProxyVSToDS %279 + %281 = OpCompositeExtract %uint %280 2 + %282 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %262 + %283 = OpLoad %FHitProxyVSToDS %282 + %284 = OpCompositeExtract %uint %283 2 + %285 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %263 + %286 = OpLoad %FHitProxyVSToDS %285 + %287 = OpCompositeExtract %uint %286 2 + %288 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %277 0 + %289 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %288 0 + %290 = OpCompositeExtract %v4float %289 0 + %291 = OpCompositeExtract %v4float %289 1 + %292 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %280 0 + %293 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %292 0 + %294 = OpCompositeExtract %v4float %293 0 + %295 = OpCompositeExtract %v4float %293 1 + %296 = OpULessThan %bool %284 %278 + %297 = OpIEqual %bool %284 %278 + %298 = OpULessThan %bool %287 %281 + %299 = OpLogicalAnd %bool %297 %298 + %300 = OpLogicalOr %bool %296 %299 + OpSelectionMerge %301 None + OpBranchConditional %300 %302 %301 + %302 = OpLabel + %303 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %283 0 + %304 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %303 0 + %305 = OpCompositeExtract %v4float %304 0 + %306 = OpCompositeExtract %v4float %304 1 + %307 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %286 0 + %308 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %307 0 + %309 = OpCompositeExtract %v4float %308 0 + %310 = OpCompositeExtract %v4float %308 1 + OpBranch %301 + %301 = OpLabel + %311 = OpPhi %v4float %294 %137 %309 %302 + %312 = OpPhi %v4float %295 %137 %310 %302 + %313 = OpPhi %v4float %290 %137 %305 %302 + %314 = OpPhi %v4float %291 %137 %306 %302 + %315 = OpVectorShuffle %v3float %313 %313 0 1 2 + %316 = OpVectorShuffle %v3float %311 %311 0 1 2 + %317 = OpCompositeExtract %float 
%314 0 + %318 = OpCompositeExtract %float %314 1 + %319 = OpCompositeExtract %float %314 2 + %320 = OpCompositeConstruct %v4float %317 %318 %319 %float_0 + %321 = OpCompositeExtract %float %312 0 + %322 = OpCompositeExtract %float %312 1 + %323 = OpCompositeExtract %float %312 2 + %324 = OpCompositeConstruct %v4float %321 %322 %323 %float_0 + %325 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %250 %int_0 + %326 = OpLoad %FVertexFactoryInterpolantsVSToDS %325 + %327 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %326 0 + %328 = OpCompositeExtract %v4float %327 1 + %329 = OpCompositeExtract %float %328 0 + %330 = OpCompositeExtract %float %328 1 + %331 = OpCompositeExtract %float %328 2 + %332 = OpCompositeConstruct %v4float %329 %330 %331 %float_0 + %333 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %252 %int_0 + %334 = OpLoad %FVertexFactoryInterpolantsVSToDS %333 + %335 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %334 0 + %336 = OpCompositeExtract %v4float %335 1 + %337 = OpCompositeExtract %float %336 0 + %338 = OpCompositeExtract %float %336 1 + %339 = OpCompositeExtract %float %336 2 + %340 = OpCompositeConstruct %v4float %337 %338 %339 %float_0 + %341 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %253 %int_0 + %342 = OpLoad %FVertexFactoryInterpolantsVSToDS %341 + %343 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %342 0 + %344 = OpCompositeExtract %v4float %343 1 + %345 = OpCompositeExtract %float %344 0 + %346 = OpCompositeExtract %float %344 1 + %347 = OpCompositeExtract %float %344 2 + %348 = OpCompositeConstruct %v4float %345 %346 %347 %float_0 + %349 = OpLoad %v4float %256 + %350 = OpAccessChain %_ptr_Function_v4float %param_var_I %250 %int_1 + %351 = OpLoad %v4float %350 + %352 = OpFMul %v4float %63 %349 + %353 = OpFAdd %v4float %352 %351 + %354 = OpFSub %v4float %351 %349 + %355 = OpDot %float %354 %243 + %356 = OpCompositeConstruct 
%v4float %355 %355 %355 %355 + %357 = OpFMul %v4float %356 %243 + %358 = OpFSub %v4float %353 %357 + %359 = OpFMul %v4float %358 %132 + %360 = OpAccessChain %_ptr_Function_v4float %param_var_I %252 %int_1 + %361 = OpLoad %v4float %360 + %362 = OpAccessChain %_ptr_Function_v4float %param_var_I %253 %int_1 + %363 = OpLoad %v4float %362 + %364 = OpFMul %v4float %63 %361 + %365 = OpFAdd %v4float %364 %363 + %366 = OpFSub %v4float %363 %361 + %367 = OpDot %float %366 %340 + %368 = OpCompositeConstruct %v4float %367 %367 %367 %367 + %369 = OpFMul %v4float %368 %340 + %370 = OpFSub %v4float %365 %369 + %371 = OpFMul %v4float %370 %132 + %372 = OpFAdd %v4float %359 %371 + %373 = OpFMul %v4float %372 %133 + %374 = OpLoad %v4float %350 + %375 = OpLoad %v4float %256 + %376 = OpFMul %v4float %63 %374 + %377 = OpFAdd %v4float %376 %375 + %378 = OpFSub %v4float %375 %374 + %379 = OpDot %float %378 %332 + %380 = OpCompositeConstruct %v4float %379 %379 %379 %379 + %381 = OpFMul %v4float %380 %332 + %382 = OpFSub %v4float %377 %381 + %383 = OpFMul %v4float %382 %132 + %384 = OpLoad %v4float %362 + %385 = OpLoad %v4float %360 + %386 = OpFMul %v4float %63 %384 + %387 = OpFAdd %v4float %386 %385 + %388 = OpFSub %v4float %385 %384 + %389 = OpDot %float %388 %348 + %390 = OpCompositeConstruct %v4float %389 %389 %389 %389 + %391 = OpFMul %v4float %390 %348 + %392 = OpFSub %v4float %387 %391 + %393 = OpFMul %v4float %392 %132 + %394 = OpFAdd %v4float %383 %393 + %395 = OpFMul %v4float %394 %133 + %396 = OpCompositeConstruct %FHullShaderConstantDominantEdgeData %130 %130 %320 %324 %315 %316 + %397 = OpCompositeConstruct %FHullShaderConstantDominantVertexData %130 %275 %271 + %398 = OpCompositeConstruct %_arr_v4float_uint_3 %257 %373 %395 + %399 = OpCompositeConstruct %FPNTessellationHSToDS %255 %398 %247 %float_1 %float_1 %397 %396 + %400 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %255 0 + %401 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %400 0 + %402 = 
OpCompositeExtract %v4float %401 0 + %403 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD10_centroid %227 + OpStore %403 %402 + %404 = OpCompositeExtract %v4float %401 1 + %405 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD11_centroid %227 + OpStore %405 %404 + %406 = OpCompositeExtract %v4float %255 1 + %407 = OpAccessChain %_ptr_Output_v4float %out_var_VS_To_DS_Position %227 + OpStore %407 %406 + %408 = OpCompositeExtract %uint %255 2 + %409 = OpAccessChain %_ptr_Output_uint %out_var_VS_To_DS_VertexID %227 + OpStore %409 %408 + %410 = OpAccessChain %_ptr_Output__arr_v4float_uint_3 %out_var_PN_POSITION %227 + OpStore %410 %398 + %411 = OpAccessChain %_ptr_Output_v3float %out_var_PN_DisplacementScales %227 + OpStore %411 %247 + %412 = OpAccessChain %_ptr_Output_float %out_var_PN_TessellationMultiplier %227 + OpStore %412 %float_1 + %413 = OpAccessChain %_ptr_Output_float %out_var_PN_WorldDisplacementMultiplier %227 + OpStore %413 %float_1 + %414 = OpAccessChain %_ptr_Output_v2float %out_var_PN_DominantVertex %227 + OpStore %414 %130 + %415 = OpAccessChain %_ptr_Output_v4float %out_var_PN_DominantVertex1 %227 + OpStore %415 %275 + %416 = OpAccessChain %_ptr_Output_v3float %out_var_PN_DominantVertex2 %227 + OpStore %416 %271 + %417 = OpAccessChain %_ptr_Output_v2float %out_var_PN_DominantEdge %227 + OpStore %417 %130 + %418 = OpAccessChain %_ptr_Output_v2float %out_var_PN_DominantEdge1 %227 + OpStore %418 %130 + %419 = OpAccessChain %_ptr_Output_v4float %out_var_PN_DominantEdge2 %227 + OpStore %419 %320 + %420 = OpAccessChain %_ptr_Output_v4float %out_var_PN_DominantEdge3 %227 + OpStore %420 %324 + %421 = OpAccessChain %_ptr_Output_v3float %out_var_PN_DominantEdge4 %227 + OpStore %421 %315 + %422 = OpAccessChain %_ptr_Output_v3float %out_var_PN_DominantEdge5 %227 + OpStore %422 %316 + %423 = OpAccessChain %_ptr_Workgroup_FPNTessellationHSToDS %temp_var_hullMainRetVal %227 + OpStore %423 %399 + OpControlBarrier %uint_2 %uint_4 %uint_0 + %424 = OpIEqual 
%bool %227 %uint_0 + OpSelectionMerge %if_merge None + OpBranchConditional %424 %425 %if_merge + %425 = OpLabel + %426 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_0 + %427 = OpLoad %mat4v4float %426 + %428 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_7 + %429 = OpLoad %mat4v4float %428 + %430 = OpAccessChain %_ptr_Uniform_v3float %View %int_28 + %431 = OpLoad %v3float %430 + %432 = OpAccessChain %_ptr_Uniform_float %View %int_78 + %433 = OpLoad %float %432 + %434 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_0 + %435 = OpLoad %v4float %434 + %436 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_1 + %437 = OpLoad %v4float %436 + %438 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_2 + %439 = OpLoad %v4float %438 + %440 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_0 + %441 = OpLoad %v4float %440 + %442 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_1 + %443 = OpLoad %v4float %442 + %444 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_2 + %445 = OpLoad %v4float %444 + %446 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_0 + %447 = OpLoad %v4float %446 + %448 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_1 + %449 = OpLoad %v4float %448 + %450 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_2 + %451 = OpLoad %v4float %450 + %452 = OpFAdd %v4float %437 %439 + %453 = OpFAdd %v4float %452 %443 + %454 = OpFAdd %v4float %453 %445 + %455 = OpFAdd %v4float %454 %449 + %456 = OpFAdd %v4float %455 %451 + %457 = OpFMul %v4float %456 %135 + %458 = OpFAdd %v4float %447 %441 + %459 = OpFAdd %v4float %458 %435 + %460 = OpFMul %v4float %459 %132 + %461 = OpFSub %v4float %457 %460 + %462 = OpFMul %v4float %461 %133 + %463 = OpFAdd 
%v4float %457 %462 + %464 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_1 %int_3 + %465 = OpLoad %float %464 + %466 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_2 %int_3 + %467 = OpLoad %float %466 + %468 = OpFAdd %float %465 %467 + %469 = OpFMul %float %float_0_5 %468 + %470 = OpCompositeInsert %v4float %469 %136 0 + %471 = OpLoad %float %466 + %472 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_0 %int_3 + %473 = OpLoad %float %472 + %474 = OpFAdd %float %471 %473 + %475 = OpFMul %float %float_0_5 %474 + %476 = OpCompositeInsert %v4float %475 %470 1 + %477 = OpLoad %float %472 + %478 = OpLoad %float %464 + %479 = OpFAdd %float %477 %478 + %480 = OpFMul %float %float_0_5 %479 + %481 = OpCompositeInsert %v4float %480 %476 2 + %482 = OpLoad %float %472 + %483 = OpLoad %float %464 + %484 = OpFAdd %float %482 %483 + %485 = OpLoad %float %466 + %486 = OpFAdd %float %484 %485 + %487 = OpFMul %float %float_0_333000004 %486 + %488 = OpCompositeInsert %v4float %487 %481 3 + %489 = OpVectorShuffle %v3float %435 %435 0 1 2 + %490 = OpVectorShuffle %v3float %441 %441 0 1 2 + %491 = OpVectorShuffle %v3float %447 %447 0 1 2 + OpBranch %492 + %492 = OpLabel + OpLoopMerge %493 %494 None + OpBranch %495 + %495 = OpLabel + %496 = OpMatrixTimesVector %v4float %429 %77 + %497 = OpCompositeExtract %float %435 0 + %498 = OpCompositeExtract %float %435 1 + %499 = OpCompositeExtract %float %435 2 + %500 = OpCompositeConstruct %v4float %497 %498 %499 %float_1 + %501 = OpMatrixTimesVector %v4float %427 %500 + %502 = OpVectorShuffle %v3float %501 %501 0 1 2 + %503 = OpVectorShuffle %v3float %496 %496 0 1 2 + %504 = OpFSub %v3float %502 %503 + %505 = OpCompositeExtract %float %501 3 + %506 = OpCompositeExtract %float %496 3 + %507 = OpFAdd %float %505 %506 + %508 = OpCompositeConstruct %v3float %507 %507 %507 + %509 = OpFOrdLessThan %v3bool %504 %508 + %510 = OpSelect %v3int %509 %83 %74 + %511 = OpFAdd %v3float %502 %503 + 
%512 = OpFNegate %float %505 + %513 = OpFSub %float %512 %506 + %514 = OpCompositeConstruct %v3float %513 %513 %513 + %515 = OpFOrdGreaterThan %v3bool %511 %514 + %516 = OpSelect %v3int %515 %83 %74 + %517 = OpIMul %v3int %84 %516 + %518 = OpIAdd %v3int %510 %517 + %519 = OpCompositeExtract %float %441 0 + %520 = OpCompositeExtract %float %441 1 + %521 = OpCompositeExtract %float %441 2 + %522 = OpCompositeConstruct %v4float %519 %520 %521 %float_1 + %523 = OpMatrixTimesVector %v4float %427 %522 + %524 = OpVectorShuffle %v3float %523 %523 0 1 2 + %525 = OpFSub %v3float %524 %503 + %526 = OpCompositeExtract %float %523 3 + %527 = OpFAdd %float %526 %506 + %528 = OpCompositeConstruct %v3float %527 %527 %527 + %529 = OpFOrdLessThan %v3bool %525 %528 + %530 = OpSelect %v3int %529 %83 %74 + %531 = OpFAdd %v3float %524 %503 + %532 = OpFNegate %float %526 + %533 = OpFSub %float %532 %506 + %534 = OpCompositeConstruct %v3float %533 %533 %533 + %535 = OpFOrdGreaterThan %v3bool %531 %534 + %536 = OpSelect %v3int %535 %83 %74 + %537 = OpIMul %v3int %84 %536 + %538 = OpIAdd %v3int %530 %537 + %539 = OpBitwiseOr %v3int %518 %538 + %540 = OpCompositeExtract %float %447 0 + %541 = OpCompositeExtract %float %447 1 + %542 = OpCompositeExtract %float %447 2 + %543 = OpCompositeConstruct %v4float %540 %541 %542 %float_1 + %544 = OpMatrixTimesVector %v4float %427 %543 + %545 = OpVectorShuffle %v3float %544 %544 0 1 2 + %546 = OpFSub %v3float %545 %503 + %547 = OpCompositeExtract %float %544 3 + %548 = OpFAdd %float %547 %506 + %549 = OpCompositeConstruct %v3float %548 %548 %548 + %550 = OpFOrdLessThan %v3bool %546 %549 + %551 = OpSelect %v3int %550 %83 %74 + %552 = OpFAdd %v3float %545 %503 + %553 = OpFNegate %float %547 + %554 = OpFSub %float %553 %506 + %555 = OpCompositeConstruct %v3float %554 %554 %554 + %556 = OpFOrdGreaterThan %v3bool %552 %555 + %557 = OpSelect %v3int %556 %83 %74 + %558 = OpIMul %v3int %84 %557 + %559 = OpIAdd %v3int %551 %558 + %560 = OpBitwiseOr %v3int %539 
%559 + %561 = OpINotEqual %v3bool %560 %75 + %562 = OpAny %bool %561 + OpSelectionMerge %563 None + OpBranchConditional %562 %564 %563 + %564 = OpLabel + OpBranch %493 + %563 = OpLabel + %565 = OpFSub %v3float %489 %490 + %566 = OpFSub %v3float %490 %491 + %567 = OpFSub %v3float %491 %489 + %568 = OpFAdd %v3float %489 %490 + %569 = OpFMul %v3float %78 %568 + %570 = OpFSub %v3float %569 %431 + %571 = OpFAdd %v3float %490 %491 + %572 = OpFMul %v3float %78 %571 + %573 = OpFSub %v3float %572 %431 + %574 = OpFAdd %v3float %491 %489 + %575 = OpFMul %v3float %78 %574 + %576 = OpFSub %v3float %575 %431 + %577 = OpDot %float %566 %566 + %578 = OpDot %float %573 %573 + %579 = OpFDiv %float %577 %578 + %580 = OpExtInst %float %1 Sqrt %579 + %581 = OpDot %float %567 %567 + %582 = OpDot %float %576 %576 + %583 = OpFDiv %float %581 %582 + %584 = OpExtInst %float %1 Sqrt %583 + %585 = OpDot %float %565 %565 + %586 = OpDot %float %570 %570 + %587 = OpFDiv %float %585 %586 + %588 = OpExtInst %float %1 Sqrt %587 + %589 = OpCompositeConstruct %v4float %580 %584 %588 %float_1 + %590 = OpFAdd %float %580 %584 + %591 = OpFAdd %float %590 %588 + %592 = OpFMul %float %float_0_333000004 %591 + %593 = OpCompositeInsert %v4float %592 %589 3 + %594 = OpCompositeConstruct %v4float %433 %433 %433 %433 + %595 = OpFMul %v4float %594 %593 + OpBranch %493 + %494 = OpLabel + OpBranch %492 + %493 = OpLabel + %596 = OpPhi %v4float %77 %564 %595 %563 + %597 = OpFMul %v4float %488 %596 + %598 = OpExtInst %v4float %1 FClamp %597 %68 %70 + %599 = OpCompositeExtract %float %598 0 + %600 = OpCompositeExtract %float %598 1 + %601 = OpCompositeExtract %float %598 2 + %602 = OpCompositeExtract %float %598 3 + %603 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_0 + OpStore %603 %599 + %604 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_1 + OpStore %604 %600 + %605 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_2 + OpStore %605 %601 + %606 = OpAccessChain %_ptr_Output_float 
%gl_TessLevelInner %uint_0 + OpStore %606 %602 + OpStore %out_var_PN_POSITION9 %463 + OpBranch %if_merge + %if_merge = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc b/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc new file mode 100644 index 00000000000..1a9b95e085a --- /dev/null +++ b/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc @@ -0,0 +1,1144 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 531 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %MainHull "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_COLOR0 %in_var_TEXCOORD0 %in_var_VS_To_DS_Position %gl_InvocationID %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_COLOR0 %out_var_TEXCOORD0 %out_var_VS_To_DS_Position %out_var_PN_POSITION %out_var_PN_DisplacementScales %out_var_PN_TessellationMultiplier %out_var_PN_WorldDisplacementMultiplier %gl_TessLevelOuter %gl_TessLevelInner %out_var_PN_POSITION9 + OpExecutionMode %MainHull Triangles + OpExecutionMode %MainHull SpacingFractionalOdd + OpExecutionMode %MainHull VertexOrderCw + OpExecutionMode %MainHull OutputVertices 3 + OpSource HLSL 600 + OpName %FPNTessellationHSToDS "FPNTessellationHSToDS" + OpMemberName %FPNTessellationHSToDS 0 "PassSpecificData" + OpMemberName %FPNTessellationHSToDS 1 "WorldPosition" + OpMemberName %FPNTessellationHSToDS 2 "DisplacementScale" + OpMemberName %FPNTessellationHSToDS 3 "TessellationMultiplier" + OpMemberName %FPNTessellationHSToDS 4 "WorldDisplacementMultiplier" + OpName %FHitProxyVSToDS "FHitProxyVSToDS" + OpMemberName %FHitProxyVSToDS 0 "FactoryInterpolants" + OpMemberName %FHitProxyVSToDS 1 "Position" + OpName %FVertexFactoryInterpolantsVSToDS "FVertexFactoryInterpolantsVSToDS" + OpMemberName 
%FVertexFactoryInterpolantsVSToDS 0 "InterpolantsVSToPS" + OpName %FVertexFactoryInterpolantsVSToPS "FVertexFactoryInterpolantsVSToPS" + OpMemberName %FVertexFactoryInterpolantsVSToPS 0 "TangentToWorld0" + OpMemberName %FVertexFactoryInterpolantsVSToPS 1 "TangentToWorld2" + OpMemberName %FVertexFactoryInterpolantsVSToPS 2 "Color" + OpMemberName %FVertexFactoryInterpolantsVSToPS 3 "TexCoords" + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 "View_ViewToClipNoAA" + OpMemberName %type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 "View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + OpMemberName 
%type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 "View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 45 "PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName %type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 "PrePadding_View_2076" + 
OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName %type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName %type_View 81 "View_DeltaTime" + OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 "View_DepthOfFieldSensorWidth" + OpMemberName 
%type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 "View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 "View_AtmosphericFogSunPower" + OpMemberName %type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + OpMemberName %type_View 132 "PrePadding_View_2524" 
+ OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName %type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 "View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + OpMemberName %type_View 165 "PrePadding_View_2956" 
+ OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_Primitive "type.Primitive" + OpMemberName %type_Primitive 0 "Primitive_LocalToWorld" + OpMemberName %type_Primitive 1 "Primitive_InvNonUniformScaleAndDeterminantSign" + OpMemberName %type_Primitive 2 "Primitive_ObjectWorldPositionAndRadius" + OpMemberName %type_Primitive 3 "Primitive_WorldToLocal" + OpMemberName %type_Primitive 4 "Primitive_PreviousLocalToWorld" + OpMemberName %type_Primitive 5 "Primitive_PreviousWorldToLocal" + OpMemberName %type_Primitive 6 "Primitive_ActorWorldPosition" + OpMemberName %type_Primitive 7 "Primitive_UseSingleSampleShadowFromStationaryLights" + OpMemberName %type_Primitive 8 "Primitive_ObjectBounds" + OpMemberName %type_Primitive 9 "Primitive_LpvBiasMultiplier" + OpMemberName %type_Primitive 10 "Primitive_DecalReceiverMask" + OpMemberName %type_Primitive 11 "Primitive_PerObjectGBufferData" + OpMemberName %type_Primitive 12 "Primitive_UseVolumetricLightmapShadowFromStationaryLights" + OpMemberName %type_Primitive 13 "Primitive_DrawsVelocity" + OpMemberName %type_Primitive 14 "Primitive_ObjectOrientation" + OpMemberName %type_Primitive 15 "Primitive_NonUniformScale" + OpMemberName %type_Primitive 16 
"Primitive_LocalObjectBoundsMin" + OpMemberName %type_Primitive 17 "Primitive_LightingChannelMask" + OpMemberName %type_Primitive 18 "Primitive_LocalObjectBoundsMax" + OpMemberName %type_Primitive 19 "Primitive_LightmapDataIndex" + OpMemberName %type_Primitive 20 "Primitive_PreSkinnedLocalBounds" + OpMemberName %type_Primitive 21 "Primitive_SingleCaptureIndex" + OpMemberName %type_Primitive 22 "Primitive_OutputVelocity" + OpMemberName %type_Primitive 23 "PrePadding_Primitive_420" + OpMemberName %type_Primitive 24 "PrePadding_Primitive_424" + OpMemberName %type_Primitive 25 "PrePadding_Primitive_428" + OpMemberName %type_Primitive 26 "Primitive_CustomPrimitiveData" + OpName %Primitive "Primitive" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_COLOR0 "in.var.COLOR0" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %in_var_VS_To_DS_Position "in.var.VS_To_DS_Position" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_COLOR0 "out.var.COLOR0" + OpName %out_var_TEXCOORD0 "out.var.TEXCOORD0" + OpName %out_var_VS_To_DS_Position "out.var.VS_To_DS_Position" + OpName %out_var_PN_POSITION "out.var.PN_POSITION" + OpName %out_var_PN_DisplacementScales "out.var.PN_DisplacementScales" + OpName %out_var_PN_TessellationMultiplier "out.var.PN_TessellationMultiplier" + OpName %out_var_PN_WorldDisplacementMultiplier "out.var.PN_WorldDisplacementMultiplier" + OpName %out_var_PN_POSITION9 "out.var.PN_POSITION9" + OpName %MainHull "MainHull" + OpName %param_var_I "param.var.I" + OpName %temp_var_hullMainRetVal "temp.var.hullMainRetVal" + OpName %if_merge "if.merge" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_COLOR0 UserSemantic "COLOR0" 
+ OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorateString %gl_InvocationID UserSemantic "SV_OutputControlPointID" + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_COLOR0 UserSemantic "COLOR0" + OpDecorateString %out_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %out_var_PN_POSITION UserSemantic "PN_POSITION" + OpDecorateString %out_var_PN_DisplacementScales UserSemantic "PN_DisplacementScales" + OpDecorateString %out_var_PN_TessellationMultiplier UserSemantic "PN_TessellationMultiplier" + OpDecorateString %out_var_PN_WorldDisplacementMultiplier UserSemantic "PN_WorldDisplacementMultiplier" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %out_var_PN_POSITION9 UserSemantic "PN_POSITION9" + OpDecorate %out_var_PN_POSITION9 Patch + OpDecorate %in_var_TEXCOORD10_centroid Location 0 + OpDecorate %in_var_TEXCOORD11_centroid Location 1 + OpDecorate %in_var_COLOR0 Location 2 + OpDecorate %in_var_TEXCOORD0 Location 3 + OpDecorate %in_var_VS_To_DS_Position Location 5 + OpDecorate %out_var_COLOR0 Location 0 + OpDecorate %out_var_PN_DisplacementScales Location 1 + OpDecorate %out_var_PN_POSITION Location 2 + OpDecorate %out_var_PN_POSITION9 Location 5 + OpDecorate %out_var_PN_TessellationMultiplier Location 6 + OpDecorate %out_var_PN_WorldDisplacementMultiplier Location 7 + OpDecorate %out_var_TEXCOORD0 
Location 8 + OpDecorate %out_var_TEXCOORD10_centroid Location 10 + OpDecorate %out_var_TEXCOORD11_centroid Location 11 + OpDecorate %out_var_VS_To_DS_Position Location 12 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %Primitive DescriptorSet 0 + OpDecorate %Primitive Binding 1 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 
16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + OpMemberDecorate %type_View 30 Offset 1040 + OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate 
%type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 
Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate %type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate %type_View 92 Offset 2236 + OpMemberDecorate %type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 
Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + OpMemberDecorate %type_View 136 Offset 2560 + OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate %type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate %type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + 
OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate %type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate %type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpDecorate %type_View Block + OpMemberDecorate %type_Primitive 0 Offset 0 + OpMemberDecorate %type_Primitive 0 MatrixStride 16 + OpMemberDecorate %type_Primitive 0 ColMajor + OpMemberDecorate %type_Primitive 1 Offset 64 + OpMemberDecorate %type_Primitive 2 Offset 80 + OpMemberDecorate %type_Primitive 3 Offset 96 + OpMemberDecorate %type_Primitive 3 MatrixStride 16 + OpMemberDecorate %type_Primitive 3 ColMajor + OpMemberDecorate %type_Primitive 4 Offset 160 + OpMemberDecorate %type_Primitive 4 MatrixStride 16 + OpMemberDecorate %type_Primitive 4 ColMajor + OpMemberDecorate %type_Primitive 5 Offset 224 + OpMemberDecorate %type_Primitive 5 MatrixStride 16 + OpMemberDecorate %type_Primitive 5 ColMajor + OpMemberDecorate %type_Primitive 6 Offset 288 + OpMemberDecorate %type_Primitive 7 Offset 300 + OpMemberDecorate %type_Primitive 8 Offset 304 + OpMemberDecorate %type_Primitive 9 Offset 316 + OpMemberDecorate %type_Primitive 10 Offset 320 + OpMemberDecorate %type_Primitive 11 
Offset 324 + OpMemberDecorate %type_Primitive 12 Offset 328 + OpMemberDecorate %type_Primitive 13 Offset 332 + OpMemberDecorate %type_Primitive 14 Offset 336 + OpMemberDecorate %type_Primitive 15 Offset 352 + OpMemberDecorate %type_Primitive 16 Offset 368 + OpMemberDecorate %type_Primitive 17 Offset 380 + OpMemberDecorate %type_Primitive 18 Offset 384 + OpMemberDecorate %type_Primitive 19 Offset 396 + OpMemberDecorate %type_Primitive 20 Offset 400 + OpMemberDecorate %type_Primitive 21 Offset 412 + OpMemberDecorate %type_Primitive 22 Offset 416 + OpMemberDecorate %type_Primitive 23 Offset 420 + OpMemberDecorate %type_Primitive 24 Offset 424 + OpMemberDecorate %type_Primitive 25 Offset 428 + OpMemberDecorate %type_Primitive 26 Offset 432 + OpDecorate %type_Primitive Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %v2int = OpTypeVector %int 2 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 + %float_2 = OpConstant %float 2 + %54 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %float_0_5 = OpConstant %float 0.5 + %int_3 = OpConstant %int 3 +%float_0_333000004 = OpConstant %float 0.333000004 + %float_1 = OpConstant %float 1 + %59 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_15 = OpConstant %float 15 + %61 = OpConstantComposite %v4float %float_15 %float_15 %float_15 %float_15 +%_arr_v2float_uint_2 = OpTypeArray %v2float %uint_2 +%FVertexFactoryInterpolantsVSToPS = OpTypeStruct %v4float %v4float %v4float %_arr_v2float_uint_2 +%FVertexFactoryInterpolantsVSToDS = OpTypeStruct 
%FVertexFactoryInterpolantsVSToPS +%FHitProxyVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToDS %v4float +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%FPNTessellationHSToDS = OpTypeStruct %FHitProxyVSToDS %_arr_v4float_uint_3 %v3float %float %float + %v3int = OpTypeVector %int 3 + %65 = OpConstantComposite %v3int %int_0 %int_0 %int_0 + %66 = OpConstantComposite %v3int %int_3 %int_3 %int_3 + %float_0 = OpConstant %float 0 + %68 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %69 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 + %int_78 = OpConstant %int 78 + %int_15 = OpConstant %int 15 + %int_7 = OpConstant %int 7 + %int_28 = OpConstant %int 28 + %74 = OpConstantComposite %v3int %int_1 %int_1 %int_1 + %75 = OpConstantComposite %v3int %int_2 %int_2 %int_2 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float 
%float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_Primitive = OpTypeStruct %mat4v4float %v4float %v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %float %float %float %float %v4float %v4float %v3float %uint %v3float %uint %v3float %int %uint %uint %uint %uint %_arr_v4float_uint_4 +%_ptr_Uniform_type_Primitive = OpTypePointer Uniform %type_Primitive + %uint_12 = OpConstant %uint 12 +%_arr_v4float_uint_12 = OpTypeArray %v4float %uint_12 +%_ptr_Input__arr_v4float_uint_12 = OpTypePointer Input %_arr_v4float_uint_12 +%_arr__arr_v2float_uint_2_uint_12 = OpTypeArray %_arr_v2float_uint_2 %uint_12 +%_ptr_Input__arr__arr_v2float_uint_2_uint_12 = OpTypePointer Input %_arr__arr_v2float_uint_2_uint_12 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_arr__arr_v2float_uint_2_uint_3 = OpTypeArray %_arr_v2float_uint_2 %uint_3 +%_ptr_Output__arr__arr_v2float_uint_2_uint_3 = OpTypePointer Output %_arr__arr_v2float_uint_2_uint_3 +%_arr__arr_v4float_uint_3_uint_3 = OpTypeArray %_arr_v4float_uint_3 %uint_3 +%_ptr_Output__arr__arr_v4float_uint_3_uint_3 = OpTypePointer Output %_arr__arr_v4float_uint_3_uint_3 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Output__arr_v3float_uint_3 = OpTypePointer Output %_arr_v3float_uint_3 +%_ptr_Output__arr_float_uint_3 = OpTypePointer Output 
%_arr_float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %98 = OpTypeFunction %void +%_arr_FHitProxyVSToDS_uint_12 = OpTypeArray %FHitProxyVSToDS %uint_12 +%_ptr_Function__arr_FHitProxyVSToDS_uint_12 = OpTypePointer Function %_arr_FHitProxyVSToDS_uint_12 +%_arr_FPNTessellationHSToDS_uint_3 = OpTypeArray %FPNTessellationHSToDS %uint_3 +%_ptr_Function__arr_FPNTessellationHSToDS_uint_3 = OpTypePointer Function %_arr_FPNTessellationHSToDS_uint_3 +%_ptr_Workgroup__arr_FPNTessellationHSToDS_uint_3 = OpTypePointer Workgroup %_arr_FPNTessellationHSToDS_uint_3 +%_ptr_Output__arr_v2float_uint_2 = OpTypePointer Output %_arr_v2float_uint_2 +%_ptr_Output_v3float = OpTypePointer Output %v3float +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Function_FPNTessellationHSToDS = OpTypePointer Function %FPNTessellationHSToDS +%_ptr_Workgroup_FPNTessellationHSToDS = OpTypePointer Workgroup %FPNTessellationHSToDS + %bool = OpTypeBool +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float +%_ptr_Workgroup_float = OpTypePointer Workgroup %float +%mat3v3float = OpTypeMatrix %v3float 3 +%_ptr_Function_FVertexFactoryInterpolantsVSToDS = OpTypePointer Function %FVertexFactoryInterpolantsVSToDS +%_ptr_Function_FHitProxyVSToDS = OpTypePointer Function %FHitProxyVSToDS + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %View = OpVariable %_ptr_Uniform_type_View Uniform + %Primitive = 
OpVariable %_ptr_Uniform_type_Primitive Uniform +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_COLOR0 = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr__arr_v2float_uint_2_uint_12 Input +%in_var_VS_To_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%gl_InvocationID = OpVariable %_ptr_Input_uint Input +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_COLOR0 = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_TEXCOORD0 = OpVariable %_ptr_Output__arr__arr_v2float_uint_2_uint_3 Output +%out_var_VS_To_DS_Position = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_PN_POSITION = OpVariable %_ptr_Output__arr__arr_v4float_uint_3_uint_3 Output +%out_var_PN_DisplacementScales = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%out_var_PN_TessellationMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%out_var_PN_WorldDisplacementMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output +%out_var_PN_POSITION9 = OpVariable %_ptr_Output_v4float Output +%float_0_333333343 = OpConstant %float 0.333333343 + %119 = OpConstantComposite %v4float %float_0_333333343 %float_0_333333343 %float_0_333333343 %float_0_333333343 + %120 = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5 +%float_0_166666672 = OpConstant %float 0.166666672 + %122 = OpConstantComposite %v4float %float_0_166666672 %float_0_166666672 %float_0_166666672 %float_0_166666672 + %123 = OpUndef %v4float + +; XXX: Original asm used Function here, which is wrong. +; This patches the SPIR-V to be correct. 
+%temp_var_hullMainRetVal = OpVariable %_ptr_Workgroup__arr_FPNTessellationHSToDS_uint_3 Workgroup + + %MainHull = OpFunction %void None %98 + %124 = OpLabel +%param_var_I = OpVariable %_ptr_Function__arr_FHitProxyVSToDS_uint_12 Function + %125 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD10_centroid + %126 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD11_centroid + %127 = OpLoad %_arr_v4float_uint_12 %in_var_COLOR0 + %128 = OpLoad %_arr__arr_v2float_uint_2_uint_12 %in_var_TEXCOORD0 + %129 = OpCompositeExtract %v4float %125 0 + %130 = OpCompositeExtract %v4float %126 0 + %131 = OpCompositeExtract %v4float %127 0 + %132 = OpCompositeExtract %_arr_v2float_uint_2 %128 0 + %133 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %129 %130 %131 %132 + %134 = OpCompositeExtract %v4float %125 1 + %135 = OpCompositeExtract %v4float %126 1 + %136 = OpCompositeExtract %v4float %127 1 + %137 = OpCompositeExtract %_arr_v2float_uint_2 %128 1 + %138 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %134 %135 %136 %137 + %139 = OpCompositeExtract %v4float %125 2 + %140 = OpCompositeExtract %v4float %126 2 + %141 = OpCompositeExtract %v4float %127 2 + %142 = OpCompositeExtract %_arr_v2float_uint_2 %128 2 + %143 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %139 %140 %141 %142 + %144 = OpCompositeExtract %v4float %125 3 + %145 = OpCompositeExtract %v4float %126 3 + %146 = OpCompositeExtract %v4float %127 3 + %147 = OpCompositeExtract %_arr_v2float_uint_2 %128 3 + %148 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %144 %145 %146 %147 + %149 = OpCompositeExtract %v4float %125 4 + %150 = OpCompositeExtract %v4float %126 4 + %151 = OpCompositeExtract %v4float %127 4 + %152 = OpCompositeExtract %_arr_v2float_uint_2 %128 4 + %153 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %149 %150 %151 %152 + %154 = OpCompositeExtract %v4float %125 5 + %155 = OpCompositeExtract %v4float %126 5 + %156 = OpCompositeExtract %v4float %127 5 + %157 = 
OpCompositeExtract %_arr_v2float_uint_2 %128 5 + %158 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %154 %155 %156 %157 + %159 = OpCompositeExtract %v4float %125 6 + %160 = OpCompositeExtract %v4float %126 6 + %161 = OpCompositeExtract %v4float %127 6 + %162 = OpCompositeExtract %_arr_v2float_uint_2 %128 6 + %163 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %159 %160 %161 %162 + %164 = OpCompositeExtract %v4float %125 7 + %165 = OpCompositeExtract %v4float %126 7 + %166 = OpCompositeExtract %v4float %127 7 + %167 = OpCompositeExtract %_arr_v2float_uint_2 %128 7 + %168 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %164 %165 %166 %167 + %169 = OpCompositeExtract %v4float %125 8 + %170 = OpCompositeExtract %v4float %126 8 + %171 = OpCompositeExtract %v4float %127 8 + %172 = OpCompositeExtract %_arr_v2float_uint_2 %128 8 + %173 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %169 %170 %171 %172 + %174 = OpCompositeExtract %v4float %125 9 + %175 = OpCompositeExtract %v4float %126 9 + %176 = OpCompositeExtract %v4float %127 9 + %177 = OpCompositeExtract %_arr_v2float_uint_2 %128 9 + %178 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %174 %175 %176 %177 + %179 = OpCompositeExtract %v4float %125 10 + %180 = OpCompositeExtract %v4float %126 10 + %181 = OpCompositeExtract %v4float %127 10 + %182 = OpCompositeExtract %_arr_v2float_uint_2 %128 10 + %183 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %179 %180 %181 %182 + %184 = OpCompositeExtract %v4float %125 11 + %185 = OpCompositeExtract %v4float %126 11 + %186 = OpCompositeExtract %v4float %127 11 + %187 = OpCompositeExtract %_arr_v2float_uint_2 %128 11 + %188 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %184 %185 %186 %187 + %189 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %133 + %190 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %138 + %191 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %143 + %192 = 
OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %148 + %193 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %153 + %194 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %158 + %195 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %163 + %196 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %168 + %197 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %173 + %198 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %178 + %199 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %183 + %200 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %188 + %201 = OpLoad %_arr_v4float_uint_12 %in_var_VS_To_DS_Position + %202 = OpCompositeExtract %v4float %201 0 + %203 = OpCompositeConstruct %FHitProxyVSToDS %189 %202 + %204 = OpCompositeExtract %v4float %201 1 + %205 = OpCompositeConstruct %FHitProxyVSToDS %190 %204 + %206 = OpCompositeExtract %v4float %201 2 + %207 = OpCompositeConstruct %FHitProxyVSToDS %191 %206 + %208 = OpCompositeExtract %v4float %201 3 + %209 = OpCompositeConstruct %FHitProxyVSToDS %192 %208 + %210 = OpCompositeExtract %v4float %201 4 + %211 = OpCompositeConstruct %FHitProxyVSToDS %193 %210 + %212 = OpCompositeExtract %v4float %201 5 + %213 = OpCompositeConstruct %FHitProxyVSToDS %194 %212 + %214 = OpCompositeExtract %v4float %201 6 + %215 = OpCompositeConstruct %FHitProxyVSToDS %195 %214 + %216 = OpCompositeExtract %v4float %201 7 + %217 = OpCompositeConstruct %FHitProxyVSToDS %196 %216 + %218 = OpCompositeExtract %v4float %201 8 + %219 = OpCompositeConstruct %FHitProxyVSToDS %197 %218 + %220 = OpCompositeExtract %v4float %201 9 + %221 = OpCompositeConstruct %FHitProxyVSToDS %198 %220 + %222 = OpCompositeExtract %v4float %201 10 + %223 = OpCompositeConstruct %FHitProxyVSToDS %199 %222 + %224 = OpCompositeExtract %v4float %201 11 + %225 = OpCompositeConstruct %FHitProxyVSToDS %200 %224 + %226 = OpCompositeConstruct %_arr_FHitProxyVSToDS_uint_12 %203 %205 %207 %209 %211 %213 %215 %217 %219 %221 
%223 %225 + OpStore %param_var_I %226 + %227 = OpLoad %uint %gl_InvocationID + %228 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %227 %int_0 + %229 = OpLoad %FVertexFactoryInterpolantsVSToDS %228 + %230 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %229 0 + %231 = OpCompositeExtract %v4float %230 0 + %232 = OpCompositeExtract %v4float %230 1 + %233 = OpVectorShuffle %v3float %231 %231 0 1 2 + %234 = OpVectorShuffle %v3float %232 %232 0 1 2 + %235 = OpExtInst %v3float %1 Cross %234 %233 + %236 = OpCompositeExtract %float %232 3 + %237 = OpCompositeConstruct %v3float %236 %236 %236 + %238 = OpFMul %v3float %235 %237 + %239 = OpCompositeConstruct %mat3v3float %233 %238 %234 + %240 = OpCompositeExtract %float %232 0 + %241 = OpCompositeExtract %float %232 1 + %242 = OpCompositeExtract %float %232 2 + %243 = OpCompositeConstruct %v4float %240 %241 %242 %float_0 + %244 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_15 + %245 = OpLoad %v4float %244 + %246 = OpVectorShuffle %v3float %245 %245 0 1 2 + %247 = OpVectorTimesMatrix %v3float %246 %239 + %248 = OpULessThan %bool %227 %uint_2 + %249 = OpIAdd %uint %227 %uint_1 + %250 = OpSelect %uint %248 %249 %uint_0 + %251 = OpIMul %uint %uint_2 %227 + %252 = OpIAdd %uint %uint_3 %251 + %253 = OpIAdd %uint %251 %uint_4 + %254 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %227 + %255 = OpLoad %FHitProxyVSToDS %254 + %256 = OpAccessChain %_ptr_Function_v4float %param_var_I %227 %int_1 + %257 = OpLoad %v4float %256 + %258 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %250 %int_0 + %259 = OpLoad %FVertexFactoryInterpolantsVSToDS %258 + %260 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %259 0 + %261 = OpCompositeExtract %v4float %260 1 + %262 = OpCompositeExtract %float %261 0 + %263 = OpCompositeExtract %float %261 1 + %264 = OpCompositeExtract %float %261 2 + %265 = OpCompositeConstruct %v4float %262 %263 %264 %float_0 + %266 = 
OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %252 %int_0 + %267 = OpLoad %FVertexFactoryInterpolantsVSToDS %266 + %268 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %267 0 + %269 = OpCompositeExtract %v4float %268 1 + %270 = OpCompositeExtract %float %269 0 + %271 = OpCompositeExtract %float %269 1 + %272 = OpCompositeExtract %float %269 2 + %273 = OpCompositeConstruct %v4float %270 %271 %272 %float_0 + %274 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %253 %int_0 + %275 = OpLoad %FVertexFactoryInterpolantsVSToDS %274 + %276 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %275 0 + %277 = OpCompositeExtract %v4float %276 1 + %278 = OpCompositeExtract %float %277 0 + %279 = OpCompositeExtract %float %277 1 + %280 = OpCompositeExtract %float %277 2 + %281 = OpCompositeConstruct %v4float %278 %279 %280 %float_0 + %282 = OpLoad %v4float %256 + %283 = OpAccessChain %_ptr_Function_v4float %param_var_I %250 %int_1 + %284 = OpLoad %v4float %283 + %285 = OpFMul %v4float %54 %282 + %286 = OpFAdd %v4float %285 %284 + %287 = OpFSub %v4float %284 %282 + %288 = OpDot %float %287 %243 + %289 = OpCompositeConstruct %v4float %288 %288 %288 %288 + %290 = OpFMul %v4float %289 %243 + %291 = OpFSub %v4float %286 %290 + %292 = OpFMul %v4float %291 %119 + %293 = OpAccessChain %_ptr_Function_v4float %param_var_I %252 %int_1 + %294 = OpLoad %v4float %293 + %295 = OpAccessChain %_ptr_Function_v4float %param_var_I %253 %int_1 + %296 = OpLoad %v4float %295 + %297 = OpFMul %v4float %54 %294 + %298 = OpFAdd %v4float %297 %296 + %299 = OpFSub %v4float %296 %294 + %300 = OpDot %float %299 %273 + %301 = OpCompositeConstruct %v4float %300 %300 %300 %300 + %302 = OpFMul %v4float %301 %273 + %303 = OpFSub %v4float %298 %302 + %304 = OpFMul %v4float %303 %119 + %305 = OpFAdd %v4float %292 %304 + %306 = OpFMul %v4float %305 %120 + %307 = OpLoad %v4float %283 + %308 = OpLoad %v4float %256 + %309 = OpFMul %v4float %54 %307 + 
%310 = OpFAdd %v4float %309 %308 + %311 = OpFSub %v4float %308 %307 + %312 = OpDot %float %311 %265 + %313 = OpCompositeConstruct %v4float %312 %312 %312 %312 + %314 = OpFMul %v4float %313 %265 + %315 = OpFSub %v4float %310 %314 + %316 = OpFMul %v4float %315 %119 + %317 = OpLoad %v4float %295 + %318 = OpLoad %v4float %293 + %319 = OpFMul %v4float %54 %317 + %320 = OpFAdd %v4float %319 %318 + %321 = OpFSub %v4float %318 %317 + %322 = OpDot %float %321 %281 + %323 = OpCompositeConstruct %v4float %322 %322 %322 %322 + %324 = OpFMul %v4float %323 %281 + %325 = OpFSub %v4float %320 %324 + %326 = OpFMul %v4float %325 %119 + %327 = OpFAdd %v4float %316 %326 + %328 = OpFMul %v4float %327 %120 + %329 = OpCompositeConstruct %_arr_v4float_uint_3 %257 %306 %328 + %330 = OpCompositeConstruct %FPNTessellationHSToDS %255 %329 %247 %float_1 %float_1 + %331 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %255 0 + %332 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %331 0 + %333 = OpCompositeExtract %v4float %332 0 + %334 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD10_centroid %227 + OpStore %334 %333 + %335 = OpCompositeExtract %v4float %332 1 + %336 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD11_centroid %227 + OpStore %336 %335 + %337 = OpCompositeExtract %v4float %332 2 + %338 = OpAccessChain %_ptr_Output_v4float %out_var_COLOR0 %227 + OpStore %338 %337 + %339 = OpCompositeExtract %_arr_v2float_uint_2 %332 3 + %340 = OpAccessChain %_ptr_Output__arr_v2float_uint_2 %out_var_TEXCOORD0 %227 + OpStore %340 %339 + %341 = OpCompositeExtract %v4float %255 1 + %342 = OpAccessChain %_ptr_Output_v4float %out_var_VS_To_DS_Position %227 + OpStore %342 %341 + %343 = OpAccessChain %_ptr_Output__arr_v4float_uint_3 %out_var_PN_POSITION %227 + OpStore %343 %329 + %344 = OpAccessChain %_ptr_Output_v3float %out_var_PN_DisplacementScales %227 + OpStore %344 %247 + %345 = OpAccessChain %_ptr_Output_float %out_var_PN_TessellationMultiplier %227 + OpStore %345 
%float_1 + %346 = OpAccessChain %_ptr_Output_float %out_var_PN_WorldDisplacementMultiplier %227 + OpStore %346 %float_1 + %347 = OpAccessChain %_ptr_Workgroup_FPNTessellationHSToDS %temp_var_hullMainRetVal %227 + OpStore %347 %330 + OpControlBarrier %uint_2 %uint_4 %uint_0 + %348 = OpIEqual %bool %227 %uint_0 + OpSelectionMerge %if_merge None + OpBranchConditional %348 %349 %if_merge + %349 = OpLabel + %350 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_0 + %351 = OpLoad %mat4v4float %350 + %352 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_7 + %353 = OpLoad %mat4v4float %352 + %354 = OpAccessChain %_ptr_Uniform_v3float %View %int_28 + %355 = OpLoad %v3float %354 + %356 = OpAccessChain %_ptr_Uniform_float %View %int_78 + %357 = OpLoad %float %356 + %358 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_0 + %359 = OpLoad %v4float %358 + %360 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_1 + %361 = OpLoad %v4float %360 + %362 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_2 + %363 = OpLoad %v4float %362 + %364 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_0 + %365 = OpLoad %v4float %364 + %366 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_1 + %367 = OpLoad %v4float %366 + %368 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_2 + %369 = OpLoad %v4float %368 + %370 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_0 + %371 = OpLoad %v4float %370 + %372 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_1 + %373 = OpLoad %v4float %372 + %374 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_2 + %375 = OpLoad %v4float %374 + %376 = OpFAdd %v4float %361 %363 + %377 = OpFAdd %v4float %376 %367 + %378 = OpFAdd %v4float %377 %369 + %379 = 
OpFAdd %v4float %378 %373 + %380 = OpFAdd %v4float %379 %375 + %381 = OpFMul %v4float %380 %122 + %382 = OpFAdd %v4float %371 %365 + %383 = OpFAdd %v4float %382 %359 + %384 = OpFMul %v4float %383 %119 + %385 = OpFSub %v4float %381 %384 + %386 = OpFMul %v4float %385 %120 + %387 = OpFAdd %v4float %381 %386 + %388 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_1 %int_3 + %389 = OpLoad %float %388 + %390 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_2 %int_3 + %391 = OpLoad %float %390 + %392 = OpFAdd %float %389 %391 + %393 = OpFMul %float %float_0_5 %392 + %394 = OpCompositeInsert %v4float %393 %123 0 + %395 = OpLoad %float %390 + %396 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_0 %int_3 + %397 = OpLoad %float %396 + %398 = OpFAdd %float %395 %397 + %399 = OpFMul %float %float_0_5 %398 + %400 = OpCompositeInsert %v4float %399 %394 1 + %401 = OpLoad %float %396 + %402 = OpLoad %float %388 + %403 = OpFAdd %float %401 %402 + %404 = OpFMul %float %float_0_5 %403 + %405 = OpCompositeInsert %v4float %404 %400 2 + %406 = OpLoad %float %396 + %407 = OpLoad %float %388 + %408 = OpFAdd %float %406 %407 + %409 = OpLoad %float %390 + %410 = OpFAdd %float %408 %409 + %411 = OpFMul %float %float_0_333000004 %410 + %412 = OpCompositeInsert %v4float %411 %405 3 + %413 = OpVectorShuffle %v3float %359 %359 0 1 2 + %414 = OpVectorShuffle %v3float %365 %365 0 1 2 + %415 = OpVectorShuffle %v3float %371 %371 0 1 2 + OpBranch %416 + %416 = OpLabel + OpLoopMerge %417 %418 None + OpBranch %419 + %419 = OpLabel + %420 = OpMatrixTimesVector %v4float %353 %68 + %421 = OpCompositeExtract %float %359 0 + %422 = OpCompositeExtract %float %359 1 + %423 = OpCompositeExtract %float %359 2 + %424 = OpCompositeConstruct %v4float %421 %422 %423 %float_1 + %425 = OpMatrixTimesVector %v4float %351 %424 + %426 = OpVectorShuffle %v3float %425 %425 0 1 2 + %427 = OpVectorShuffle %v3float %420 %420 0 1 2 + %428 = OpFSub %v3float %426 %427 + 
%429 = OpCompositeExtract %float %425 3 + %430 = OpCompositeExtract %float %420 3 + %431 = OpFAdd %float %429 %430 + %432 = OpCompositeConstruct %v3float %431 %431 %431 + %433 = OpFOrdLessThan %v3bool %428 %432 + %434 = OpSelect %v3int %433 %74 %65 + %435 = OpFAdd %v3float %426 %427 + %436 = OpFNegate %float %429 + %437 = OpFSub %float %436 %430 + %438 = OpCompositeConstruct %v3float %437 %437 %437 + %439 = OpFOrdGreaterThan %v3bool %435 %438 + %440 = OpSelect %v3int %439 %74 %65 + %441 = OpIMul %v3int %75 %440 + %442 = OpIAdd %v3int %434 %441 + %443 = OpCompositeExtract %float %365 0 + %444 = OpCompositeExtract %float %365 1 + %445 = OpCompositeExtract %float %365 2 + %446 = OpCompositeConstruct %v4float %443 %444 %445 %float_1 + %447 = OpMatrixTimesVector %v4float %351 %446 + %448 = OpVectorShuffle %v3float %447 %447 0 1 2 + %449 = OpFSub %v3float %448 %427 + %450 = OpCompositeExtract %float %447 3 + %451 = OpFAdd %float %450 %430 + %452 = OpCompositeConstruct %v3float %451 %451 %451 + %453 = OpFOrdLessThan %v3bool %449 %452 + %454 = OpSelect %v3int %453 %74 %65 + %455 = OpFAdd %v3float %448 %427 + %456 = OpFNegate %float %450 + %457 = OpFSub %float %456 %430 + %458 = OpCompositeConstruct %v3float %457 %457 %457 + %459 = OpFOrdGreaterThan %v3bool %455 %458 + %460 = OpSelect %v3int %459 %74 %65 + %461 = OpIMul %v3int %75 %460 + %462 = OpIAdd %v3int %454 %461 + %463 = OpBitwiseOr %v3int %442 %462 + %464 = OpCompositeExtract %float %371 0 + %465 = OpCompositeExtract %float %371 1 + %466 = OpCompositeExtract %float %371 2 + %467 = OpCompositeConstruct %v4float %464 %465 %466 %float_1 + %468 = OpMatrixTimesVector %v4float %351 %467 + %469 = OpVectorShuffle %v3float %468 %468 0 1 2 + %470 = OpFSub %v3float %469 %427 + %471 = OpCompositeExtract %float %468 3 + %472 = OpFAdd %float %471 %430 + %473 = OpCompositeConstruct %v3float %472 %472 %472 + %474 = OpFOrdLessThan %v3bool %470 %473 + %475 = OpSelect %v3int %474 %74 %65 + %476 = OpFAdd %v3float %469 %427 + %477 = 
OpFNegate %float %471 + %478 = OpFSub %float %477 %430 + %479 = OpCompositeConstruct %v3float %478 %478 %478 + %480 = OpFOrdGreaterThan %v3bool %476 %479 + %481 = OpSelect %v3int %480 %74 %65 + %482 = OpIMul %v3int %75 %481 + %483 = OpIAdd %v3int %475 %482 + %484 = OpBitwiseOr %v3int %463 %483 + %485 = OpINotEqual %v3bool %484 %66 + %486 = OpAny %bool %485 + OpSelectionMerge %487 None + OpBranchConditional %486 %488 %487 + %488 = OpLabel + OpBranch %417 + %487 = OpLabel + %489 = OpFSub %v3float %413 %414 + %490 = OpFSub %v3float %414 %415 + %491 = OpFSub %v3float %415 %413 + %492 = OpFAdd %v3float %413 %414 + %493 = OpFMul %v3float %69 %492 + %494 = OpFSub %v3float %493 %355 + %495 = OpFAdd %v3float %414 %415 + %496 = OpFMul %v3float %69 %495 + %497 = OpFSub %v3float %496 %355 + %498 = OpFAdd %v3float %415 %413 + %499 = OpFMul %v3float %69 %498 + %500 = OpFSub %v3float %499 %355 + %501 = OpDot %float %490 %490 + %502 = OpDot %float %497 %497 + %503 = OpFDiv %float %501 %502 + %504 = OpExtInst %float %1 Sqrt %503 + %505 = OpDot %float %491 %491 + %506 = OpDot %float %500 %500 + %507 = OpFDiv %float %505 %506 + %508 = OpExtInst %float %1 Sqrt %507 + %509 = OpDot %float %489 %489 + %510 = OpDot %float %494 %494 + %511 = OpFDiv %float %509 %510 + %512 = OpExtInst %float %1 Sqrt %511 + %513 = OpCompositeConstruct %v4float %504 %508 %512 %float_1 + %514 = OpFAdd %float %504 %508 + %515 = OpFAdd %float %514 %512 + %516 = OpFMul %float %float_0_333000004 %515 + %517 = OpCompositeInsert %v4float %516 %513 3 + %518 = OpCompositeConstruct %v4float %357 %357 %357 %357 + %519 = OpFMul %v4float %518 %517 + OpBranch %417 + %418 = OpLabel + OpBranch %416 + %417 = OpLabel + %520 = OpPhi %v4float %68 %488 %519 %487 + %521 = OpFMul %v4float %412 %520 + %522 = OpExtInst %v4float %1 FClamp %521 %59 %61 + %523 = OpCompositeExtract %float %522 0 + %524 = OpCompositeExtract %float %522 1 + %525 = OpCompositeExtract %float %522 2 + %526 = OpCompositeExtract %float %522 3 + %527 = 
OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_0 + OpStore %527 %523 + %528 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_1 + OpStore %528 %524 + %529 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_2 + OpStore %529 %525 + %530 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %uint_0 + OpStore %530 %526 + OpStore %out_var_PN_POSITION9 %387 + OpBranch %if_merge + %if_merge = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc b/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc new file mode 100644 index 00000000000..98216e79243 --- /dev/null +++ b/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc @@ -0,0 +1,352 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 179 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %MainHull "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_VS_To_DS_Position %gl_InvocationID %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_VS_To_DS_Position %out_var_Flat_DisplacementScales %out_var_Flat_TessellationMultiplier %out_var_Flat_WorldDisplacementMultiplier %gl_TessLevelOuter %gl_TessLevelInner + OpExecutionMode %MainHull Triangles + OpExecutionMode %MainHull SpacingFractionalOdd + OpExecutionMode %MainHull VertexOrderCw + OpExecutionMode %MainHull OutputVertices 3 + OpSource HLSL 600 + OpName %FFlatTessellationHSToDS "FFlatTessellationHSToDS" + OpMemberName %FFlatTessellationHSToDS 0 "PassSpecificData" + OpMemberName %FFlatTessellationHSToDS 1 "DisplacementScale" + OpMemberName %FFlatTessellationHSToDS 2 "TessellationMultiplier" + OpMemberName %FFlatTessellationHSToDS 3 "WorldDisplacementMultiplier" + OpName %FBasePassVSToDS "FBasePassVSToDS" + OpMemberName 
%FBasePassVSToDS 0 "FactoryInterpolants" + OpMemberName %FBasePassVSToDS 1 "BasePassInterpolants" + OpMemberName %FBasePassVSToDS 2 "Position" + OpName %FVertexFactoryInterpolantsVSToDS "FVertexFactoryInterpolantsVSToDS" + OpMemberName %FVertexFactoryInterpolantsVSToDS 0 "InterpolantsVSToPS" + OpName %FVertexFactoryInterpolantsVSToPS "FVertexFactoryInterpolantsVSToPS" + OpMemberName %FVertexFactoryInterpolantsVSToPS 0 "TangentToWorld0" + OpMemberName %FVertexFactoryInterpolantsVSToPS 1 "TangentToWorld2" + OpName %FBasePassInterpolantsVSToDS "FBasePassInterpolantsVSToDS" + OpName %FSharedBasePassInterpolants "FSharedBasePassInterpolants" + OpName %type_Primitive "type.Primitive" + OpMemberName %type_Primitive 0 "Primitive_LocalToWorld" + OpMemberName %type_Primitive 1 "Primitive_InvNonUniformScaleAndDeterminantSign" + OpMemberName %type_Primitive 2 "Primitive_ObjectWorldPositionAndRadius" + OpMemberName %type_Primitive 3 "Primitive_WorldToLocal" + OpMemberName %type_Primitive 4 "Primitive_PreviousLocalToWorld" + OpMemberName %type_Primitive 5 "Primitive_PreviousWorldToLocal" + OpMemberName %type_Primitive 6 "Primitive_ActorWorldPosition" + OpMemberName %type_Primitive 7 "Primitive_UseSingleSampleShadowFromStationaryLights" + OpMemberName %type_Primitive 8 "Primitive_ObjectBounds" + OpMemberName %type_Primitive 9 "Primitive_LpvBiasMultiplier" + OpMemberName %type_Primitive 10 "Primitive_DecalReceiverMask" + OpMemberName %type_Primitive 11 "Primitive_PerObjectGBufferData" + OpMemberName %type_Primitive 12 "Primitive_UseVolumetricLightmapShadowFromStationaryLights" + OpMemberName %type_Primitive 13 "Primitive_DrawsVelocity" + OpMemberName %type_Primitive 14 "Primitive_ObjectOrientation" + OpMemberName %type_Primitive 15 "Primitive_NonUniformScale" + OpMemberName %type_Primitive 16 "Primitive_LocalObjectBoundsMin" + OpMemberName %type_Primitive 17 "Primitive_LightingChannelMask" + OpMemberName %type_Primitive 18 "Primitive_LocalObjectBoundsMax" + OpMemberName 
%type_Primitive 19 "Primitive_LightmapDataIndex" + OpMemberName %type_Primitive 20 "Primitive_PreSkinnedLocalBounds" + OpMemberName %type_Primitive 21 "Primitive_SingleCaptureIndex" + OpMemberName %type_Primitive 22 "Primitive_OutputVelocity" + OpMemberName %type_Primitive 23 "PrePadding_Primitive_420" + OpMemberName %type_Primitive 24 "PrePadding_Primitive_424" + OpMemberName %type_Primitive 25 "PrePadding_Primitive_428" + OpMemberName %type_Primitive 26 "Primitive_CustomPrimitiveData" + OpName %Primitive "Primitive" + OpName %type_Material "type.Material" + OpMemberName %type_Material 0 "Material_VectorExpressions" + OpMemberName %type_Material 1 "Material_ScalarExpressions" + OpName %Material "Material" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_VS_To_DS_Position "in.var.VS_To_DS_Position" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_VS_To_DS_Position "out.var.VS_To_DS_Position" + OpName %out_var_Flat_DisplacementScales "out.var.Flat_DisplacementScales" + OpName %out_var_Flat_TessellationMultiplier "out.var.Flat_TessellationMultiplier" + OpName %out_var_Flat_WorldDisplacementMultiplier "out.var.Flat_WorldDisplacementMultiplier" + OpName %MainHull "MainHull" + OpName %param_var_I "param.var.I" + OpName %temp_var_hullMainRetVal "temp.var.hullMainRetVal" + OpName %if_merge "if.merge" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorateString %gl_InvocationID UserSemantic "SV_OutputControlPointID" + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString 
%out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %out_var_Flat_DisplacementScales UserSemantic "Flat_DisplacementScales" + OpDecorateString %out_var_Flat_TessellationMultiplier UserSemantic "Flat_TessellationMultiplier" + OpDecorateString %out_var_Flat_WorldDisplacementMultiplier UserSemantic "Flat_WorldDisplacementMultiplier" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorate %in_var_TEXCOORD10_centroid Location 0 + OpDecorate %in_var_TEXCOORD11_centroid Location 1 + OpDecorate %in_var_VS_To_DS_Position Location 2 + OpDecorate %out_var_Flat_DisplacementScales Location 0 + OpDecorate %out_var_Flat_TessellationMultiplier Location 1 + OpDecorate %out_var_Flat_WorldDisplacementMultiplier Location 2 + OpDecorate %out_var_TEXCOORD10_centroid Location 3 + OpDecorate %out_var_TEXCOORD11_centroid Location 4 + OpDecorate %out_var_VS_To_DS_Position Location 5 + OpDecorate %Primitive DescriptorSet 0 + OpDecorate %Primitive Binding 0 + OpDecorate %Material DescriptorSet 0 + OpDecorate %Material Binding 1 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_Primitive 0 Offset 0 + OpMemberDecorate %type_Primitive 0 MatrixStride 16 + OpMemberDecorate %type_Primitive 0 ColMajor + OpMemberDecorate %type_Primitive 1 Offset 64 + OpMemberDecorate %type_Primitive 2 Offset 80 + OpMemberDecorate %type_Primitive 3 Offset 96 + OpMemberDecorate %type_Primitive 3 MatrixStride 16 + OpMemberDecorate %type_Primitive 3 ColMajor + OpMemberDecorate %type_Primitive 4 Offset 160 + OpMemberDecorate %type_Primitive 4 MatrixStride 16 + OpMemberDecorate %type_Primitive 4 ColMajor + 
OpMemberDecorate %type_Primitive 5 Offset 224 + OpMemberDecorate %type_Primitive 5 MatrixStride 16 + OpMemberDecorate %type_Primitive 5 ColMajor + OpMemberDecorate %type_Primitive 6 Offset 288 + OpMemberDecorate %type_Primitive 7 Offset 300 + OpMemberDecorate %type_Primitive 8 Offset 304 + OpMemberDecorate %type_Primitive 9 Offset 316 + OpMemberDecorate %type_Primitive 10 Offset 320 + OpMemberDecorate %type_Primitive 11 Offset 324 + OpMemberDecorate %type_Primitive 12 Offset 328 + OpMemberDecorate %type_Primitive 13 Offset 332 + OpMemberDecorate %type_Primitive 14 Offset 336 + OpMemberDecorate %type_Primitive 15 Offset 352 + OpMemberDecorate %type_Primitive 16 Offset 368 + OpMemberDecorate %type_Primitive 17 Offset 380 + OpMemberDecorate %type_Primitive 18 Offset 384 + OpMemberDecorate %type_Primitive 19 Offset 396 + OpMemberDecorate %type_Primitive 20 Offset 400 + OpMemberDecorate %type_Primitive 21 Offset 412 + OpMemberDecorate %type_Primitive 22 Offset 416 + OpMemberDecorate %type_Primitive 23 Offset 420 + OpMemberDecorate %type_Primitive 24 Offset 424 + OpMemberDecorate %type_Primitive 25 Offset 428 + OpMemberDecorate %type_Primitive 26 Offset 432 + OpDecorate %type_Primitive Block + OpDecorate %_arr_v4float_uint_3 ArrayStride 16 + OpDecorate %_arr_v4float_uint_1 ArrayStride 16 + OpMemberDecorate %type_Material 0 Offset 0 + OpMemberDecorate %type_Material 1 Offset 48 + OpDecorate %type_Material Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 + %float_0_5 = OpConstant %float 0.5 + %int_1 = OpConstant %int 1 +%float_0_333000004 = OpConstant %float 0.333000004 + 
%float_1 = OpConstant %float 1 + %49 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_15 = OpConstant %float 15 + %51 = OpConstantComposite %v4float %float_15 %float_15 %float_15 %float_15 +%FVertexFactoryInterpolantsVSToPS = OpTypeStruct %v4float %v4float +%FVertexFactoryInterpolantsVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToPS +%FSharedBasePassInterpolants = OpTypeStruct +%FBasePassInterpolantsVSToDS = OpTypeStruct %FSharedBasePassInterpolants +%FBasePassVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToDS %FBasePassInterpolantsVSToDS %v4float +%FFlatTessellationHSToDS = OpTypeStruct %FBasePassVSToDS %v3float %float %float + %int_15 = OpConstant %int 15 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 +%type_Primitive = OpTypeStruct %mat4v4float %v4float %v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %float %float %float %float %v4float %v4float %v3float %uint %v3float %uint %v3float %int %uint %uint %uint %uint %_arr_v4float_uint_4 +%_ptr_Uniform_type_Primitive = OpTypePointer Uniform %type_Primitive +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%type_Material = OpTypeStruct %_arr_v4float_uint_3 %_arr_v4float_uint_1 +%_ptr_Uniform_type_Material = OpTypePointer Uniform %type_Material +%_arr_v4float_uint_3_0 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3_0 = OpTypePointer Input %_arr_v4float_uint_3_0 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output__arr_v4float_uint_3_0 = OpTypePointer Output %_arr_v4float_uint_3_0 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Output__arr_v3float_uint_3 = OpTypePointer Output %_arr_v3float_uint_3 +%_ptr_Output__arr_float_uint_3 = OpTypePointer Output %_arr_float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 
+%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 + %void = OpTypeVoid + %67 = OpTypeFunction %void +%_arr_FBasePassVSToDS_uint_3 = OpTypeArray %FBasePassVSToDS %uint_3 +%_ptr_Function__arr_FBasePassVSToDS_uint_3 = OpTypePointer Function %_arr_FBasePassVSToDS_uint_3 +%_arr_FFlatTessellationHSToDS_uint_3 = OpTypeArray %FFlatTessellationHSToDS %uint_3 +%_ptr_Function__arr_FFlatTessellationHSToDS_uint_3 = OpTypePointer Function %_arr_FFlatTessellationHSToDS_uint_3 +%_ptr_Workgroup__arr_FFlatTessellationHSToDS_uint_3 = OpTypePointer Workgroup %_arr_FFlatTessellationHSToDS_uint_3 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output_v3float = OpTypePointer Output %v3float +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Function_FFlatTessellationHSToDS = OpTypePointer Function %FFlatTessellationHSToDS +%_ptr_Workgroup_FFlatTessellationHSToDS = OpTypePointer Workgroup %FFlatTessellationHSToDS + %bool = OpTypeBool +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Workgroup_float = OpTypePointer Workgroup %float +%mat3v3float = OpTypeMatrix %v3float 3 +%_ptr_Function_FVertexFactoryInterpolantsVSToDS = OpTypePointer Function %FVertexFactoryInterpolantsVSToDS +%_ptr_Function_FBasePassVSToDS = OpTypePointer Function %FBasePassVSToDS +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Primitive = OpVariable %_ptr_Uniform_type_Primitive Uniform + %Material = OpVariable %_ptr_Uniform_type_Material Uniform +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3_0 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3_0 Input +%in_var_VS_To_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_3_0 Input +%gl_InvocationID = OpVariable %_ptr_Input_uint Input +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3_0 Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3_0 Output 
+%out_var_VS_To_DS_Position = OpVariable %_ptr_Output__arr_v4float_uint_3_0 Output +%out_var_Flat_DisplacementScales = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%out_var_Flat_TessellationMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%out_var_Flat_WorldDisplacementMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output + %83 = OpConstantNull %FSharedBasePassInterpolants + %84 = OpConstantComposite %FBasePassInterpolantsVSToDS %83 + %85 = OpUndef %v4float + +; XXX: Original asm used Function here, which is wrong. +; This patches the SPIR-V to be correct. +%temp_var_hullMainRetVal = OpVariable %_ptr_Workgroup__arr_FFlatTessellationHSToDS_uint_3 Workgroup + + %MainHull = OpFunction %void None %67 + %86 = OpLabel +%param_var_I = OpVariable %_ptr_Function__arr_FBasePassVSToDS_uint_3 Function + %87 = OpLoad %_arr_v4float_uint_3_0 %in_var_TEXCOORD10_centroid + %88 = OpLoad %_arr_v4float_uint_3_0 %in_var_TEXCOORD11_centroid + %89 = OpCompositeExtract %v4float %87 0 + %90 = OpCompositeExtract %v4float %88 0 + %91 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %89 %90 + %92 = OpCompositeExtract %v4float %87 1 + %93 = OpCompositeExtract %v4float %88 1 + %94 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %92 %93 + %95 = OpCompositeExtract %v4float %87 2 + %96 = OpCompositeExtract %v4float %88 2 + %97 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %95 %96 + %98 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %91 + %99 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %94 + %100 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %97 + %101 = OpLoad %_arr_v4float_uint_3_0 %in_var_VS_To_DS_Position + %102 = OpCompositeExtract %v4float %101 0 + %103 = OpCompositeConstruct %FBasePassVSToDS %98 %84 %102 + %104 = OpCompositeExtract %v4float %101 1 + %105 = 
OpCompositeConstruct %FBasePassVSToDS %99 %84 %104 + %106 = OpCompositeExtract %v4float %101 2 + %107 = OpCompositeConstruct %FBasePassVSToDS %100 %84 %106 + %108 = OpCompositeConstruct %_arr_FBasePassVSToDS_uint_3 %103 %105 %107 + OpStore %param_var_I %108 + %109 = OpLoad %uint %gl_InvocationID + %110 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %109 %int_0 + %111 = OpLoad %FVertexFactoryInterpolantsVSToDS %110 + %112 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %111 0 + %113 = OpCompositeExtract %v4float %112 0 + %114 = OpCompositeExtract %v4float %112 1 + %115 = OpVectorShuffle %v3float %113 %113 0 1 2 + %116 = OpVectorShuffle %v3float %114 %114 0 1 2 + %117 = OpExtInst %v3float %1 Cross %116 %115 + %118 = OpCompositeExtract %float %114 3 + %119 = OpCompositeConstruct %v3float %118 %118 %118 + %120 = OpFMul %v3float %117 %119 + %121 = OpCompositeConstruct %mat3v3float %115 %120 %116 + %122 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_15 + %123 = OpLoad %v4float %122 + %124 = OpVectorShuffle %v3float %123 %123 0 1 2 + %125 = OpVectorTimesMatrix %v3float %124 %121 + %126 = OpAccessChain %_ptr_Function_FBasePassVSToDS %param_var_I %109 + %127 = OpLoad %FBasePassVSToDS %126 + %128 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_0 + %129 = OpLoad %float %128 + %130 = OpCompositeConstruct %FFlatTessellationHSToDS %127 %125 %129 %float_1 + %131 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %127 0 + %132 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %131 0 + %133 = OpCompositeExtract %v4float %132 0 + %134 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD10_centroid %109 + OpStore %134 %133 + %135 = OpCompositeExtract %v4float %132 1 + %136 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD11_centroid %109 + OpStore %136 %135 + %137 = OpCompositeExtract %v4float %127 2 + %138 = OpAccessChain %_ptr_Output_v4float %out_var_VS_To_DS_Position %109 + OpStore %138 %137 + %139 = 
OpAccessChain %_ptr_Output_v3float %out_var_Flat_DisplacementScales %109 + OpStore %139 %125 + %140 = OpAccessChain %_ptr_Output_float %out_var_Flat_TessellationMultiplier %109 + OpStore %140 %129 + %141 = OpAccessChain %_ptr_Output_float %out_var_Flat_WorldDisplacementMultiplier %109 + OpStore %141 %float_1 + %142 = OpAccessChain %_ptr_Workgroup_FFlatTessellationHSToDS %temp_var_hullMainRetVal %109 + OpStore %142 %130 + OpControlBarrier %uint_2 %uint_4 %uint_0 + %143 = OpIEqual %bool %109 %uint_0 + OpSelectionMerge %if_merge None + OpBranchConditional %143 %144 %if_merge + %144 = OpLabel + %145 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_1 %int_2 + %146 = OpLoad %float %145 + %147 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_2 %int_2 + %148 = OpLoad %float %147 + %149 = OpFAdd %float %146 %148 + %150 = OpFMul %float %float_0_5 %149 + %151 = OpCompositeInsert %v4float %150 %85 0 + %152 = OpLoad %float %147 + %153 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_0 %int_2 + %154 = OpLoad %float %153 + %155 = OpFAdd %float %152 %154 + %156 = OpFMul %float %float_0_5 %155 + %157 = OpCompositeInsert %v4float %156 %151 1 + %158 = OpLoad %float %153 + %159 = OpLoad %float %145 + %160 = OpFAdd %float %158 %159 + %161 = OpFMul %float %float_0_5 %160 + %162 = OpCompositeInsert %v4float %161 %157 2 + %163 = OpLoad %float %153 + %164 = OpLoad %float %145 + %165 = OpFAdd %float %163 %164 + %166 = OpLoad %float %147 + %167 = OpFAdd %float %165 %166 + %168 = OpFMul %float %float_0_333000004 %167 + %169 = OpCompositeInsert %v4float %168 %162 3 + %170 = OpExtInst %v4float %1 FClamp %169 %49 %51 + %171 = OpCompositeExtract %float %170 0 + %172 = OpCompositeExtract %float %170 1 + %173 = OpCompositeExtract %float %170 2 + %174 = OpCompositeExtract %float %170 3 + %175 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_0 + OpStore %175 %171 + %176 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_1 
+ OpStore %176 %172 + %177 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_2 + OpStore %177 %173 + %178 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %uint_0 + OpStore %178 %174 + OpBranch %if_merge + %if_merge = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese b/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese new file mode 100644 index 00000000000..dc543d1f861 --- /dev/null +++ b/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese @@ -0,0 +1,1046 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 310 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpCapability StorageImageExtendedFormats + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %MainDomain "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_COLOR0 %in_var_TEXCOORD0 %in_var_PRIMITIVE_ID %in_var_VS_to_DS_Position %in_var_PN_POSITION %in_var_PN_DisplacementScales %in_var_PN_TessellationMultiplier %in_var_PN_WorldDisplacementMultiplier %gl_TessLevelOuter %gl_TessLevelInner %in_var_PN_POSITION9 %gl_TessCoord %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_COLOR0 %out_var_TEXCOORD0 %out_var_PRIMITIVE_ID %out_var_TEXCOORD6 %out_var_TEXCOORD8 %out_var_TEXCOORD7 %gl_Position + OpExecutionMode %MainDomain Triangles + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 "View_ViewToClipNoAA" + OpMemberName 
%type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 "View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 "View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 45 
"PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName %type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 "PrePadding_View_2076" + OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName %type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName %type_View 81 "View_DeltaTime" + 
OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 "View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 "View_AtmosphericFogSunPower" + OpMemberName 
%type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + OpMemberName %type_View 132 "PrePadding_View_2524" + OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName %type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 "View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 
148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + OpMemberName %type_View 165 "PrePadding_View_2956" + OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_sampler "type.sampler" + OpName %type_2d_image "type.2d.image" + OpName 
%type_ShadowDepthPass "type.ShadowDepthPass" + OpMemberName %type_ShadowDepthPass 0 "PrePadding_ShadowDepthPass_LPV_0" + OpMemberName %type_ShadowDepthPass 1 "PrePadding_ShadowDepthPass_LPV_4" + OpMemberName %type_ShadowDepthPass 2 "PrePadding_ShadowDepthPass_LPV_8" + OpMemberName %type_ShadowDepthPass 3 "PrePadding_ShadowDepthPass_LPV_12" + OpMemberName %type_ShadowDepthPass 4 "PrePadding_ShadowDepthPass_LPV_16" + OpMemberName %type_ShadowDepthPass 5 "PrePadding_ShadowDepthPass_LPV_20" + OpMemberName %type_ShadowDepthPass 6 "PrePadding_ShadowDepthPass_LPV_24" + OpMemberName %type_ShadowDepthPass 7 "PrePadding_ShadowDepthPass_LPV_28" + OpMemberName %type_ShadowDepthPass 8 "PrePadding_ShadowDepthPass_LPV_32" + OpMemberName %type_ShadowDepthPass 9 "PrePadding_ShadowDepthPass_LPV_36" + OpMemberName %type_ShadowDepthPass 10 "PrePadding_ShadowDepthPass_LPV_40" + OpMemberName %type_ShadowDepthPass 11 "PrePadding_ShadowDepthPass_LPV_44" + OpMemberName %type_ShadowDepthPass 12 "PrePadding_ShadowDepthPass_LPV_48" + OpMemberName %type_ShadowDepthPass 13 "PrePadding_ShadowDepthPass_LPV_52" + OpMemberName %type_ShadowDepthPass 14 "PrePadding_ShadowDepthPass_LPV_56" + OpMemberName %type_ShadowDepthPass 15 "PrePadding_ShadowDepthPass_LPV_60" + OpMemberName %type_ShadowDepthPass 16 "PrePadding_ShadowDepthPass_LPV_64" + OpMemberName %type_ShadowDepthPass 17 "PrePadding_ShadowDepthPass_LPV_68" + OpMemberName %type_ShadowDepthPass 18 "PrePadding_ShadowDepthPass_LPV_72" + OpMemberName %type_ShadowDepthPass 19 "PrePadding_ShadowDepthPass_LPV_76" + OpMemberName %type_ShadowDepthPass 20 "PrePadding_ShadowDepthPass_LPV_80" + OpMemberName %type_ShadowDepthPass 21 "PrePadding_ShadowDepthPass_LPV_84" + OpMemberName %type_ShadowDepthPass 22 "PrePadding_ShadowDepthPass_LPV_88" + OpMemberName %type_ShadowDepthPass 23 "PrePadding_ShadowDepthPass_LPV_92" + OpMemberName %type_ShadowDepthPass 24 "PrePadding_ShadowDepthPass_LPV_96" + OpMemberName %type_ShadowDepthPass 25 
"PrePadding_ShadowDepthPass_LPV_100" + OpMemberName %type_ShadowDepthPass 26 "PrePadding_ShadowDepthPass_LPV_104" + OpMemberName %type_ShadowDepthPass 27 "PrePadding_ShadowDepthPass_LPV_108" + OpMemberName %type_ShadowDepthPass 28 "PrePadding_ShadowDepthPass_LPV_112" + OpMemberName %type_ShadowDepthPass 29 "PrePadding_ShadowDepthPass_LPV_116" + OpMemberName %type_ShadowDepthPass 30 "PrePadding_ShadowDepthPass_LPV_120" + OpMemberName %type_ShadowDepthPass 31 "PrePadding_ShadowDepthPass_LPV_124" + OpMemberName %type_ShadowDepthPass 32 "PrePadding_ShadowDepthPass_LPV_128" + OpMemberName %type_ShadowDepthPass 33 "PrePadding_ShadowDepthPass_LPV_132" + OpMemberName %type_ShadowDepthPass 34 "PrePadding_ShadowDepthPass_LPV_136" + OpMemberName %type_ShadowDepthPass 35 "PrePadding_ShadowDepthPass_LPV_140" + OpMemberName %type_ShadowDepthPass 36 "PrePadding_ShadowDepthPass_LPV_144" + OpMemberName %type_ShadowDepthPass 37 "PrePadding_ShadowDepthPass_LPV_148" + OpMemberName %type_ShadowDepthPass 38 "PrePadding_ShadowDepthPass_LPV_152" + OpMemberName %type_ShadowDepthPass 39 "PrePadding_ShadowDepthPass_LPV_156" + OpMemberName %type_ShadowDepthPass 40 "PrePadding_ShadowDepthPass_LPV_160" + OpMemberName %type_ShadowDepthPass 41 "PrePadding_ShadowDepthPass_LPV_164" + OpMemberName %type_ShadowDepthPass 42 "PrePadding_ShadowDepthPass_LPV_168" + OpMemberName %type_ShadowDepthPass 43 "PrePadding_ShadowDepthPass_LPV_172" + OpMemberName %type_ShadowDepthPass 44 "PrePadding_ShadowDepthPass_LPV_176" + OpMemberName %type_ShadowDepthPass 45 "PrePadding_ShadowDepthPass_LPV_180" + OpMemberName %type_ShadowDepthPass 46 "PrePadding_ShadowDepthPass_LPV_184" + OpMemberName %type_ShadowDepthPass 47 "PrePadding_ShadowDepthPass_LPV_188" + OpMemberName %type_ShadowDepthPass 48 "PrePadding_ShadowDepthPass_LPV_192" + OpMemberName %type_ShadowDepthPass 49 "PrePadding_ShadowDepthPass_LPV_196" + OpMemberName %type_ShadowDepthPass 50 "PrePadding_ShadowDepthPass_LPV_200" + OpMemberName %type_ShadowDepthPass 
51 "PrePadding_ShadowDepthPass_LPV_204" + OpMemberName %type_ShadowDepthPass 52 "PrePadding_ShadowDepthPass_LPV_208" + OpMemberName %type_ShadowDepthPass 53 "PrePadding_ShadowDepthPass_LPV_212" + OpMemberName %type_ShadowDepthPass 54 "PrePadding_ShadowDepthPass_LPV_216" + OpMemberName %type_ShadowDepthPass 55 "PrePadding_ShadowDepthPass_LPV_220" + OpMemberName %type_ShadowDepthPass 56 "PrePadding_ShadowDepthPass_LPV_224" + OpMemberName %type_ShadowDepthPass 57 "PrePadding_ShadowDepthPass_LPV_228" + OpMemberName %type_ShadowDepthPass 58 "PrePadding_ShadowDepthPass_LPV_232" + OpMemberName %type_ShadowDepthPass 59 "PrePadding_ShadowDepthPass_LPV_236" + OpMemberName %type_ShadowDepthPass 60 "PrePadding_ShadowDepthPass_LPV_240" + OpMemberName %type_ShadowDepthPass 61 "PrePadding_ShadowDepthPass_LPV_244" + OpMemberName %type_ShadowDepthPass 62 "PrePadding_ShadowDepthPass_LPV_248" + OpMemberName %type_ShadowDepthPass 63 "PrePadding_ShadowDepthPass_LPV_252" + OpMemberName %type_ShadowDepthPass 64 "PrePadding_ShadowDepthPass_LPV_256" + OpMemberName %type_ShadowDepthPass 65 "PrePadding_ShadowDepthPass_LPV_260" + OpMemberName %type_ShadowDepthPass 66 "PrePadding_ShadowDepthPass_LPV_264" + OpMemberName %type_ShadowDepthPass 67 "PrePadding_ShadowDepthPass_LPV_268" + OpMemberName %type_ShadowDepthPass 68 "ShadowDepthPass_LPV_mRsmToWorld" + OpMemberName %type_ShadowDepthPass 69 "ShadowDepthPass_LPV_mLightColour" + OpMemberName %type_ShadowDepthPass 70 "ShadowDepthPass_LPV_GeometryVolumeCaptureLightDirection" + OpMemberName %type_ShadowDepthPass 71 "ShadowDepthPass_LPV_mEyePos" + OpMemberName %type_ShadowDepthPass 72 "ShadowDepthPass_LPV_mOldGridOffset" + OpMemberName %type_ShadowDepthPass 73 "PrePadding_ShadowDepthPass_LPV_396" + OpMemberName %type_ShadowDepthPass 74 "ShadowDepthPass_LPV_mLpvGridOffset" + OpMemberName %type_ShadowDepthPass 75 "ShadowDepthPass_LPV_ClearMultiplier" + OpMemberName %type_ShadowDepthPass 76 "ShadowDepthPass_LPV_LpvScale" + OpMemberName 
%type_ShadowDepthPass 77 "ShadowDepthPass_LPV_OneOverLpvScale" + OpMemberName %type_ShadowDepthPass 78 "ShadowDepthPass_LPV_DirectionalOcclusionIntensity" + OpMemberName %type_ShadowDepthPass 79 "ShadowDepthPass_LPV_DirectionalOcclusionRadius" + OpMemberName %type_ShadowDepthPass 80 "ShadowDepthPass_LPV_RsmAreaIntensityMultiplier" + OpMemberName %type_ShadowDepthPass 81 "ShadowDepthPass_LPV_RsmPixelToTexcoordMultiplier" + OpMemberName %type_ShadowDepthPass 82 "ShadowDepthPass_LPV_SecondaryOcclusionStrength" + OpMemberName %type_ShadowDepthPass 83 "ShadowDepthPass_LPV_SecondaryBounceStrength" + OpMemberName %type_ShadowDepthPass 84 "ShadowDepthPass_LPV_VplInjectionBias" + OpMemberName %type_ShadowDepthPass 85 "ShadowDepthPass_LPV_GeometryVolumeInjectionBias" + OpMemberName %type_ShadowDepthPass 86 "ShadowDepthPass_LPV_EmissiveInjectionMultiplier" + OpMemberName %type_ShadowDepthPass 87 "ShadowDepthPass_LPV_PropagationIndex" + OpMemberName %type_ShadowDepthPass 88 "ShadowDepthPass_ProjectionMatrix" + OpMemberName %type_ShadowDepthPass 89 "ShadowDepthPass_ViewMatrix" + OpMemberName %type_ShadowDepthPass 90 "ShadowDepthPass_ShadowParams" + OpMemberName %type_ShadowDepthPass 91 "ShadowDepthPass_bClampToNearPlane" + OpMemberName %type_ShadowDepthPass 92 "PrePadding_ShadowDepthPass_612" + OpMemberName %type_ShadowDepthPass 93 "PrePadding_ShadowDepthPass_616" + OpMemberName %type_ShadowDepthPass 94 "PrePadding_ShadowDepthPass_620" + OpMemberName %type_ShadowDepthPass 95 "ShadowDepthPass_ShadowViewProjectionMatrices" + OpMemberName %type_ShadowDepthPass 96 "ShadowDepthPass_ShadowViewMatrices" + OpName %ShadowDepthPass "ShadowDepthPass" + OpName %Material_Texture2D_3 "Material_Texture2D_3" + OpName %Material_Texture2D_3Sampler "Material_Texture2D_3Sampler" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_COLOR0 "in.var.COLOR0" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + 
OpName %in_var_PRIMITIVE_ID "in.var.PRIMITIVE_ID" + OpName %in_var_VS_to_DS_Position "in.var.VS_to_DS_Position" + OpName %in_var_PN_POSITION "in.var.PN_POSITION" + OpName %in_var_PN_DisplacementScales "in.var.PN_DisplacementScales" + OpName %in_var_PN_TessellationMultiplier "in.var.PN_TessellationMultiplier" + OpName %in_var_PN_WorldDisplacementMultiplier "in.var.PN_WorldDisplacementMultiplier" + OpName %in_var_PN_POSITION9 "in.var.PN_POSITION9" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_COLOR0 "out.var.COLOR0" + OpName %out_var_TEXCOORD0 "out.var.TEXCOORD0" + OpName %out_var_PRIMITIVE_ID "out.var.PRIMITIVE_ID" + OpName %out_var_TEXCOORD6 "out.var.TEXCOORD6" + OpName %out_var_TEXCOORD8 "out.var.TEXCOORD8" + OpName %out_var_TEXCOORD7 "out.var.TEXCOORD7" + OpName %MainDomain "MainDomain" + OpName %type_sampled_image "type.sampled.image" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_COLOR0 UserSemantic "COLOR0" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorateString %in_var_VS_to_DS_Position UserSemantic "VS_to_DS_Position" + OpDecorateString %in_var_PN_POSITION UserSemantic "PN_POSITION" + OpDecorateString %in_var_PN_DisplacementScales UserSemantic "PN_DisplacementScales" + OpDecorateString %in_var_PN_TessellationMultiplier UserSemantic "PN_TessellationMultiplier" + OpDecorateString %in_var_PN_WorldDisplacementMultiplier UserSemantic "PN_WorldDisplacementMultiplier" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner 
UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %in_var_PN_POSITION9 UserSemantic "PN_POSITION9" + OpDecorate %in_var_PN_POSITION9 Patch + OpDecorate %gl_TessCoord BuiltIn TessCoord + OpDecorateString %gl_TessCoord UserSemantic "SV_DomainLocation" + OpDecorate %gl_TessCoord Patch + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_COLOR0 UserSemantic "COLOR0" + OpDecorateString %out_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorateString %out_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %out_var_TEXCOORD8 UserSemantic "TEXCOORD8" + OpDecorateString %out_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic "SV_POSITION" + OpDecorate %in_var_COLOR0 Location 0 + OpDecorate %in_var_PN_DisplacementScales Location 1 + OpDecorate %in_var_PN_POSITION Location 2 + OpDecorate %in_var_PN_POSITION9 Location 5 + OpDecorate %in_var_PN_TessellationMultiplier Location 6 + OpDecorate %in_var_PN_WorldDisplacementMultiplier Location 7 + OpDecorate %in_var_PRIMITIVE_ID Location 8 + OpDecorate %in_var_TEXCOORD0 Location 9 + OpDecorate %in_var_TEXCOORD10_centroid Location 10 + OpDecorate %in_var_TEXCOORD11_centroid Location 11 + OpDecorate %in_var_VS_to_DS_Position Location 12 + OpDecorate %out_var_TEXCOORD10_centroid Location 0 + OpDecorate %out_var_TEXCOORD11_centroid Location 1 + OpDecorate %out_var_COLOR0 Location 2 + OpDecorate %out_var_TEXCOORD0 Location 3 + OpDecorate %out_var_PRIMITIVE_ID Location 4 + OpDecorate %out_var_TEXCOORD6 Location 5 + OpDecorate %out_var_TEXCOORD8 Location 6 + OpDecorate %out_var_TEXCOORD7 Location 7 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %ShadowDepthPass DescriptorSet 0 + 
OpDecorate %ShadowDepthPass Binding 1 + OpDecorate %Material_Texture2D_3 DescriptorSet 0 + OpDecorate %Material_Texture2D_3 Binding 0 + OpDecorate %Material_Texture2D_3Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_3Sampler Binding 0 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate 
%type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + OpMemberDecorate %type_View 30 Offset 1040 + OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 
+ OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + 
OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate %type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate %type_View 92 Offset 2236 + OpMemberDecorate %type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + 
OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + OpMemberDecorate %type_View 136 Offset 2560 + OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate %type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate %type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate 
%type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate %type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpDecorate %type_View Block + OpDecorate %_arr_mat4v4float_uint_6 ArrayStride 64 + OpMemberDecorate %type_ShadowDepthPass 0 Offset 0 + OpMemberDecorate %type_ShadowDepthPass 1 Offset 4 + OpMemberDecorate %type_ShadowDepthPass 2 Offset 8 + OpMemberDecorate %type_ShadowDepthPass 3 Offset 12 + OpMemberDecorate %type_ShadowDepthPass 4 Offset 16 + OpMemberDecorate %type_ShadowDepthPass 5 Offset 20 + OpMemberDecorate %type_ShadowDepthPass 6 Offset 24 + OpMemberDecorate %type_ShadowDepthPass 7 Offset 28 + OpMemberDecorate %type_ShadowDepthPass 8 Offset 32 + OpMemberDecorate %type_ShadowDepthPass 9 Offset 36 + OpMemberDecorate %type_ShadowDepthPass 10 Offset 40 + OpMemberDecorate %type_ShadowDepthPass 11 Offset 44 + OpMemberDecorate %type_ShadowDepthPass 12 Offset 48 + OpMemberDecorate %type_ShadowDepthPass 13 Offset 52 + OpMemberDecorate %type_ShadowDepthPass 14 Offset 56 + OpMemberDecorate %type_ShadowDepthPass 15 Offset 60 + OpMemberDecorate %type_ShadowDepthPass 16 Offset 64 + OpMemberDecorate %type_ShadowDepthPass 17 Offset 68 + 
OpMemberDecorate %type_ShadowDepthPass 18 Offset 72 + OpMemberDecorate %type_ShadowDepthPass 19 Offset 76 + OpMemberDecorate %type_ShadowDepthPass 20 Offset 80 + OpMemberDecorate %type_ShadowDepthPass 21 Offset 84 + OpMemberDecorate %type_ShadowDepthPass 22 Offset 88 + OpMemberDecorate %type_ShadowDepthPass 23 Offset 92 + OpMemberDecorate %type_ShadowDepthPass 24 Offset 96 + OpMemberDecorate %type_ShadowDepthPass 25 Offset 100 + OpMemberDecorate %type_ShadowDepthPass 26 Offset 104 + OpMemberDecorate %type_ShadowDepthPass 27 Offset 108 + OpMemberDecorate %type_ShadowDepthPass 28 Offset 112 + OpMemberDecorate %type_ShadowDepthPass 29 Offset 116 + OpMemberDecorate %type_ShadowDepthPass 30 Offset 120 + OpMemberDecorate %type_ShadowDepthPass 31 Offset 124 + OpMemberDecorate %type_ShadowDepthPass 32 Offset 128 + OpMemberDecorate %type_ShadowDepthPass 33 Offset 132 + OpMemberDecorate %type_ShadowDepthPass 34 Offset 136 + OpMemberDecorate %type_ShadowDepthPass 35 Offset 140 + OpMemberDecorate %type_ShadowDepthPass 36 Offset 144 + OpMemberDecorate %type_ShadowDepthPass 37 Offset 148 + OpMemberDecorate %type_ShadowDepthPass 38 Offset 152 + OpMemberDecorate %type_ShadowDepthPass 39 Offset 156 + OpMemberDecorate %type_ShadowDepthPass 40 Offset 160 + OpMemberDecorate %type_ShadowDepthPass 41 Offset 164 + OpMemberDecorate %type_ShadowDepthPass 42 Offset 168 + OpMemberDecorate %type_ShadowDepthPass 43 Offset 172 + OpMemberDecorate %type_ShadowDepthPass 44 Offset 176 + OpMemberDecorate %type_ShadowDepthPass 45 Offset 180 + OpMemberDecorate %type_ShadowDepthPass 46 Offset 184 + OpMemberDecorate %type_ShadowDepthPass 47 Offset 188 + OpMemberDecorate %type_ShadowDepthPass 48 Offset 192 + OpMemberDecorate %type_ShadowDepthPass 49 Offset 196 + OpMemberDecorate %type_ShadowDepthPass 50 Offset 200 + OpMemberDecorate %type_ShadowDepthPass 51 Offset 204 + OpMemberDecorate %type_ShadowDepthPass 52 Offset 208 + OpMemberDecorate %type_ShadowDepthPass 53 Offset 212 + OpMemberDecorate 
%type_ShadowDepthPass 54 Offset 216 + OpMemberDecorate %type_ShadowDepthPass 55 Offset 220 + OpMemberDecorate %type_ShadowDepthPass 56 Offset 224 + OpMemberDecorate %type_ShadowDepthPass 57 Offset 228 + OpMemberDecorate %type_ShadowDepthPass 58 Offset 232 + OpMemberDecorate %type_ShadowDepthPass 59 Offset 236 + OpMemberDecorate %type_ShadowDepthPass 60 Offset 240 + OpMemberDecorate %type_ShadowDepthPass 61 Offset 244 + OpMemberDecorate %type_ShadowDepthPass 62 Offset 248 + OpMemberDecorate %type_ShadowDepthPass 63 Offset 252 + OpMemberDecorate %type_ShadowDepthPass 64 Offset 256 + OpMemberDecorate %type_ShadowDepthPass 65 Offset 260 + OpMemberDecorate %type_ShadowDepthPass 66 Offset 264 + OpMemberDecorate %type_ShadowDepthPass 67 Offset 268 + OpMemberDecorate %type_ShadowDepthPass 68 Offset 272 + OpMemberDecorate %type_ShadowDepthPass 68 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 68 ColMajor + OpMemberDecorate %type_ShadowDepthPass 69 Offset 336 + OpMemberDecorate %type_ShadowDepthPass 70 Offset 352 + OpMemberDecorate %type_ShadowDepthPass 71 Offset 368 + OpMemberDecorate %type_ShadowDepthPass 72 Offset 384 + OpMemberDecorate %type_ShadowDepthPass 73 Offset 396 + OpMemberDecorate %type_ShadowDepthPass 74 Offset 400 + OpMemberDecorate %type_ShadowDepthPass 75 Offset 412 + OpMemberDecorate %type_ShadowDepthPass 76 Offset 416 + OpMemberDecorate %type_ShadowDepthPass 77 Offset 420 + OpMemberDecorate %type_ShadowDepthPass 78 Offset 424 + OpMemberDecorate %type_ShadowDepthPass 79 Offset 428 + OpMemberDecorate %type_ShadowDepthPass 80 Offset 432 + OpMemberDecorate %type_ShadowDepthPass 81 Offset 436 + OpMemberDecorate %type_ShadowDepthPass 82 Offset 440 + OpMemberDecorate %type_ShadowDepthPass 83 Offset 444 + OpMemberDecorate %type_ShadowDepthPass 84 Offset 448 + OpMemberDecorate %type_ShadowDepthPass 85 Offset 452 + OpMemberDecorate %type_ShadowDepthPass 86 Offset 456 + OpMemberDecorate %type_ShadowDepthPass 87 Offset 460 + OpMemberDecorate 
%type_ShadowDepthPass 88 Offset 464 + OpMemberDecorate %type_ShadowDepthPass 88 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 88 ColMajor + OpMemberDecorate %type_ShadowDepthPass 89 Offset 528 + OpMemberDecorate %type_ShadowDepthPass 89 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 89 ColMajor + OpMemberDecorate %type_ShadowDepthPass 90 Offset 592 + OpMemberDecorate %type_ShadowDepthPass 91 Offset 608 + OpMemberDecorate %type_ShadowDepthPass 92 Offset 612 + OpMemberDecorate %type_ShadowDepthPass 93 Offset 616 + OpMemberDecorate %type_ShadowDepthPass 94 Offset 620 + OpMemberDecorate %type_ShadowDepthPass 95 Offset 624 + OpMemberDecorate %type_ShadowDepthPass 95 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 95 ColMajor + OpMemberDecorate %type_ShadowDepthPass 96 Offset 1008 + OpMemberDecorate %type_ShadowDepthPass 96 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 96 ColMajor + OpDecorate %type_ShadowDepthPass Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %v2int = OpTypeVector %int 2 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %float_3 = OpConstant %float 3 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %float_6 = OpConstant %float 6 + %57 = OpConstantComposite %v4float %float_6 %float_6 %float_6 %float_6 + %float_1 = OpConstant %float 1 + %int_79 = OpConstant %int 79 +%float_0_200000003 = OpConstant %float 0.200000003 +%float_n0_699999988 = OpConstant %float -0.699999988 + %float_2 = OpConstant %float 2 + %63 = OpConstantComposite %v2float %float_1 %float_2 + %float_n1 = OpConstant %float -1 + %float_10 = OpConstant %float 10 + %float_0_5 = OpConstant %float 0.5 + %67 = OpConstantComposite 
%v3float %float_0_5 %float_0_5 %float_0_5 + %int_88 = OpConstant %int 88 + %int_89 = OpConstant %int 89 + %int_90 = OpConstant %int 90 + %int_91 = OpConstant %int 91 + %float_0 = OpConstant %float 0 +%float_9_99999997en07 = OpConstant %float 9.99999997e-07 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float 
%float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image + %uint_6 = OpConstant %uint 6 +%_arr_mat4v4float_uint_6 = OpTypeArray %mat4v4float %uint_6 + %v3int = OpTypeVector %int 3 +%type_ShadowDepthPass = OpTypeStruct %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %mat4v4float %v4float %v4float %v4float %v3int %int %v3int %float %float %float %float %float %float %float %float %float %float %float %float %int %mat4v4float %mat4v4float %v4float %float %float %float %float %_arr_mat4v4float_uint_6 %_arr_mat4v4float_uint_6 +%_ptr_Uniform_type_ShadowDepthPass = OpTypePointer Uniform %type_ShadowDepthPass + %uint_3 = OpConstant %uint 3 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%_arr__arr_v4float_uint_1_uint_3 = OpTypeArray %_arr_v4float_uint_1 %uint_3 +%_ptr_Input__arr__arr_v4float_uint_1_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_1_uint_3 +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 +%_ptr_Input__arr_uint_uint_3 = OpTypePointer Input %_arr_uint_uint_3 +%_arr__arr_v4float_uint_3_uint_3 = OpTypeArray %_arr_v4float_uint_3 %uint_3 +%_ptr_Input__arr__arr_v4float_uint_3_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_3_uint_3 
+%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Input__arr_v3float_uint_3 = OpTypePointer Input %_arr_v3float_uint_3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%_ptr_Input__arr_float_uint_3 = OpTypePointer Input %_arr_float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Input__arr_float_uint_2 = OpTypePointer Input %_arr_float_uint_2 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output__arr_v4float_uint_1 = OpTypePointer Output %_arr_v4float_uint_1 +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %106 = OpTypeFunction %void +%_ptr_Function_float = OpTypePointer Function %float + %bool = OpTypeBool +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Function_mat4v4float = OpTypePointer Function %mat4v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%type_sampled_image = OpTypeSampledImage %type_2d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform +%ShadowDepthPass = OpVariable %_ptr_Uniform_type_ShadowDepthPass Uniform +%Material_Texture2D_3 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Material_Texture2D_3Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_COLOR0 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr__arr_v4float_uint_1_uint_3 Input +%in_var_PRIMITIVE_ID = OpVariable %_ptr_Input__arr_uint_uint_3 Input +%in_var_VS_to_DS_Position = OpVariable 
%_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_POSITION = OpVariable %_ptr_Input__arr__arr_v4float_uint_3_uint_3 Input +%in_var_PN_DisplacementScales = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_TessellationMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%in_var_PN_WorldDisplacementMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%gl_TessLevelOuter = OpVariable %_ptr_Input__arr_float_uint_4 Input +%gl_TessLevelInner = OpVariable %_ptr_Input__arr_float_uint_2 Input +%in_var_PN_POSITION9 = OpVariable %_ptr_Input_v4float Input +%gl_TessCoord = OpVariable %_ptr_Input_v3float Input +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_COLOR0 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD0 = OpVariable %_ptr_Output__arr_v4float_uint_1 Output +%out_var_PRIMITIVE_ID = OpVariable %_ptr_Output_uint Output +%out_var_TEXCOORD6 = OpVariable %_ptr_Output_float Output +%out_var_TEXCOORD8 = OpVariable %_ptr_Output_float Output +%out_var_TEXCOORD7 = OpVariable %_ptr_Output_v3float Output +%gl_Position = OpVariable %_ptr_Output_v4float Output + %112 = OpConstantNull %v4float + %113 = OpUndef %v4float +%_ptr_Input_uint = OpTypePointer Input %uint + %MainDomain = OpFunction %void None %106 + %115 = OpLabel + %116 = OpVariable %_ptr_Function_mat4v4float Function + %117 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD10_centroid + %118 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD11_centroid + %119 = OpLoad %_arr_v4float_uint_3 %in_var_COLOR0 + %120 = OpLoad %_arr__arr_v4float_uint_1_uint_3 %in_var_TEXCOORD0 + %121 = OpAccessChain %_ptr_Input_uint %in_var_PRIMITIVE_ID %uint_0 + %122 = OpLoad %uint %121 + %123 = OpCompositeExtract %v4float %117 0 + %124 = OpCompositeExtract %v4float %118 0 + %125 = OpCompositeExtract %v4float %119 0 + %126 = OpCompositeExtract %_arr_v4float_uint_1 %120 0 + %127 = OpCompositeExtract %v4float %117 1 + 
%128 = OpCompositeExtract %v4float %118 1 + %129 = OpCompositeExtract %v4float %119 1 + %130 = OpCompositeExtract %_arr_v4float_uint_1 %120 1 + %131 = OpCompositeExtract %v4float %117 2 + %132 = OpCompositeExtract %v4float %118 2 + %133 = OpCompositeExtract %v4float %119 2 + %134 = OpCompositeExtract %_arr_v4float_uint_1 %120 2 + %135 = OpLoad %_arr__arr_v4float_uint_3_uint_3 %in_var_PN_POSITION + %136 = OpLoad %_arr_float_uint_3 %in_var_PN_WorldDisplacementMultiplier + %137 = OpCompositeExtract %_arr_v4float_uint_3 %135 0 + %138 = OpCompositeExtract %float %136 0 + %139 = OpCompositeExtract %_arr_v4float_uint_3 %135 1 + %140 = OpCompositeExtract %float %136 1 + %141 = OpCompositeExtract %_arr_v4float_uint_3 %135 2 + %142 = OpCompositeExtract %float %136 2 + %143 = OpCompositeExtract %v4float %137 0 + %144 = OpCompositeExtract %v4float %137 1 + %145 = OpCompositeExtract %v4float %137 2 + %146 = OpCompositeExtract %v4float %139 0 + %147 = OpCompositeExtract %v4float %139 1 + %148 = OpCompositeExtract %v4float %139 2 + %149 = OpCompositeExtract %v4float %141 0 + %150 = OpCompositeExtract %v4float %141 1 + %151 = OpCompositeExtract %v4float %141 2 + %152 = OpLoad %v4float %in_var_PN_POSITION9 + %153 = OpLoad %v3float %gl_TessCoord + %154 = OpCompositeExtract %float %153 0 + %155 = OpCompositeExtract %float %153 1 + %156 = OpCompositeExtract %float %153 2 + %157 = OpFMul %float %154 %154 + %158 = OpFMul %float %155 %155 + %159 = OpFMul %float %156 %156 + %160 = OpFMul %float %157 %float_3 + %161 = OpFMul %float %158 %float_3 + %162 = OpFMul %float %159 %float_3 + %163 = OpCompositeConstruct %v4float %157 %157 %157 %157 + %164 = OpFMul %v4float %143 %163 + %165 = OpCompositeConstruct %v4float %154 %154 %154 %154 + %166 = OpFMul %v4float %164 %165 + %167 = OpCompositeConstruct %v4float %158 %158 %158 %158 + %168 = OpFMul %v4float %146 %167 + %169 = OpCompositeConstruct %v4float %155 %155 %155 %155 + %170 = OpFMul %v4float %168 %169 + %171 = OpFAdd %v4float %166 %170 + 
%172 = OpCompositeConstruct %v4float %159 %159 %159 %159 + %173 = OpFMul %v4float %149 %172 + %174 = OpCompositeConstruct %v4float %156 %156 %156 %156 + %175 = OpFMul %v4float %173 %174 + %176 = OpFAdd %v4float %171 %175 + %177 = OpCompositeConstruct %v4float %160 %160 %160 %160 + %178 = OpFMul %v4float %144 %177 + %179 = OpFMul %v4float %178 %169 + %180 = OpFAdd %v4float %176 %179 + %181 = OpCompositeConstruct %v4float %161 %161 %161 %161 + %182 = OpFMul %v4float %145 %181 + %183 = OpFMul %v4float %182 %165 + %184 = OpFAdd %v4float %180 %183 + %185 = OpFMul %v4float %147 %181 + %186 = OpFMul %v4float %185 %174 + %187 = OpFAdd %v4float %184 %186 + %188 = OpCompositeConstruct %v4float %162 %162 %162 %162 + %189 = OpFMul %v4float %148 %188 + %190 = OpFMul %v4float %189 %169 + %191 = OpFAdd %v4float %187 %190 + %192 = OpFMul %v4float %150 %188 + %193 = OpFMul %v4float %192 %165 + %194 = OpFAdd %v4float %191 %193 + %195 = OpFMul %v4float %151 %177 + %196 = OpFMul %v4float %195 %174 + %197 = OpFAdd %v4float %194 %196 + %198 = OpFMul %v4float %152 %57 + %199 = OpFMul %v4float %198 %174 + %200 = OpFMul %v4float %199 %165 + %201 = OpFMul %v4float %200 %169 + %202 = OpFAdd %v4float %197 %201 + %203 = OpCompositeExtract %v4float %126 0 + %204 = OpCompositeExtract %v4float %130 0 + %205 = OpVectorShuffle %v3float %123 %123 0 1 2 + %206 = OpCompositeConstruct %v3float %154 %154 %154 + %207 = OpFMul %v3float %205 %206 + %208 = OpVectorShuffle %v3float %127 %127 0 1 2 + %209 = OpCompositeConstruct %v3float %155 %155 %155 + %210 = OpFMul %v3float %208 %209 + %211 = OpFAdd %v3float %207 %210 + %212 = OpFMul %v4float %124 %165 + %213 = OpFMul %v4float %128 %169 + %214 = OpFAdd %v4float %212 %213 + %215 = OpFMul %v4float %125 %165 + %216 = OpFMul %v4float %129 %169 + %217 = OpFAdd %v4float %215 %216 + %218 = OpFMul %v4float %203 %165 + %219 = OpFMul %v4float %204 %169 + %220 = OpFAdd %v4float %218 %219 + %221 = OpCompositeExtract %v4float %134 0 + %222 = OpVectorShuffle %v3float 
%211 %112 0 1 2 + %223 = OpVectorShuffle %v3float %131 %131 0 1 2 + %224 = OpCompositeConstruct %v3float %156 %156 %156 + %225 = OpFMul %v3float %223 %224 + %226 = OpFAdd %v3float %222 %225 + %227 = OpVectorShuffle %v4float %113 %226 4 5 6 3 + %228 = OpFMul %v4float %132 %174 + %229 = OpFAdd %v4float %214 %228 + %230 = OpFMul %v4float %133 %174 + %231 = OpFAdd %v4float %217 %230 + %232 = OpFMul %v4float %221 %174 + %233 = OpFAdd %v4float %220 %232 + %234 = OpCompositeConstruct %_arr_v4float_uint_1 %233 + %235 = OpVectorShuffle %v2float %233 %233 2 3 + %236 = OpVectorShuffle %v3float %229 %229 0 1 2 + %237 = OpAccessChain %_ptr_Uniform_float %View %int_79 + %238 = OpLoad %float %237 + %239 = OpFMul %float %238 %float_0_200000003 + %240 = OpFMul %float %238 %float_n0_699999988 + %241 = OpFMul %v2float %235 %63 + %242 = OpCompositeConstruct %v2float %239 %240 + %243 = OpFAdd %v2float %242 %241 + %244 = OpLoad %type_2d_image %Material_Texture2D_3 + %245 = OpLoad %type_sampler %Material_Texture2D_3Sampler + %246 = OpSampledImage %type_sampled_image %244 %245 + %247 = OpImageSampleExplicitLod %v4float %246 %243 Lod %float_n1 + %248 = OpCompositeExtract %float %247 0 + %249 = OpFMul %float %248 %float_10 + %250 = OpCompositeExtract %float %231 0 + %251 = OpFSub %float %float_1 %250 + %252 = OpFMul %float %249 %251 + %253 = OpCompositeConstruct %v3float %252 %252 %252 + %254 = OpFMul %v3float %253 %236 + %255 = OpFMul %v3float %254 %67 + %256 = OpFMul %float %138 %154 + %257 = OpFMul %float %140 %155 + %258 = OpFAdd %float %256 %257 + %259 = OpFMul %float %142 %156 + %260 = OpFAdd %float %258 %259 + %261 = OpCompositeConstruct %v3float %260 %260 %260 + %262 = OpFMul %v3float %255 %261 + %263 = OpVectorShuffle %v3float %202 %202 0 1 2 + %264 = OpFAdd %v3float %263 %262 + %265 = OpVectorShuffle %v4float %202 %264 4 5 6 3 + %266 = OpAccessChain %_ptr_Uniform_mat4v4float %ShadowDepthPass %int_88 + %267 = OpLoad %mat4v4float %266 + %268 = OpAccessChain %_ptr_Uniform_mat4v4float 
%ShadowDepthPass %int_89 + %269 = OpLoad %mat4v4float %268 + OpStore %116 %269 + %270 = OpMatrixTimesVector %v4float %267 %265 + %271 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_91 + %272 = OpLoad %float %271 + %273 = OpFOrdGreaterThan %bool %272 %float_0 + %274 = OpCompositeExtract %float %270 2 + %275 = OpFOrdLessThan %bool %274 %float_0 + %276 = OpLogicalAnd %bool %273 %275 + OpSelectionMerge %277 None + OpBranchConditional %276 %278 %277 + %278 = OpLabel + %279 = OpCompositeInsert %v4float %float_9_99999997en07 %270 2 + %280 = OpCompositeInsert %v4float %float_1 %279 3 + OpBranch %277 + %277 = OpLabel + %281 = OpPhi %v4float %270 %115 %280 %278 + %282 = OpAccessChain %_ptr_Function_float %116 %uint_0 %int_2 + %283 = OpLoad %float %282 + %284 = OpAccessChain %_ptr_Function_float %116 %uint_1 %int_2 + %285 = OpLoad %float %284 + %286 = OpAccessChain %_ptr_Function_float %116 %uint_2 %int_2 + %287 = OpLoad %float %286 + %288 = OpCompositeConstruct %v3float %283 %285 %287 + %289 = OpDot %float %288 %236 + %290 = OpExtInst %float %1 FAbs %289 + %291 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_2 + %292 = OpLoad %float %291 + %293 = OpExtInst %float %1 FAbs %290 + %294 = OpFOrdGreaterThan %bool %293 %float_0 + %295 = OpFMul %float %290 %290 + %296 = OpFSub %float %float_1 %295 + %297 = OpExtInst %float %1 FClamp %296 %float_0 %float_1 + %298 = OpExtInst %float %1 Sqrt %297 + %299 = OpFDiv %float %298 %290 + %300 = OpSelect %float %294 %299 %292 + %301 = OpExtInst %float %1 FClamp %300 %float_0 %292 + %302 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_1 + %303 = OpLoad %float %302 + %304 = OpFMul %float %303 %301 + %305 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_0 + %306 = OpLoad %float %305 + %307 = OpFAdd %float %304 %306 + %308 = OpCompositeExtract %float %281 2 + %309 = OpVectorShuffle %v3float %264 %112 0 1 2 + OpStore %out_var_TEXCOORD10_centroid %227 + OpStore 
%out_var_TEXCOORD11_centroid %229 + OpStore %out_var_COLOR0 %231 + OpStore %out_var_TEXCOORD0 %234 + OpStore %out_var_PRIMITIVE_ID %122 + OpStore %out_var_TEXCOORD6 %308 + OpStore %out_var_TEXCOORD8 %307 + OpStore %out_var_TEXCOORD7 %309 + OpStore %gl_Position %281 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese b/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese new file mode 100644 index 00000000000..cb55bb42503 --- /dev/null +++ b/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese @@ -0,0 +1,1175 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 581 +; Schema: 0 + OpCapability Tessellation + OpCapability ClipDistance + OpCapability SampledBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %MainDomain "main" %gl_ClipDistance %in_var_TEXCOORD6 %in_var_TEXCOORD8 %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_VS_To_DS_Position %in_var_VS_To_DS_VertexID %in_var_PN_POSITION %in_var_PN_DisplacementScales %in_var_PN_TessellationMultiplier %in_var_PN_WorldDisplacementMultiplier %in_var_PN_DominantVertex %in_var_PN_DominantVertex1 %in_var_PN_DominantVertex2 %in_var_PN_DominantEdge %in_var_PN_DominantEdge1 %in_var_PN_DominantEdge2 %in_var_PN_DominantEdge3 %in_var_PN_DominantEdge4 %in_var_PN_DominantEdge5 %gl_TessLevelOuter %gl_TessLevelInner %in_var_PN_POSITION9 %gl_TessCoord %gl_Position %out_var_TEXCOORD6 %out_var_TEXCOORD7 %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid + OpExecutionMode %MainDomain Triangles + OpExecutionMode %MainDomain SpacingFractionalOdd + OpExecutionMode %MainDomain VertexOrderCw + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + 
OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 "View_ViewToClipNoAA" + OpMemberName %type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 "View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 "View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + 
OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 45 "PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName %type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 "PrePadding_View_2076" + OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName %type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + 
OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName %type_View 81 "View_DeltaTime" + OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 "View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 
111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 "View_AtmosphericFogSunPower" + OpMemberName %type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + OpMemberName %type_View 132 "PrePadding_View_2524" + OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName %type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 
"View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + OpMemberName %type_View 165 "PrePadding_View_2956" + OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 
"View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpMemberName %type_View 179 "PrePadding_View_3048" + OpMemberName %type_View 180 "PrePadding_View_3052" + OpMemberName %type_View 181 "View_WorldToVirtualTexture" + OpMemberName %type_View 182 "View_VirtualTextureParams" + OpMemberName %type_View 183 "View_XRPassthroughCameraUVs" + OpName %View "View" + OpName %type_sampler "type.sampler" + OpName %type_3d_image "type.3d.image" + OpName %View_GlobalDistanceFieldTexture0 "View_GlobalDistanceFieldTexture0" + OpName %View_GlobalDistanceFieldSampler0 "View_GlobalDistanceFieldSampler0" + OpName %View_GlobalDistanceFieldTexture1 "View_GlobalDistanceFieldTexture1" + OpName %View_GlobalDistanceFieldTexture2 "View_GlobalDistanceFieldTexture2" + OpName %View_GlobalDistanceFieldTexture3 "View_GlobalDistanceFieldTexture3" + OpName %type_Material "type.Material" + OpMemberName %type_Material 0 "Material_VectorExpressions" + OpMemberName %type_Material 1 "Material_ScalarExpressions" + OpName %Material "Material" + OpName %in_var_TEXCOORD6 "in.var.TEXCOORD6" + OpName %in_var_TEXCOORD8 "in.var.TEXCOORD8" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_VS_To_DS_Position "in.var.VS_To_DS_Position" + OpName %in_var_VS_To_DS_VertexID "in.var.VS_To_DS_VertexID" + OpName %in_var_PN_POSITION "in.var.PN_POSITION" + OpName %in_var_PN_DisplacementScales "in.var.PN_DisplacementScales" + OpName %in_var_PN_TessellationMultiplier "in.var.PN_TessellationMultiplier" + OpName %in_var_PN_WorldDisplacementMultiplier "in.var.PN_WorldDisplacementMultiplier" + OpName %in_var_PN_DominantVertex "in.var.PN_DominantVertex" + OpName %in_var_PN_DominantVertex1 "in.var.PN_DominantVertex1" + OpName %in_var_PN_DominantVertex2 
"in.var.PN_DominantVertex2" + OpName %in_var_PN_DominantEdge "in.var.PN_DominantEdge" + OpName %in_var_PN_DominantEdge1 "in.var.PN_DominantEdge1" + OpName %in_var_PN_DominantEdge2 "in.var.PN_DominantEdge2" + OpName %in_var_PN_DominantEdge3 "in.var.PN_DominantEdge3" + OpName %in_var_PN_DominantEdge4 "in.var.PN_DominantEdge4" + OpName %in_var_PN_DominantEdge5 "in.var.PN_DominantEdge5" + OpName %in_var_PN_POSITION9 "in.var.PN_POSITION9" + OpName %out_var_TEXCOORD6 "out.var.TEXCOORD6" + OpName %out_var_TEXCOORD7 "out.var.TEXCOORD7" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %MainDomain "MainDomain" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_ClipDistance BuiltIn ClipDistance + OpDecorateString %gl_ClipDistance UserSemantic "SV_ClipDistance" + OpDecorateString %in_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %in_var_TEXCOORD8 UserSemantic "TEXCOORD8" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %in_var_VS_To_DS_VertexID UserSemantic "VS_To_DS_VertexID" + OpDecorateString %in_var_PN_POSITION UserSemantic "PN_POSITION" + OpDecorateString %in_var_PN_DisplacementScales UserSemantic "PN_DisplacementScales" + OpDecorateString %in_var_PN_TessellationMultiplier UserSemantic "PN_TessellationMultiplier" + OpDecorateString %in_var_PN_WorldDisplacementMultiplier UserSemantic "PN_WorldDisplacementMultiplier" + OpDecorateString %in_var_PN_DominantVertex UserSemantic "PN_DominantVertex" + OpDecorateString %in_var_PN_DominantVertex1 UserSemantic "PN_DominantVertex" + OpDecorateString %in_var_PN_DominantVertex2 UserSemantic "PN_DominantVertex" + OpDecorateString %in_var_PN_DominantEdge UserSemantic "PN_DominantEdge" + OpDecorateString 
%in_var_PN_DominantEdge1 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge2 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge3 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge4 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge5 UserSemantic "PN_DominantEdge" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %in_var_PN_POSITION9 UserSemantic "PN_POSITION9" + OpDecorate %in_var_PN_POSITION9 Patch + OpDecorate %gl_TessCoord BuiltIn TessCoord + OpDecorateString %gl_TessCoord UserSemantic "SV_DomainLocation" + OpDecorate %gl_TessCoord Patch + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic "SV_POSITION" + OpDecorateString %out_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %out_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorate %in_var_PN_DisplacementScales Location 0 + OpDecorate %in_var_PN_DominantEdge Location 1 + OpDecorate %in_var_PN_DominantEdge1 Location 2 + OpDecorate %in_var_PN_DominantEdge2 Location 3 + OpDecorate %in_var_PN_DominantEdge3 Location 4 + OpDecorate %in_var_PN_DominantEdge4 Location 5 + OpDecorate %in_var_PN_DominantEdge5 Location 6 + OpDecorate %in_var_PN_DominantVertex Location 7 + OpDecorate %in_var_PN_DominantVertex1 Location 8 + OpDecorate %in_var_PN_DominantVertex2 Location 9 + OpDecorate %in_var_PN_POSITION Location 10 + OpDecorate %in_var_PN_POSITION9 Location 13 + OpDecorate %in_var_PN_TessellationMultiplier Location 14 + OpDecorate 
%in_var_PN_WorldDisplacementMultiplier Location 15 + OpDecorate %in_var_TEXCOORD10_centroid Location 16 + OpDecorate %in_var_TEXCOORD11_centroid Location 17 + OpDecorate %in_var_TEXCOORD6 Location 18 + OpDecorate %in_var_TEXCOORD8 Location 19 + OpDecorate %in_var_VS_To_DS_Position Location 20 + OpDecorate %in_var_VS_To_DS_VertexID Location 21 + OpDecorate %out_var_TEXCOORD6 Location 0 + OpDecorate %out_var_TEXCOORD7 Location 1 + OpDecorate %out_var_TEXCOORD10_centroid Location 2 + OpDecorate %out_var_TEXCOORD11_centroid Location 3 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %View_GlobalDistanceFieldTexture0 DescriptorSet 0 + OpDecorate %View_GlobalDistanceFieldTexture0 Binding 0 + OpDecorate %View_GlobalDistanceFieldSampler0 DescriptorSet 0 + OpDecorate %View_GlobalDistanceFieldSampler0 Binding 0 + OpDecorate %View_GlobalDistanceFieldTexture1 DescriptorSet 0 + OpDecorate %View_GlobalDistanceFieldTexture1 Binding 1 + OpDecorate %View_GlobalDistanceFieldTexture2 DescriptorSet 0 + OpDecorate %View_GlobalDistanceFieldTexture2 Binding 2 + OpDecorate %View_GlobalDistanceFieldTexture3 DescriptorSet 0 + OpDecorate %View_GlobalDistanceFieldTexture3 Binding 3 + OpDecorate %Material DescriptorSet 0 + OpDecorate %Material Binding 1 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + 
OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + 
OpMemberDecorate %type_View 30 Offset 1040 + OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + 
OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate %type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate 
%type_View 92 Offset 2236 + OpMemberDecorate %type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + 
OpMemberDecorate %type_View 136 Offset 2560 + OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate %type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate %type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate %type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate %type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpMemberDecorate 
%type_View 179 Offset 3048 + OpMemberDecorate %type_View 180 Offset 3052 + OpMemberDecorate %type_View 181 Offset 3056 + OpMemberDecorate %type_View 181 MatrixStride 16 + OpMemberDecorate %type_View 181 ColMajor + OpMemberDecorate %type_View 182 Offset 3120 + OpMemberDecorate %type_View 183 Offset 3136 + OpDecorate %type_View Block + OpDecorate %_arr_v4float_uint_5 ArrayStride 16 + OpMemberDecorate %type_Material 0 Offset 0 + OpMemberDecorate %type_Material 1 Offset 80 + OpDecorate %type_Material Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %v2int = OpTypeVector %int 2 +%float_0_00100000005 = OpConstant %float 0.00100000005 + %uint_0 = OpConstant %uint 0 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %float_3 = OpConstant %float 3 + %uint_1 = OpConstant %uint 1 + %float_6 = OpConstant %float 6 + %67 = OpConstantComposite %v4float %float_6 %float_6 %float_6 %float_6 + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %int_3 = OpConstant %int 3 + %float_2 = OpConstant %float 2 + %int_26 = OpConstant %int 26 + %int_32 = OpConstant %int 32 + %int_54 = OpConstant %int 54 + %int_153 = OpConstant %int 153 + %int_154 = OpConstant %int 154 + %int_156 = OpConstant %int 156 + %int_157 = OpConstant %int 157 + %float_10 = OpConstant %float 10 + %uint_3 = OpConstant %uint 3 + %81 = OpConstantComposite %v3float %float_0 %float_0 %float_0 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float 
%mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float %float %float %mat4v4float %v4float %_arr_v4float_uint_2 +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%type_3d_image = OpTypeImage %float 3D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image + %uint_5 = OpConstant %uint 5 +%_arr_v4float_uint_5 = OpTypeArray %v4float %uint_5 +%type_Material = OpTypeStruct %_arr_v4float_uint_5 %_arr_v4float_uint_2 +%_ptr_Uniform_type_Material = 
OpTypePointer Uniform %type_Material +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%_ptr_Output__arr_float_uint_1 = OpTypePointer Output %_arr_float_uint_1 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 +%_ptr_Input__arr_uint_uint_3 = OpTypePointer Input %_arr_uint_uint_3 +%_arr__arr_v4float_uint_3_uint_3 = OpTypeArray %_arr_v4float_uint_3 %uint_3 +%_ptr_Input__arr__arr_v4float_uint_3_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_3_uint_3 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Input__arr_v3float_uint_3 = OpTypePointer Input %_arr_v3float_uint_3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%_ptr_Input__arr_float_uint_3 = OpTypePointer Input %_arr_float_uint_3 +%_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3 +%_ptr_Input__arr_v2float_uint_3 = OpTypePointer Input %_arr_v2float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Input__arr_float_uint_2 = OpTypePointer Input %_arr_float_uint_2 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %109 = OpTypeFunction %void +%_ptr_Output_float = OpTypePointer Output %float +%mat3v3float = OpTypeMatrix %v3float 3 + %bool = OpTypeBool +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%type_sampled_image = OpTypeSampledImage %type_3d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform +%View_GlobalDistanceFieldTexture0 = OpVariable %_ptr_UniformConstant_type_3d_image UniformConstant +%View_GlobalDistanceFieldSampler0 = 
OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%View_GlobalDistanceFieldTexture1 = OpVariable %_ptr_UniformConstant_type_3d_image UniformConstant +%View_GlobalDistanceFieldTexture2 = OpVariable %_ptr_UniformConstant_type_3d_image UniformConstant +%View_GlobalDistanceFieldTexture3 = OpVariable %_ptr_UniformConstant_type_3d_image UniformConstant + %Material = OpVariable %_ptr_Uniform_type_Material Uniform +%gl_ClipDistance = OpVariable %_ptr_Output__arr_float_uint_1 Output +%in_var_TEXCOORD6 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD8 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_VS_To_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_VS_To_DS_VertexID = OpVariable %_ptr_Input__arr_uint_uint_3 Input +%in_var_PN_POSITION = OpVariable %_ptr_Input__arr__arr_v4float_uint_3_uint_3 Input +%in_var_PN_DisplacementScales = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_TessellationMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%in_var_PN_WorldDisplacementMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%in_var_PN_DominantVertex = OpVariable %_ptr_Input__arr_v2float_uint_3 Input +%in_var_PN_DominantVertex1 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_DominantVertex2 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_DominantEdge = OpVariable %_ptr_Input__arr_v2float_uint_3 Input +%in_var_PN_DominantEdge1 = OpVariable %_ptr_Input__arr_v2float_uint_3 Input +%in_var_PN_DominantEdge2 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_DominantEdge3 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_DominantEdge4 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_DominantEdge5 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input 
+%gl_TessLevelOuter = OpVariable %_ptr_Input__arr_float_uint_4 Input +%gl_TessLevelInner = OpVariable %_ptr_Input__arr_float_uint_2 Input +%in_var_PN_POSITION9 = OpVariable %_ptr_Input_v4float Input +%gl_TessCoord = OpVariable %_ptr_Input_v3float Input +%gl_Position = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD6 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD7 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output_v4float Output + %117 = OpConstantNull %v4float + %118 = OpUndef %v4float + %MainDomain = OpFunction %void None %109 + %119 = OpLabel + %120 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD6 + %121 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD8 + %122 = OpCompositeExtract %v4float %120 0 + %123 = OpCompositeExtract %v4float %121 0 + %124 = OpCompositeExtract %v4float %120 1 + %125 = OpCompositeExtract %v4float %121 1 + %126 = OpCompositeExtract %v4float %120 2 + %127 = OpCompositeExtract %v4float %121 2 + %128 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD10_centroid + %129 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD11_centroid + %130 = OpCompositeExtract %v4float %128 0 + %131 = OpCompositeExtract %v4float %129 0 + %132 = OpCompositeExtract %v4float %128 1 + %133 = OpCompositeExtract %v4float %129 1 + %134 = OpCompositeExtract %v4float %128 2 + %135 = OpCompositeExtract %v4float %129 2 + %136 = OpLoad %_arr__arr_v4float_uint_3_uint_3 %in_var_PN_POSITION + %137 = OpLoad %_arr_float_uint_3 %in_var_PN_WorldDisplacementMultiplier + %138 = OpLoad %_arr_v4float_uint_3 %in_var_PN_DominantVertex1 + %139 = OpLoad %_arr_v3float_uint_3 %in_var_PN_DominantVertex2 + %140 = OpCompositeExtract %v4float %138 0 + %141 = OpCompositeExtract %v3float %139 0 + %142 = OpCompositeExtract %v4float %138 1 + %143 = OpCompositeExtract %v3float %139 1 + %144 = OpCompositeExtract %v4float %138 2 + %145 = OpCompositeExtract %v3float %139 2 + 
%146 = OpLoad %_arr_v4float_uint_3 %in_var_PN_DominantEdge2 + %147 = OpLoad %_arr_v4float_uint_3 %in_var_PN_DominantEdge3 + %148 = OpLoad %_arr_v3float_uint_3 %in_var_PN_DominantEdge4 + %149 = OpLoad %_arr_v3float_uint_3 %in_var_PN_DominantEdge5 + %150 = OpCompositeExtract %v4float %146 0 + %151 = OpCompositeExtract %v4float %147 0 + %152 = OpCompositeExtract %v3float %148 0 + %153 = OpCompositeExtract %v3float %149 0 + %154 = OpCompositeExtract %v4float %146 1 + %155 = OpCompositeExtract %v4float %147 1 + %156 = OpCompositeExtract %v3float %148 1 + %157 = OpCompositeExtract %v3float %149 1 + %158 = OpCompositeExtract %v4float %146 2 + %159 = OpCompositeExtract %v4float %147 2 + %160 = OpCompositeExtract %v3float %148 2 + %161 = OpCompositeExtract %v3float %149 2 + %162 = OpCompositeExtract %_arr_v4float_uint_3 %136 0 + %163 = OpCompositeExtract %float %137 0 + %164 = OpCompositeExtract %_arr_v4float_uint_3 %136 1 + %165 = OpCompositeExtract %float %137 1 + %166 = OpCompositeExtract %_arr_v4float_uint_3 %136 2 + %167 = OpCompositeExtract %float %137 2 + %168 = OpCompositeExtract %v4float %162 0 + %169 = OpCompositeExtract %v4float %162 1 + %170 = OpCompositeExtract %v4float %162 2 + %171 = OpCompositeExtract %v4float %164 0 + %172 = OpCompositeExtract %v4float %164 1 + %173 = OpCompositeExtract %v4float %164 2 + %174 = OpCompositeExtract %v4float %166 0 + %175 = OpCompositeExtract %v4float %166 1 + %176 = OpCompositeExtract %v4float %166 2 + %177 = OpLoad %v4float %in_var_PN_POSITION9 + %178 = OpLoad %v3float %gl_TessCoord + %179 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_0 + %180 = OpLoad %mat4v4float %179 + %181 = OpAccessChain %_ptr_Uniform_v3float %View %int_26 + %182 = OpLoad %v3float %181 + %183 = OpAccessChain %_ptr_Uniform_v3float %View %int_32 + %184 = OpLoad %v3float %183 + %185 = OpAccessChain %_ptr_Uniform_v4float %View %int_54 + %186 = OpLoad %v4float %185 + %187 = OpCompositeExtract %float %178 0 + %188 = OpCompositeExtract %float %178 1 + 
%189 = OpCompositeExtract %float %178 2 + %190 = OpFMul %float %187 %187 + %191 = OpFMul %float %188 %188 + %192 = OpFMul %float %189 %189 + %193 = OpFMul %float %190 %float_3 + %194 = OpFMul %float %191 %float_3 + %195 = OpFMul %float %192 %float_3 + %196 = OpCompositeConstruct %v4float %190 %190 %190 %190 + %197 = OpFMul %v4float %168 %196 + %198 = OpCompositeConstruct %v4float %187 %187 %187 %187 + %199 = OpFMul %v4float %197 %198 + %200 = OpCompositeConstruct %v4float %191 %191 %191 %191 + %201 = OpFMul %v4float %171 %200 + %202 = OpCompositeConstruct %v4float %188 %188 %188 %188 + %203 = OpFMul %v4float %201 %202 + %204 = OpFAdd %v4float %199 %203 + %205 = OpCompositeConstruct %v4float %192 %192 %192 %192 + %206 = OpFMul %v4float %174 %205 + %207 = OpCompositeConstruct %v4float %189 %189 %189 %189 + %208 = OpFMul %v4float %206 %207 + %209 = OpFAdd %v4float %204 %208 + %210 = OpCompositeConstruct %v4float %193 %193 %193 %193 + %211 = OpFMul %v4float %169 %210 + %212 = OpFMul %v4float %211 %202 + %213 = OpFAdd %v4float %209 %212 + %214 = OpCompositeConstruct %v4float %194 %194 %194 %194 + %215 = OpFMul %v4float %170 %214 + %216 = OpFMul %v4float %215 %198 + %217 = OpFAdd %v4float %213 %216 + %218 = OpFMul %v4float %172 %214 + %219 = OpFMul %v4float %218 %207 + %220 = OpFAdd %v4float %217 %219 + %221 = OpCompositeConstruct %v4float %195 %195 %195 %195 + %222 = OpFMul %v4float %173 %221 + %223 = OpFMul %v4float %222 %202 + %224 = OpFAdd %v4float %220 %223 + %225 = OpFMul %v4float %175 %221 + %226 = OpFMul %v4float %225 %198 + %227 = OpFAdd %v4float %224 %226 + %228 = OpFMul %v4float %176 %210 + %229 = OpFMul %v4float %228 %207 + %230 = OpFAdd %v4float %227 %229 + %231 = OpFMul %v4float %177 %67 + %232 = OpFMul %v4float %231 %207 + %233 = OpFMul %v4float %232 %198 + %234 = OpFMul %v4float %233 %202 + %235 = OpFAdd %v4float %230 %234 + %236 = OpVectorShuffle %v3float %130 %130 0 1 2 + %237 = OpCompositeConstruct %v3float %187 %187 %187 + %238 = OpFMul %v3float %236 
%237 + %239 = OpVectorShuffle %v3float %132 %132 0 1 2 + %240 = OpCompositeConstruct %v3float %188 %188 %188 + %241 = OpFMul %v3float %239 %240 + %242 = OpFAdd %v3float %238 %241 + %243 = OpFMul %v4float %131 %198 + %244 = OpFMul %v4float %133 %202 + %245 = OpFAdd %v4float %243 %244 + %246 = OpFMul %v4float %122 %198 + %247 = OpFMul %v4float %124 %202 + %248 = OpFAdd %v4float %246 %247 + %249 = OpFMul %v4float %123 %198 + %250 = OpFMul %v4float %125 %202 + %251 = OpFAdd %v4float %249 %250 + %252 = OpVectorShuffle %v3float %242 %117 0 1 2 + %253 = OpVectorShuffle %v3float %134 %134 0 1 2 + %254 = OpCompositeConstruct %v3float %189 %189 %189 + %255 = OpFMul %v3float %253 %254 + %256 = OpFAdd %v3float %252 %255 + %257 = OpVectorShuffle %v4float %118 %256 4 5 6 3 + %258 = OpFMul %v4float %135 %207 + %259 = OpFAdd %v4float %245 %258 + %260 = OpFMul %v4float %126 %207 + %261 = OpFAdd %v4float %248 %260 + %262 = OpFMul %v4float %127 %207 + %263 = OpFAdd %v4float %251 %262 + %264 = OpVectorShuffle %v3float %235 %235 0 1 2 + %265 = OpVectorShuffle %v3float %256 %117 0 1 2 + %266 = OpVectorShuffle %v3float %259 %259 0 1 2 + %267 = OpExtInst %v3float %1 Cross %266 %265 + %268 = OpCompositeExtract %float %259 3 + %269 = OpCompositeConstruct %v3float %268 %268 %268 + %270 = OpFMul %v3float %267 %269 + %271 = OpCompositeConstruct %mat3v3float %265 %270 %266 + %272 = OpFAdd %v3float %264 %182 + %273 = OpCompositeExtract %float %259 0 + %274 = OpCompositeExtract %float %259 1 + %275 = OpCompositeExtract %float %259 2 + %276 = OpCompositeConstruct %v4float %273 %274 %275 %float_0 + %277 = OpFOrdEqual %bool %187 %float_0 + %278 = OpSelect %int %277 %int_1 %int_0 + %279 = OpConvertSToF %float %278 + %280 = OpFOrdEqual %bool %188 %float_0 + %281 = OpSelect %int %280 %int_1 %int_0 + %282 = OpConvertSToF %float %281 + %283 = OpFOrdEqual %bool %189 %float_0 + %284 = OpSelect %int %283 %int_1 %int_0 + %285 = OpConvertSToF %float %284 + %286 = OpFAdd %float %279 %282 + %287 = OpFAdd %float 
%286 %285 + %288 = OpFOrdEqual %bool %287 %float_2 + %289 = OpSelect %int %288 %int_1 %int_0 + %290 = OpConvertSToF %float %289 + %291 = OpFOrdEqual %bool %287 %float_1 + %292 = OpSelect %int %291 %int_1 %int_0 + %293 = OpConvertSToF %float %292 + %294 = OpFOrdEqual %bool %287 %float_0 + %295 = OpSelect %int %294 %int_1 %int_0 + %296 = OpConvertSToF %float %295 + %297 = OpFOrdEqual %bool %290 %float_1 + OpSelectionMerge %298 None + OpBranchConditional %297 %299 %300 + %300 = OpLabel + %301 = OpFOrdNotEqual %bool %293 %float_0 + OpSelectionMerge %302 None + OpBranchConditional %301 %303 %302 + %303 = OpLabel + %304 = OpCompositeConstruct %v4float %279 %279 %279 %279 + %305 = OpFMul %v4float %304 %150 + %306 = OpCompositeConstruct %v4float %282 %282 %282 %282 + %307 = OpFMul %v4float %306 %154 + %308 = OpFAdd %v4float %305 %307 + %309 = OpCompositeConstruct %v4float %285 %285 %285 %285 + %310 = OpFMul %v4float %309 %158 + %311 = OpFAdd %v4float %308 %310 + %312 = OpFMul %v4float %304 %151 + %313 = OpFMul %v4float %306 %155 + %314 = OpFAdd %v4float %312 %313 + %315 = OpFMul %v4float %309 %159 + %316 = OpFAdd %v4float %314 %315 + %317 = OpFMul %v4float %202 %311 + %318 = OpFMul %v4float %207 %316 + %319 = OpFAdd %v4float %317 %318 + %320 = OpFMul %v4float %304 %319 + %321 = OpFMul %v4float %207 %311 + %322 = OpFMul %v4float %198 %316 + %323 = OpFAdd %v4float %321 %322 + %324 = OpFMul %v4float %306 %323 + %325 = OpFAdd %v4float %320 %324 + %326 = OpFMul %v4float %198 %311 + %327 = OpFMul %v4float %202 %316 + %328 = OpFAdd %v4float %326 %327 + %329 = OpFMul %v4float %309 %328 + %330 = OpFAdd %v4float %325 %329 + %331 = OpCompositeConstruct %v3float %279 %279 %279 + %332 = OpFMul %v3float %331 %152 + %333 = OpCompositeConstruct %v3float %282 %282 %282 + %334 = OpFMul %v3float %333 %156 + %335 = OpFAdd %v3float %332 %334 + %336 = OpCompositeConstruct %v3float %285 %285 %285 + %337 = OpFMul %v3float %336 %160 + %338 = OpFAdd %v3float %335 %337 + %339 = OpFMul %v3float %331 
%153 + %340 = OpFMul %v3float %333 %157 + %341 = OpFAdd %v3float %339 %340 + %342 = OpFMul %v3float %336 %161 + %343 = OpFAdd %v3float %341 %342 + %344 = OpFMul %v3float %240 %338 + %345 = OpFMul %v3float %254 %343 + %346 = OpFAdd %v3float %344 %345 + %347 = OpFMul %v3float %331 %346 + %348 = OpFMul %v3float %254 %338 + %349 = OpFMul %v3float %237 %343 + %350 = OpFAdd %v3float %348 %349 + %351 = OpFMul %v3float %333 %350 + %352 = OpFAdd %v3float %347 %351 + %353 = OpFMul %v3float %237 %338 + %354 = OpFMul %v3float %240 %343 + %355 = OpFAdd %v3float %353 %354 + %356 = OpFMul %v3float %336 %355 + %357 = OpFAdd %v3float %352 %356 + OpBranch %302 + %302 = OpLabel + %358 = OpPhi %v4float %276 %300 %330 %303 + %359 = OpPhi %v3float %265 %300 %357 %303 + OpBranch %298 + %299 = OpLabel + %360 = OpFAdd %float %282 %285 + %361 = OpFOrdEqual %bool %360 %float_2 + %362 = OpSelect %int %361 %int_1 %int_0 + %363 = OpConvertSToF %float %362 + %364 = OpFAdd %float %285 %279 + %365 = OpFOrdEqual %bool %364 %float_2 + %366 = OpSelect %int %365 %int_1 %int_0 + %367 = OpConvertSToF %float %366 + %368 = OpFOrdEqual %bool %286 %float_2 + %369 = OpSelect %int %368 %int_1 %int_0 + %370 = OpConvertSToF %float %369 + %371 = OpCompositeConstruct %v4float %363 %363 %363 %363 + %372 = OpFMul %v4float %371 %140 + %373 = OpCompositeConstruct %v4float %367 %367 %367 %367 + %374 = OpFMul %v4float %373 %142 + %375 = OpFAdd %v4float %372 %374 + %376 = OpCompositeConstruct %v4float %370 %370 %370 %370 + %377 = OpFMul %v4float %376 %144 + %378 = OpFAdd %v4float %375 %377 + %379 = OpCompositeConstruct %v3float %363 %363 %363 + %380 = OpFMul %v3float %379 %141 + %381 = OpCompositeConstruct %v3float %367 %367 %367 + %382 = OpFMul %v3float %381 %143 + %383 = OpFAdd %v3float %380 %382 + %384 = OpCompositeConstruct %v3float %370 %370 %370 + %385 = OpFMul %v3float %384 %145 + %386 = OpFAdd %v3float %383 %385 + OpBranch %298 + %298 = OpLabel + %387 = OpPhi %v4float %378 %299 %358 %302 + %388 = OpPhi %v3float 
%386 %299 %359 %302 + %389 = OpFOrdEqual %bool %296 %float_0 + OpSelectionMerge %390 None + OpBranchConditional %389 %391 %390 + %391 = OpLabel + %392 = OpVectorShuffle %v3float %387 %387 0 1 2 + %393 = OpExtInst %v3float %1 Cross %392 %388 + %394 = OpCompositeExtract %float %387 3 + %395 = OpCompositeConstruct %v3float %394 %394 %394 + %396 = OpFMul %v3float %393 %395 + %397 = OpCompositeConstruct %mat3v3float %388 %396 %392 + OpBranch %390 + %390 = OpLabel + %398 = OpPhi %mat3v3float %271 %298 %397 %391 + %399 = OpAccessChain %_ptr_Uniform_float %View %int_157 + %400 = OpLoad %float %399 + %401 = OpAccessChain %_ptr_Uniform_v4float %View %int_153 %int_0 + %402 = OpLoad %v4float %401 + %403 = OpVectorShuffle %v3float %402 %402 0 1 2 + %404 = OpVectorShuffle %v3float %402 %402 3 3 3 + %405 = OpFSub %v3float %272 %403 + %406 = OpFAdd %v3float %405 %404 + %407 = OpExtInst %v3float %1 FMax %406 %81 + %408 = OpFAdd %v3float %403 %404 + %409 = OpFSub %v3float %408 %272 + %410 = OpExtInst %v3float %1 FMax %409 %81 + %411 = OpExtInst %v3float %1 FMin %407 %410 + %412 = OpCompositeExtract %float %411 0 + %413 = OpCompositeExtract %float %411 1 + %414 = OpCompositeExtract %float %411 2 + %415 = OpExtInst %float %1 FMin %413 %414 + %416 = OpExtInst %float %1 FMin %412 %415 + %417 = OpAccessChain %_ptr_Uniform_float %View %int_153 %int_0 %int_3 + %418 = OpLoad %float %417 + %419 = OpAccessChain %_ptr_Uniform_float %View %int_156 + %420 = OpLoad %float %419 + %421 = OpFMul %float %418 %420 + %422 = OpFOrdGreaterThan %bool %416 %421 + OpSelectionMerge %423 DontFlatten + OpBranchConditional %422 %424 %425 + %425 = OpLabel + %426 = OpAccessChain %_ptr_Uniform_v4float %View %int_153 %int_1 + %427 = OpLoad %v4float %426 + %428 = OpVectorShuffle %v3float %427 %427 0 1 2 + %429 = OpVectorShuffle %v3float %427 %427 3 3 3 + %430 = OpFSub %v3float %272 %428 + %431 = OpFAdd %v3float %430 %429 + %432 = OpExtInst %v3float %1 FMax %431 %81 + %433 = OpFAdd %v3float %428 %429 + %434 = OpFSub 
%v3float %433 %272 + %435 = OpExtInst %v3float %1 FMax %434 %81 + %436 = OpExtInst %v3float %1 FMin %432 %435 + %437 = OpCompositeExtract %float %436 0 + %438 = OpCompositeExtract %float %436 1 + %439 = OpCompositeExtract %float %436 2 + %440 = OpExtInst %float %1 FMin %438 %439 + %441 = OpExtInst %float %1 FMin %437 %440 + %442 = OpAccessChain %_ptr_Uniform_float %View %int_153 %int_1 %int_3 + %443 = OpLoad %float %442 + %444 = OpFMul %float %443 %420 + %445 = OpFOrdGreaterThan %bool %441 %444 + OpSelectionMerge %446 DontFlatten + OpBranchConditional %445 %447 %448 + %448 = OpLabel + %449 = OpAccessChain %_ptr_Uniform_v4float %View %int_153 %int_2 + %450 = OpLoad %v4float %449 + %451 = OpVectorShuffle %v3float %450 %450 0 1 2 + %452 = OpVectorShuffle %v3float %450 %450 3 3 3 + %453 = OpFSub %v3float %272 %451 + %454 = OpFAdd %v3float %453 %452 + %455 = OpExtInst %v3float %1 FMax %454 %81 + %456 = OpFAdd %v3float %451 %452 + %457 = OpFSub %v3float %456 %272 + %458 = OpExtInst %v3float %1 FMax %457 %81 + %459 = OpExtInst %v3float %1 FMin %455 %458 + %460 = OpCompositeExtract %float %459 0 + %461 = OpCompositeExtract %float %459 1 + %462 = OpCompositeExtract %float %459 2 + %463 = OpExtInst %float %1 FMin %461 %462 + %464 = OpExtInst %float %1 FMin %460 %463 + %465 = OpAccessChain %_ptr_Uniform_v4float %View %int_153 %int_3 + %466 = OpLoad %v4float %465 + %467 = OpVectorShuffle %v3float %466 %466 0 1 2 + %468 = OpVectorShuffle %v3float %466 %466 3 3 3 + %469 = OpFSub %v3float %272 %467 + %470 = OpFAdd %v3float %469 %468 + %471 = OpExtInst %v3float %1 FMax %470 %81 + %472 = OpFAdd %v3float %467 %468 + %473 = OpFSub %v3float %472 %272 + %474 = OpExtInst %v3float %1 FMax %473 %81 + %475 = OpExtInst %v3float %1 FMin %471 %474 + %476 = OpCompositeExtract %float %475 0 + %477 = OpCompositeExtract %float %475 1 + %478 = OpCompositeExtract %float %475 2 + %479 = OpExtInst %float %1 FMin %477 %478 + %480 = OpExtInst %float %1 FMin %476 %479 + %481 = OpAccessChain 
%_ptr_Uniform_float %View %int_153 %int_2 %int_3 + %482 = OpLoad %float %481 + %483 = OpFMul %float %482 %420 + %484 = OpFOrdGreaterThan %bool %464 %483 + OpSelectionMerge %485 DontFlatten + OpBranchConditional %484 %486 %487 + %487 = OpLabel + %488 = OpAccessChain %_ptr_Uniform_float %View %int_153 %int_3 %int_3 + %489 = OpLoad %float %488 + %490 = OpFMul %float %489 %420 + %491 = OpFOrdGreaterThan %bool %480 %490 + OpSelectionMerge %492 None + OpBranchConditional %491 %493 %492 + %493 = OpLabel + %494 = OpFMul %float %480 %float_10 + %495 = OpAccessChain %_ptr_Uniform_float %View %int_154 %int_3 %int_3 + %496 = OpLoad %float %495 + %497 = OpFMul %float %494 %496 + %498 = OpExtInst %float %1 FClamp %497 %float_0 %float_1 + %499 = OpAccessChain %_ptr_Uniform_v4float %View %int_154 %uint_3 + %500 = OpLoad %v4float %499 + %501 = OpVectorShuffle %v3float %500 %500 3 3 3 + %502 = OpFMul %v3float %272 %501 + %503 = OpVectorShuffle %v3float %500 %500 0 1 2 + %504 = OpFAdd %v3float %502 %503 + %505 = OpLoad %type_3d_image %View_GlobalDistanceFieldTexture3 + %506 = OpLoad %type_sampler %View_GlobalDistanceFieldSampler0 + %507 = OpSampledImage %type_sampled_image %505 %506 + %508 = OpImageSampleExplicitLod %v4float %507 %504 Lod %float_0 + %509 = OpCompositeExtract %float %508 0 + %510 = OpExtInst %float %1 FMix %400 %509 %498 + OpBranch %492 + %492 = OpLabel + %511 = OpPhi %float %400 %487 %510 %493 + OpBranch %485 + %486 = OpLabel + %512 = OpAccessChain %_ptr_Uniform_v4float %View %int_154 %uint_2 + %513 = OpLoad %v4float %512 + %514 = OpVectorShuffle %v3float %513 %513 3 3 3 + %515 = OpFMul %v3float %272 %514 + %516 = OpVectorShuffle %v3float %513 %513 0 1 2 + %517 = OpFAdd %v3float %515 %516 + %518 = OpLoad %type_3d_image %View_GlobalDistanceFieldTexture2 + %519 = OpLoad %type_sampler %View_GlobalDistanceFieldSampler0 + %520 = OpSampledImage %type_sampled_image %518 %519 + %521 = OpImageSampleExplicitLod %v4float %520 %517 Lod %float_0 + %522 = OpCompositeExtract %float 
%521 0 + OpBranch %485 + %485 = OpLabel + %523 = OpPhi %float %522 %486 %511 %492 + OpBranch %446 + %447 = OpLabel + %524 = OpAccessChain %_ptr_Uniform_v4float %View %int_154 %uint_1 + %525 = OpLoad %v4float %524 + %526 = OpVectorShuffle %v3float %525 %525 3 3 3 + %527 = OpFMul %v3float %272 %526 + %528 = OpVectorShuffle %v3float %525 %525 0 1 2 + %529 = OpFAdd %v3float %527 %528 + %530 = OpLoad %type_3d_image %View_GlobalDistanceFieldTexture1 + %531 = OpLoad %type_sampler %View_GlobalDistanceFieldSampler0 + %532 = OpSampledImage %type_sampled_image %530 %531 + %533 = OpImageSampleExplicitLod %v4float %532 %529 Lod %float_0 + %534 = OpCompositeExtract %float %533 0 + OpBranch %446 + %446 = OpLabel + %535 = OpPhi %float %534 %447 %523 %485 + OpBranch %423 + %424 = OpLabel + %536 = OpAccessChain %_ptr_Uniform_v4float %View %int_154 %uint_0 + %537 = OpLoad %v4float %536 + %538 = OpVectorShuffle %v3float %537 %537 3 3 3 + %539 = OpFMul %v3float %272 %538 + %540 = OpVectorShuffle %v3float %537 %537 0 1 2 + %541 = OpFAdd %v3float %539 %540 + %542 = OpLoad %type_3d_image %View_GlobalDistanceFieldTexture0 + %543 = OpLoad %type_sampler %View_GlobalDistanceFieldSampler0 + %544 = OpSampledImage %type_sampled_image %542 %543 + %545 = OpImageSampleExplicitLod %v4float %544 %541 Lod %float_0 + %546 = OpCompositeExtract %float %545 0 + OpBranch %423 + %423 = OpLabel + %547 = OpPhi %float %546 %424 %535 %446 + %548 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_2 + %549 = OpLoad %float %548 + %550 = OpFAdd %float %547 %549 + %551 = OpExtInst %float %1 FMin %550 %float_0 + %552 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_3 + %553 = OpLoad %float %552 + %554 = OpFMul %float %551 %553 + %555 = OpCompositeExtract %v3float %398 2 + %556 = OpCompositeConstruct %v3float %554 %554 %554 + %557 = OpFMul %v3float %555 %556 + %558 = OpFMul %float %163 %187 + %559 = OpFMul %float %165 %188 + %560 = OpFAdd %float %558 %559 + %561 = OpFMul %float %167 %189 
+ %562 = OpFAdd %float %560 %561 + %563 = OpCompositeConstruct %v3float %562 %562 %562 + %564 = OpFMul %v3float %557 %563 + %565 = OpFAdd %v3float %264 %564 + %566 = OpVectorShuffle %v4float %235 %565 4 5 6 3 + %567 = OpVectorShuffle %v3float %565 %117 0 1 2 + %568 = OpFSub %v3float %567 %184 + %569 = OpCompositeExtract %float %568 0 + %570 = OpCompositeExtract %float %568 1 + %571 = OpCompositeExtract %float %568 2 + %572 = OpCompositeConstruct %v4float %569 %570 %571 %float_1 + %573 = OpDot %float %186 %572 + %574 = OpMatrixTimesVector %v4float %180 %566 + %575 = OpCompositeExtract %float %574 3 + %576 = OpFMul %float %float_0_00100000005 %575 + %577 = OpCompositeExtract %float %574 2 + %578 = OpFAdd %float %577 %576 + %579 = OpCompositeInsert %v4float %578 %574 2 + OpStore %gl_Position %579 + OpStore %out_var_TEXCOORD6 %261 + OpStore %out_var_TEXCOORD7 %263 + OpStore %out_var_TEXCOORD10_centroid %257 + OpStore %out_var_TEXCOORD11_centroid %259 + %580 = OpAccessChain %_ptr_Output_float %gl_ClipDistance %uint_0 + OpStore %580 %573 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese b/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese new file mode 100644 index 00000000000..e792c7e1160 --- /dev/null +++ b/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese @@ -0,0 +1,547 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 236 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %MainDomain "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_VS_to_DS_Position %in_var_VS_To_DS_VertexID %in_var_PN_POSITION %in_var_PN_DisplacementScales %in_var_PN_TessellationMultiplier %in_var_PN_WorldDisplacementMultiplier %in_var_PN_DominantVertex %in_var_PN_DominantVertex1 %in_var_PN_DominantVertex2 %in_var_PN_DominantEdge %in_var_PN_DominantEdge1 
%in_var_PN_DominantEdge2 %in_var_PN_DominantEdge3 %in_var_PN_DominantEdge4 %in_var_PN_DominantEdge5 %gl_TessLevelOuter %gl_TessLevelInner %in_var_PN_POSITION9 %gl_TessCoord %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_TEXCOORD6 %out_var_TEXCOORD7 %gl_Position + OpExecutionMode %MainDomain Triangles + OpSource HLSL 600 + OpName %type_ShadowDepthPass "type.ShadowDepthPass" + OpMemberName %type_ShadowDepthPass 0 "PrePadding_ShadowDepthPass_LPV_0" + OpMemberName %type_ShadowDepthPass 1 "PrePadding_ShadowDepthPass_LPV_4" + OpMemberName %type_ShadowDepthPass 2 "PrePadding_ShadowDepthPass_LPV_8" + OpMemberName %type_ShadowDepthPass 3 "PrePadding_ShadowDepthPass_LPV_12" + OpMemberName %type_ShadowDepthPass 4 "PrePadding_ShadowDepthPass_LPV_16" + OpMemberName %type_ShadowDepthPass 5 "PrePadding_ShadowDepthPass_LPV_20" + OpMemberName %type_ShadowDepthPass 6 "PrePadding_ShadowDepthPass_LPV_24" + OpMemberName %type_ShadowDepthPass 7 "PrePadding_ShadowDepthPass_LPV_28" + OpMemberName %type_ShadowDepthPass 8 "PrePadding_ShadowDepthPass_LPV_32" + OpMemberName %type_ShadowDepthPass 9 "PrePadding_ShadowDepthPass_LPV_36" + OpMemberName %type_ShadowDepthPass 10 "PrePadding_ShadowDepthPass_LPV_40" + OpMemberName %type_ShadowDepthPass 11 "PrePadding_ShadowDepthPass_LPV_44" + OpMemberName %type_ShadowDepthPass 12 "PrePadding_ShadowDepthPass_LPV_48" + OpMemberName %type_ShadowDepthPass 13 "PrePadding_ShadowDepthPass_LPV_52" + OpMemberName %type_ShadowDepthPass 14 "PrePadding_ShadowDepthPass_LPV_56" + OpMemberName %type_ShadowDepthPass 15 "PrePadding_ShadowDepthPass_LPV_60" + OpMemberName %type_ShadowDepthPass 16 "PrePadding_ShadowDepthPass_LPV_64" + OpMemberName %type_ShadowDepthPass 17 "PrePadding_ShadowDepthPass_LPV_68" + OpMemberName %type_ShadowDepthPass 18 "PrePadding_ShadowDepthPass_LPV_72" + OpMemberName %type_ShadowDepthPass 19 "PrePadding_ShadowDepthPass_LPV_76" + OpMemberName %type_ShadowDepthPass 20 "PrePadding_ShadowDepthPass_LPV_80" + OpMemberName 
%type_ShadowDepthPass 21 "PrePadding_ShadowDepthPass_LPV_84" + OpMemberName %type_ShadowDepthPass 22 "PrePadding_ShadowDepthPass_LPV_88" + OpMemberName %type_ShadowDepthPass 23 "PrePadding_ShadowDepthPass_LPV_92" + OpMemberName %type_ShadowDepthPass 24 "PrePadding_ShadowDepthPass_LPV_96" + OpMemberName %type_ShadowDepthPass 25 "PrePadding_ShadowDepthPass_LPV_100" + OpMemberName %type_ShadowDepthPass 26 "PrePadding_ShadowDepthPass_LPV_104" + OpMemberName %type_ShadowDepthPass 27 "PrePadding_ShadowDepthPass_LPV_108" + OpMemberName %type_ShadowDepthPass 28 "PrePadding_ShadowDepthPass_LPV_112" + OpMemberName %type_ShadowDepthPass 29 "PrePadding_ShadowDepthPass_LPV_116" + OpMemberName %type_ShadowDepthPass 30 "PrePadding_ShadowDepthPass_LPV_120" + OpMemberName %type_ShadowDepthPass 31 "PrePadding_ShadowDepthPass_LPV_124" + OpMemberName %type_ShadowDepthPass 32 "PrePadding_ShadowDepthPass_LPV_128" + OpMemberName %type_ShadowDepthPass 33 "PrePadding_ShadowDepthPass_LPV_132" + OpMemberName %type_ShadowDepthPass 34 "PrePadding_ShadowDepthPass_LPV_136" + OpMemberName %type_ShadowDepthPass 35 "PrePadding_ShadowDepthPass_LPV_140" + OpMemberName %type_ShadowDepthPass 36 "PrePadding_ShadowDepthPass_LPV_144" + OpMemberName %type_ShadowDepthPass 37 "PrePadding_ShadowDepthPass_LPV_148" + OpMemberName %type_ShadowDepthPass 38 "PrePadding_ShadowDepthPass_LPV_152" + OpMemberName %type_ShadowDepthPass 39 "PrePadding_ShadowDepthPass_LPV_156" + OpMemberName %type_ShadowDepthPass 40 "PrePadding_ShadowDepthPass_LPV_160" + OpMemberName %type_ShadowDepthPass 41 "PrePadding_ShadowDepthPass_LPV_164" + OpMemberName %type_ShadowDepthPass 42 "PrePadding_ShadowDepthPass_LPV_168" + OpMemberName %type_ShadowDepthPass 43 "PrePadding_ShadowDepthPass_LPV_172" + OpMemberName %type_ShadowDepthPass 44 "PrePadding_ShadowDepthPass_LPV_176" + OpMemberName %type_ShadowDepthPass 45 "PrePadding_ShadowDepthPass_LPV_180" + OpMemberName %type_ShadowDepthPass 46 "PrePadding_ShadowDepthPass_LPV_184" + OpMemberName 
%type_ShadowDepthPass 47 "PrePadding_ShadowDepthPass_LPV_188" + OpMemberName %type_ShadowDepthPass 48 "PrePadding_ShadowDepthPass_LPV_192" + OpMemberName %type_ShadowDepthPass 49 "PrePadding_ShadowDepthPass_LPV_196" + OpMemberName %type_ShadowDepthPass 50 "PrePadding_ShadowDepthPass_LPV_200" + OpMemberName %type_ShadowDepthPass 51 "PrePadding_ShadowDepthPass_LPV_204" + OpMemberName %type_ShadowDepthPass 52 "PrePadding_ShadowDepthPass_LPV_208" + OpMemberName %type_ShadowDepthPass 53 "PrePadding_ShadowDepthPass_LPV_212" + OpMemberName %type_ShadowDepthPass 54 "PrePadding_ShadowDepthPass_LPV_216" + OpMemberName %type_ShadowDepthPass 55 "PrePadding_ShadowDepthPass_LPV_220" + OpMemberName %type_ShadowDepthPass 56 "PrePadding_ShadowDepthPass_LPV_224" + OpMemberName %type_ShadowDepthPass 57 "PrePadding_ShadowDepthPass_LPV_228" + OpMemberName %type_ShadowDepthPass 58 "PrePadding_ShadowDepthPass_LPV_232" + OpMemberName %type_ShadowDepthPass 59 "PrePadding_ShadowDepthPass_LPV_236" + OpMemberName %type_ShadowDepthPass 60 "PrePadding_ShadowDepthPass_LPV_240" + OpMemberName %type_ShadowDepthPass 61 "PrePadding_ShadowDepthPass_LPV_244" + OpMemberName %type_ShadowDepthPass 62 "PrePadding_ShadowDepthPass_LPV_248" + OpMemberName %type_ShadowDepthPass 63 "PrePadding_ShadowDepthPass_LPV_252" + OpMemberName %type_ShadowDepthPass 64 "PrePadding_ShadowDepthPass_LPV_256" + OpMemberName %type_ShadowDepthPass 65 "PrePadding_ShadowDepthPass_LPV_260" + OpMemberName %type_ShadowDepthPass 66 "PrePadding_ShadowDepthPass_LPV_264" + OpMemberName %type_ShadowDepthPass 67 "PrePadding_ShadowDepthPass_LPV_268" + OpMemberName %type_ShadowDepthPass 68 "ShadowDepthPass_LPV_mRsmToWorld" + OpMemberName %type_ShadowDepthPass 69 "ShadowDepthPass_LPV_mLightColour" + OpMemberName %type_ShadowDepthPass 70 "ShadowDepthPass_LPV_GeometryVolumeCaptureLightDirection" + OpMemberName %type_ShadowDepthPass 71 "ShadowDepthPass_LPV_mEyePos" + OpMemberName %type_ShadowDepthPass 72 "ShadowDepthPass_LPV_mOldGridOffset" + 
OpMemberName %type_ShadowDepthPass 73 "PrePadding_ShadowDepthPass_LPV_396" + OpMemberName %type_ShadowDepthPass 74 "ShadowDepthPass_LPV_mLpvGridOffset" + OpMemberName %type_ShadowDepthPass 75 "ShadowDepthPass_LPV_ClearMultiplier" + OpMemberName %type_ShadowDepthPass 76 "ShadowDepthPass_LPV_LpvScale" + OpMemberName %type_ShadowDepthPass 77 "ShadowDepthPass_LPV_OneOverLpvScale" + OpMemberName %type_ShadowDepthPass 78 "ShadowDepthPass_LPV_DirectionalOcclusionIntensity" + OpMemberName %type_ShadowDepthPass 79 "ShadowDepthPass_LPV_DirectionalOcclusionRadius" + OpMemberName %type_ShadowDepthPass 80 "ShadowDepthPass_LPV_RsmAreaIntensityMultiplier" + OpMemberName %type_ShadowDepthPass 81 "ShadowDepthPass_LPV_RsmPixelToTexcoordMultiplier" + OpMemberName %type_ShadowDepthPass 82 "ShadowDepthPass_LPV_SecondaryOcclusionStrength" + OpMemberName %type_ShadowDepthPass 83 "ShadowDepthPass_LPV_SecondaryBounceStrength" + OpMemberName %type_ShadowDepthPass 84 "ShadowDepthPass_LPV_VplInjectionBias" + OpMemberName %type_ShadowDepthPass 85 "ShadowDepthPass_LPV_GeometryVolumeInjectionBias" + OpMemberName %type_ShadowDepthPass 86 "ShadowDepthPass_LPV_EmissiveInjectionMultiplier" + OpMemberName %type_ShadowDepthPass 87 "ShadowDepthPass_LPV_PropagationIndex" + OpMemberName %type_ShadowDepthPass 88 "ShadowDepthPass_ProjectionMatrix" + OpMemberName %type_ShadowDepthPass 89 "ShadowDepthPass_ViewMatrix" + OpMemberName %type_ShadowDepthPass 90 "ShadowDepthPass_ShadowParams" + OpMemberName %type_ShadowDepthPass 91 "ShadowDepthPass_bClampToNearPlane" + OpMemberName %type_ShadowDepthPass 92 "PrePadding_ShadowDepthPass_612" + OpMemberName %type_ShadowDepthPass 93 "PrePadding_ShadowDepthPass_616" + OpMemberName %type_ShadowDepthPass 94 "PrePadding_ShadowDepthPass_620" + OpMemberName %type_ShadowDepthPass 95 "ShadowDepthPass_ShadowViewProjectionMatrices" + OpMemberName %type_ShadowDepthPass 96 "ShadowDepthPass_ShadowViewMatrices" + OpName %ShadowDepthPass "ShadowDepthPass" + OpName 
%in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_VS_to_DS_Position "in.var.VS_to_DS_Position" + OpName %in_var_VS_To_DS_VertexID "in.var.VS_To_DS_VertexID" + OpName %in_var_PN_POSITION "in.var.PN_POSITION" + OpName %in_var_PN_DisplacementScales "in.var.PN_DisplacementScales" + OpName %in_var_PN_TessellationMultiplier "in.var.PN_TessellationMultiplier" + OpName %in_var_PN_WorldDisplacementMultiplier "in.var.PN_WorldDisplacementMultiplier" + OpName %in_var_PN_DominantVertex "in.var.PN_DominantVertex" + OpName %in_var_PN_DominantVertex1 "in.var.PN_DominantVertex1" + OpName %in_var_PN_DominantVertex2 "in.var.PN_DominantVertex2" + OpName %in_var_PN_DominantEdge "in.var.PN_DominantEdge" + OpName %in_var_PN_DominantEdge1 "in.var.PN_DominantEdge1" + OpName %in_var_PN_DominantEdge2 "in.var.PN_DominantEdge2" + OpName %in_var_PN_DominantEdge3 "in.var.PN_DominantEdge3" + OpName %in_var_PN_DominantEdge4 "in.var.PN_DominantEdge4" + OpName %in_var_PN_DominantEdge5 "in.var.PN_DominantEdge5" + OpName %in_var_PN_POSITION9 "in.var.PN_POSITION9" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_TEXCOORD6 "out.var.TEXCOORD6" + OpName %out_var_TEXCOORD7 "out.var.TEXCOORD7" + OpName %MainDomain "MainDomain" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_VS_to_DS_Position UserSemantic "VS_to_DS_Position" + OpDecorateString %in_var_VS_To_DS_VertexID UserSemantic "VS_To_DS_VertexID" + OpDecorateString %in_var_PN_POSITION UserSemantic "PN_POSITION" + OpDecorateString %in_var_PN_DisplacementScales UserSemantic "PN_DisplacementScales" + OpDecorateString %in_var_PN_TessellationMultiplier UserSemantic "PN_TessellationMultiplier" + OpDecorateString 
%in_var_PN_WorldDisplacementMultiplier UserSemantic "PN_WorldDisplacementMultiplier" + OpDecorateString %in_var_PN_DominantVertex UserSemantic "PN_DominantVertex" + OpDecorateString %in_var_PN_DominantVertex1 UserSemantic "PN_DominantVertex" + OpDecorateString %in_var_PN_DominantVertex2 UserSemantic "PN_DominantVertex" + OpDecorateString %in_var_PN_DominantEdge UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge1 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge2 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge3 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge4 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge5 UserSemantic "PN_DominantEdge" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %in_var_PN_POSITION9 UserSemantic "PN_POSITION9" + OpDecorate %in_var_PN_POSITION9 Patch + OpDecorate %gl_TessCoord BuiltIn TessCoord + OpDecorateString %gl_TessCoord UserSemantic "SV_DomainLocation" + OpDecorate %gl_TessCoord Patch + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %out_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic "SV_POSITION" + OpDecorate %in_var_PN_DisplacementScales Location 0 + OpDecorate %in_var_PN_DominantEdge Location 1 + OpDecorate %in_var_PN_DominantEdge1 Location 2 + OpDecorate %in_var_PN_DominantEdge2 Location 3 + OpDecorate %in_var_PN_DominantEdge3 Location 4 + 
OpDecorate %in_var_PN_DominantEdge4 Location 5 + OpDecorate %in_var_PN_DominantEdge5 Location 6 + OpDecorate %in_var_PN_DominantVertex Location 7 + OpDecorate %in_var_PN_DominantVertex1 Location 8 + OpDecorate %in_var_PN_DominantVertex2 Location 9 + OpDecorate %in_var_PN_POSITION Location 10 + OpDecorate %in_var_PN_POSITION9 Location 13 + OpDecorate %in_var_PN_TessellationMultiplier Location 14 + OpDecorate %in_var_PN_WorldDisplacementMultiplier Location 15 + OpDecorate %in_var_TEXCOORD10_centroid Location 16 + OpDecorate %in_var_TEXCOORD11_centroid Location 17 + OpDecorate %in_var_VS_To_DS_VertexID Location 18 + OpDecorate %in_var_VS_to_DS_Position Location 19 + OpDecorate %out_var_TEXCOORD10_centroid Location 0 + OpDecorate %out_var_TEXCOORD11_centroid Location 1 + OpDecorate %out_var_TEXCOORD6 Location 2 + OpDecorate %out_var_TEXCOORD7 Location 3 + OpDecorate %ShadowDepthPass DescriptorSet 0 + OpDecorate %ShadowDepthPass Binding 0 + OpDecorate %_arr_mat4v4float_uint_6 ArrayStride 64 + OpMemberDecorate %type_ShadowDepthPass 0 Offset 0 + OpMemberDecorate %type_ShadowDepthPass 1 Offset 4 + OpMemberDecorate %type_ShadowDepthPass 2 Offset 8 + OpMemberDecorate %type_ShadowDepthPass 3 Offset 12 + OpMemberDecorate %type_ShadowDepthPass 4 Offset 16 + OpMemberDecorate %type_ShadowDepthPass 5 Offset 20 + OpMemberDecorate %type_ShadowDepthPass 6 Offset 24 + OpMemberDecorate %type_ShadowDepthPass 7 Offset 28 + OpMemberDecorate %type_ShadowDepthPass 8 Offset 32 + OpMemberDecorate %type_ShadowDepthPass 9 Offset 36 + OpMemberDecorate %type_ShadowDepthPass 10 Offset 40 + OpMemberDecorate %type_ShadowDepthPass 11 Offset 44 + OpMemberDecorate %type_ShadowDepthPass 12 Offset 48 + OpMemberDecorate %type_ShadowDepthPass 13 Offset 52 + OpMemberDecorate %type_ShadowDepthPass 14 Offset 56 + OpMemberDecorate %type_ShadowDepthPass 15 Offset 60 + OpMemberDecorate %type_ShadowDepthPass 16 Offset 64 + OpMemberDecorate %type_ShadowDepthPass 17 Offset 68 + OpMemberDecorate 
%type_ShadowDepthPass 18 Offset 72 + OpMemberDecorate %type_ShadowDepthPass 19 Offset 76 + OpMemberDecorate %type_ShadowDepthPass 20 Offset 80 + OpMemberDecorate %type_ShadowDepthPass 21 Offset 84 + OpMemberDecorate %type_ShadowDepthPass 22 Offset 88 + OpMemberDecorate %type_ShadowDepthPass 23 Offset 92 + OpMemberDecorate %type_ShadowDepthPass 24 Offset 96 + OpMemberDecorate %type_ShadowDepthPass 25 Offset 100 + OpMemberDecorate %type_ShadowDepthPass 26 Offset 104 + OpMemberDecorate %type_ShadowDepthPass 27 Offset 108 + OpMemberDecorate %type_ShadowDepthPass 28 Offset 112 + OpMemberDecorate %type_ShadowDepthPass 29 Offset 116 + OpMemberDecorate %type_ShadowDepthPass 30 Offset 120 + OpMemberDecorate %type_ShadowDepthPass 31 Offset 124 + OpMemberDecorate %type_ShadowDepthPass 32 Offset 128 + OpMemberDecorate %type_ShadowDepthPass 33 Offset 132 + OpMemberDecorate %type_ShadowDepthPass 34 Offset 136 + OpMemberDecorate %type_ShadowDepthPass 35 Offset 140 + OpMemberDecorate %type_ShadowDepthPass 36 Offset 144 + OpMemberDecorate %type_ShadowDepthPass 37 Offset 148 + OpMemberDecorate %type_ShadowDepthPass 38 Offset 152 + OpMemberDecorate %type_ShadowDepthPass 39 Offset 156 + OpMemberDecorate %type_ShadowDepthPass 40 Offset 160 + OpMemberDecorate %type_ShadowDepthPass 41 Offset 164 + OpMemberDecorate %type_ShadowDepthPass 42 Offset 168 + OpMemberDecorate %type_ShadowDepthPass 43 Offset 172 + OpMemberDecorate %type_ShadowDepthPass 44 Offset 176 + OpMemberDecorate %type_ShadowDepthPass 45 Offset 180 + OpMemberDecorate %type_ShadowDepthPass 46 Offset 184 + OpMemberDecorate %type_ShadowDepthPass 47 Offset 188 + OpMemberDecorate %type_ShadowDepthPass 48 Offset 192 + OpMemberDecorate %type_ShadowDepthPass 49 Offset 196 + OpMemberDecorate %type_ShadowDepthPass 50 Offset 200 + OpMemberDecorate %type_ShadowDepthPass 51 Offset 204 + OpMemberDecorate %type_ShadowDepthPass 52 Offset 208 + OpMemberDecorate %type_ShadowDepthPass 53 Offset 212 + OpMemberDecorate %type_ShadowDepthPass 54 
Offset 216 + OpMemberDecorate %type_ShadowDepthPass 55 Offset 220 + OpMemberDecorate %type_ShadowDepthPass 56 Offset 224 + OpMemberDecorate %type_ShadowDepthPass 57 Offset 228 + OpMemberDecorate %type_ShadowDepthPass 58 Offset 232 + OpMemberDecorate %type_ShadowDepthPass 59 Offset 236 + OpMemberDecorate %type_ShadowDepthPass 60 Offset 240 + OpMemberDecorate %type_ShadowDepthPass 61 Offset 244 + OpMemberDecorate %type_ShadowDepthPass 62 Offset 248 + OpMemberDecorate %type_ShadowDepthPass 63 Offset 252 + OpMemberDecorate %type_ShadowDepthPass 64 Offset 256 + OpMemberDecorate %type_ShadowDepthPass 65 Offset 260 + OpMemberDecorate %type_ShadowDepthPass 66 Offset 264 + OpMemberDecorate %type_ShadowDepthPass 67 Offset 268 + OpMemberDecorate %type_ShadowDepthPass 68 Offset 272 + OpMemberDecorate %type_ShadowDepthPass 68 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 68 ColMajor + OpMemberDecorate %type_ShadowDepthPass 69 Offset 336 + OpMemberDecorate %type_ShadowDepthPass 70 Offset 352 + OpMemberDecorate %type_ShadowDepthPass 71 Offset 368 + OpMemberDecorate %type_ShadowDepthPass 72 Offset 384 + OpMemberDecorate %type_ShadowDepthPass 73 Offset 396 + OpMemberDecorate %type_ShadowDepthPass 74 Offset 400 + OpMemberDecorate %type_ShadowDepthPass 75 Offset 412 + OpMemberDecorate %type_ShadowDepthPass 76 Offset 416 + OpMemberDecorate %type_ShadowDepthPass 77 Offset 420 + OpMemberDecorate %type_ShadowDepthPass 78 Offset 424 + OpMemberDecorate %type_ShadowDepthPass 79 Offset 428 + OpMemberDecorate %type_ShadowDepthPass 80 Offset 432 + OpMemberDecorate %type_ShadowDepthPass 81 Offset 436 + OpMemberDecorate %type_ShadowDepthPass 82 Offset 440 + OpMemberDecorate %type_ShadowDepthPass 83 Offset 444 + OpMemberDecorate %type_ShadowDepthPass 84 Offset 448 + OpMemberDecorate %type_ShadowDepthPass 85 Offset 452 + OpMemberDecorate %type_ShadowDepthPass 86 Offset 456 + OpMemberDecorate %type_ShadowDepthPass 87 Offset 460 + OpMemberDecorate %type_ShadowDepthPass 88 Offset 464 + 
OpMemberDecorate %type_ShadowDepthPass 88 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 88 ColMajor + OpMemberDecorate %type_ShadowDepthPass 89 Offset 528 + OpMemberDecorate %type_ShadowDepthPass 89 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 89 ColMajor + OpMemberDecorate %type_ShadowDepthPass 90 Offset 592 + OpMemberDecorate %type_ShadowDepthPass 91 Offset 608 + OpMemberDecorate %type_ShadowDepthPass 92 Offset 612 + OpMemberDecorate %type_ShadowDepthPass 93 Offset 616 + OpMemberDecorate %type_ShadowDepthPass 94 Offset 620 + OpMemberDecorate %type_ShadowDepthPass 95 Offset 624 + OpMemberDecorate %type_ShadowDepthPass 95 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 95 ColMajor + OpMemberDecorate %type_ShadowDepthPass 96 Offset 1008 + OpMemberDecorate %type_ShadowDepthPass 96 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 96 ColMajor + OpDecorate %type_ShadowDepthPass Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %float_3 = OpConstant %float 3 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %float_6 = OpConstant %float 6 + %48 = OpConstantComposite %v4float %float_6 %float_6 %float_6 %float_6 + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %int_3 = OpConstant %int 3 + %int_88 = OpConstant %int 88 + %int_89 = OpConstant %int 89 + %int_90 = OpConstant %int 90 + %int_91 = OpConstant %int 91 +%float_9_99999997en07 = OpConstant %float 9.99999997e-07 + %uint_6 = OpConstant %uint 6 +%_arr_mat4v4float_uint_6 = OpTypeArray %mat4v4float %uint_6 + %v3int = OpTypeVector %int 3 +%type_ShadowDepthPass = OpTypeStruct %float %float %float %float %float %float %float %float 
%float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %mat4v4float %v4float %v4float %v4float %v3int %int %v3int %float %float %float %float %float %float %float %float %float %float %float %float %int %mat4v4float %mat4v4float %v4float %float %float %float %float %_arr_mat4v4float_uint_6 %_arr_mat4v4float_uint_6 +%_ptr_Uniform_type_ShadowDepthPass = OpTypePointer Uniform %type_ShadowDepthPass + %uint_3 = OpConstant %uint 3 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 +%_ptr_Input__arr_uint_uint_3 = OpTypePointer Input %_arr_uint_uint_3 +%_arr__arr_v4float_uint_3_uint_3 = OpTypeArray %_arr_v4float_uint_3 %uint_3 +%_ptr_Input__arr__arr_v4float_uint_3_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_3_uint_3 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Input__arr_v3float_uint_3 = OpTypePointer Input %_arr_v3float_uint_3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%_ptr_Input__arr_float_uint_3 = OpTypePointer Input %_arr_float_uint_3 +%_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3 +%_ptr_Input__arr_v2float_uint_3 = OpTypePointer Input %_arr_v2float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Input__arr_float_uint_2 = OpTypePointer Input %_arr_float_uint_2 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Output_v4float = OpTypePointer Output %v4float 
+%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %83 = OpTypeFunction %void +%_ptr_Function_float = OpTypePointer Function %float + %bool = OpTypeBool +%_ptr_Function_mat4v4float = OpTypePointer Function %mat4v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%ShadowDepthPass = OpVariable %_ptr_Uniform_type_ShadowDepthPass Uniform +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_VS_to_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_VS_To_DS_VertexID = OpVariable %_ptr_Input__arr_uint_uint_3 Input +%in_var_PN_POSITION = OpVariable %_ptr_Input__arr__arr_v4float_uint_3_uint_3 Input +%in_var_PN_DisplacementScales = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_TessellationMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%in_var_PN_WorldDisplacementMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%in_var_PN_DominantVertex = OpVariable %_ptr_Input__arr_v2float_uint_3 Input +%in_var_PN_DominantVertex1 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_DominantVertex2 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_DominantEdge = OpVariable %_ptr_Input__arr_v2float_uint_3 Input +%in_var_PN_DominantEdge1 = OpVariable %_ptr_Input__arr_v2float_uint_3 Input +%in_var_PN_DominantEdge2 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_DominantEdge3 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_DominantEdge4 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_DominantEdge5 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%gl_TessLevelOuter = OpVariable %_ptr_Input__arr_float_uint_4 Input +%gl_TessLevelInner = OpVariable %_ptr_Input__arr_float_uint_2 Input +%in_var_PN_POSITION9 = 
OpVariable %_ptr_Input_v4float Input +%gl_TessCoord = OpVariable %_ptr_Input_v3float Input +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD6 = OpVariable %_ptr_Output_float Output +%out_var_TEXCOORD7 = OpVariable %_ptr_Output_v3float Output +%gl_Position = OpVariable %_ptr_Output_v4float Output + %89 = OpConstantNull %v4float + %90 = OpUndef %v4float + %MainDomain = OpFunction %void None %83 + %91 = OpLabel + %92 = OpVariable %_ptr_Function_mat4v4float Function + %93 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD10_centroid + %94 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD11_centroid + %95 = OpCompositeExtract %v4float %93 0 + %96 = OpCompositeExtract %v4float %94 0 + %97 = OpCompositeExtract %v4float %93 1 + %98 = OpCompositeExtract %v4float %94 1 + %99 = OpCompositeExtract %v4float %93 2 + %100 = OpCompositeExtract %v4float %94 2 + %101 = OpLoad %_arr__arr_v4float_uint_3_uint_3 %in_var_PN_POSITION + %102 = OpCompositeExtract %_arr_v4float_uint_3 %101 0 + %103 = OpCompositeExtract %_arr_v4float_uint_3 %101 1 + %104 = OpCompositeExtract %_arr_v4float_uint_3 %101 2 + %105 = OpCompositeExtract %v4float %102 0 + %106 = OpCompositeExtract %v4float %102 1 + %107 = OpCompositeExtract %v4float %102 2 + %108 = OpCompositeExtract %v4float %103 0 + %109 = OpCompositeExtract %v4float %103 1 + %110 = OpCompositeExtract %v4float %103 2 + %111 = OpCompositeExtract %v4float %104 0 + %112 = OpCompositeExtract %v4float %104 1 + %113 = OpCompositeExtract %v4float %104 2 + %114 = OpLoad %v4float %in_var_PN_POSITION9 + %115 = OpLoad %v3float %gl_TessCoord + %116 = OpCompositeExtract %float %115 0 + %117 = OpCompositeExtract %float %115 1 + %118 = OpCompositeExtract %float %115 2 + %119 = OpFMul %float %116 %116 + %120 = OpFMul %float %117 %117 + %121 = OpFMul %float %118 %118 + %122 = OpFMul %float %119 %float_3 + %123 = OpFMul %float %120 %float_3 + %124 = OpFMul %float 
%121 %float_3 + %125 = OpCompositeConstruct %v4float %119 %119 %119 %119 + %126 = OpFMul %v4float %105 %125 + %127 = OpCompositeConstruct %v4float %116 %116 %116 %116 + %128 = OpFMul %v4float %126 %127 + %129 = OpCompositeConstruct %v4float %120 %120 %120 %120 + %130 = OpFMul %v4float %108 %129 + %131 = OpCompositeConstruct %v4float %117 %117 %117 %117 + %132 = OpFMul %v4float %130 %131 + %133 = OpFAdd %v4float %128 %132 + %134 = OpCompositeConstruct %v4float %121 %121 %121 %121 + %135 = OpFMul %v4float %111 %134 + %136 = OpCompositeConstruct %v4float %118 %118 %118 %118 + %137 = OpFMul %v4float %135 %136 + %138 = OpFAdd %v4float %133 %137 + %139 = OpCompositeConstruct %v4float %122 %122 %122 %122 + %140 = OpFMul %v4float %106 %139 + %141 = OpFMul %v4float %140 %131 + %142 = OpFAdd %v4float %138 %141 + %143 = OpCompositeConstruct %v4float %123 %123 %123 %123 + %144 = OpFMul %v4float %107 %143 + %145 = OpFMul %v4float %144 %127 + %146 = OpFAdd %v4float %142 %145 + %147 = OpFMul %v4float %109 %143 + %148 = OpFMul %v4float %147 %136 + %149 = OpFAdd %v4float %146 %148 + %150 = OpCompositeConstruct %v4float %124 %124 %124 %124 + %151 = OpFMul %v4float %110 %150 + %152 = OpFMul %v4float %151 %131 + %153 = OpFAdd %v4float %149 %152 + %154 = OpFMul %v4float %112 %150 + %155 = OpFMul %v4float %154 %127 + %156 = OpFAdd %v4float %153 %155 + %157 = OpFMul %v4float %113 %139 + %158 = OpFMul %v4float %157 %136 + %159 = OpFAdd %v4float %156 %158 + %160 = OpFMul %v4float %114 %48 + %161 = OpFMul %v4float %160 %136 + %162 = OpFMul %v4float %161 %127 + %163 = OpFMul %v4float %162 %131 + %164 = OpFAdd %v4float %159 %163 + %165 = OpVectorShuffle %v3float %95 %95 0 1 2 + %166 = OpCompositeConstruct %v3float %116 %116 %116 + %167 = OpFMul %v3float %165 %166 + %168 = OpVectorShuffle %v3float %97 %97 0 1 2 + %169 = OpCompositeConstruct %v3float %117 %117 %117 + %170 = OpFMul %v3float %168 %169 + %171 = OpFAdd %v3float %167 %170 + %172 = OpFMul %v4float %96 %127 + %173 = OpFMul %v4float 
%98 %131 + %174 = OpFAdd %v4float %172 %173 + %175 = OpVectorShuffle %v3float %171 %89 0 1 2 + %176 = OpVectorShuffle %v3float %99 %99 0 1 2 + %177 = OpCompositeConstruct %v3float %118 %118 %118 + %178 = OpFMul %v3float %176 %177 + %179 = OpFAdd %v3float %175 %178 + %180 = OpVectorShuffle %v4float %90 %179 4 5 6 3 + %181 = OpFMul %v4float %100 %136 + %182 = OpFAdd %v4float %174 %181 + %183 = OpVectorShuffle %v3float %182 %182 0 1 2 + %184 = OpVectorShuffle %v4float %164 %164 4 5 6 3 + %185 = OpAccessChain %_ptr_Uniform_mat4v4float %ShadowDepthPass %int_88 + %186 = OpLoad %mat4v4float %185 + %187 = OpAccessChain %_ptr_Uniform_mat4v4float %ShadowDepthPass %int_89 + %188 = OpLoad %mat4v4float %187 + OpStore %92 %188 + %189 = OpMatrixTimesVector %v4float %186 %184 + %190 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_91 + %191 = OpLoad %float %190 + %192 = OpFOrdGreaterThan %bool %191 %float_0 + %193 = OpCompositeExtract %float %189 2 + %194 = OpFOrdLessThan %bool %193 %float_0 + %195 = OpLogicalAnd %bool %192 %194 + OpSelectionMerge %196 None + OpBranchConditional %195 %197 %196 + %197 = OpLabel + %198 = OpCompositeInsert %v4float %float_9_99999997en07 %189 2 + %199 = OpCompositeInsert %v4float %float_1 %198 3 + OpBranch %196 + %196 = OpLabel + %200 = OpPhi %v4float %189 %91 %199 %197 + %201 = OpAccessChain %_ptr_Function_float %92 %uint_0 %int_2 + %202 = OpLoad %float %201 + %203 = OpAccessChain %_ptr_Function_float %92 %uint_1 %int_2 + %204 = OpLoad %float %203 + %205 = OpAccessChain %_ptr_Function_float %92 %uint_2 %int_2 + %206 = OpLoad %float %205 + %207 = OpCompositeConstruct %v3float %202 %204 %206 + %208 = OpDot %float %207 %183 + %209 = OpExtInst %float %1 FAbs %208 + %210 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_2 + %211 = OpLoad %float %210 + %212 = OpExtInst %float %1 FAbs %209 + %213 = OpFOrdGreaterThan %bool %212 %float_0 + %214 = OpFMul %float %209 %209 + %215 = OpFSub %float %float_1 %214 + %216 = OpExtInst %float %1 
FClamp %215 %float_0 %float_1 + %217 = OpExtInst %float %1 Sqrt %216 + %218 = OpFDiv %float %217 %209 + %219 = OpSelect %float %213 %218 %211 + %220 = OpExtInst %float %1 FClamp %219 %float_0 %211 + %221 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_1 + %222 = OpLoad %float %221 + %223 = OpFMul %float %222 %220 + %224 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_0 + %225 = OpLoad %float %224 + %226 = OpFAdd %float %223 %225 + %227 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_3 + %228 = OpLoad %float %227 + %229 = OpCompositeExtract %float %200 2 + %230 = OpFMul %float %229 %228 + %231 = OpFAdd %float %230 %226 + %232 = OpCompositeExtract %float %200 3 + %233 = OpFMul %float %231 %232 + %234 = OpCompositeInsert %v4float %233 %200 2 + %235 = OpVectorShuffle %v3float %164 %89 0 1 2 + OpStore %out_var_TEXCOORD10_centroid %180 + OpStore %out_var_TEXCOORD11_centroid %182 + OpStore %out_var_TEXCOORD6 %float_0 + OpStore %out_var_TEXCOORD7 %235 + OpStore %gl_Position %234 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/vert/array-missing-copies.asm.vert b/shaders-ue4/asm/vert/array-missing-copies.asm.vert new file mode 100644 index 00000000000..23dc7275601 --- /dev/null +++ b/shaders-ue4/asm/vert/array-missing-copies.asm.vert @@ -0,0 +1,1131 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 487 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %Main "main" %in_var_ATTRIBUTE0 %in_var_ATTRIBUTE1 %out_var_TEXCOORD0 %out_var_TEXCOORD1 %out_var_TEXCOORD2 %out_var_TEXCOORD3 %out_var_TEXCOORD8 %gl_Position + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 
"View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 
"View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 
"View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + 
OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName %type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" 
+ OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_MobileBasePass "type.MobileBasePass" + OpMemberName %type_MobileBasePass 0 "MobileBasePass_Fog_ExponentialFogParameters" + OpMemberName %type_MobileBasePass 1 "MobileBasePass_Fog_ExponentialFogParameters2" + OpMemberName %type_MobileBasePass 2 
"MobileBasePass_Fog_ExponentialFogColorParameter" + OpMemberName %type_MobileBasePass 3 "MobileBasePass_Fog_ExponentialFogParameters3" + OpMemberName %type_MobileBasePass 4 "MobileBasePass_Fog_InscatteringLightDirection" + OpMemberName %type_MobileBasePass 5 "MobileBasePass_Fog_DirectionalInscatteringColor" + OpMemberName %type_MobileBasePass 6 "MobileBasePass_Fog_SinCosInscatteringColorCubemapRotation" + OpMemberName %type_MobileBasePass 7 "PrePadding_MobileBasePass_Fog_104" + OpMemberName %type_MobileBasePass 8 "PrePadding_MobileBasePass_Fog_108" + OpMemberName %type_MobileBasePass 9 "MobileBasePass_Fog_FogInscatteringTextureParameters" + OpMemberName %type_MobileBasePass 10 "MobileBasePass_Fog_ApplyVolumetricFog" + OpMemberName %type_MobileBasePass 11 "PrePadding_MobileBasePass_PlanarReflection_128" + OpMemberName %type_MobileBasePass 12 "PrePadding_MobileBasePass_PlanarReflection_132" + OpMemberName %type_MobileBasePass 13 "PrePadding_MobileBasePass_PlanarReflection_136" + OpMemberName %type_MobileBasePass 14 "PrePadding_MobileBasePass_PlanarReflection_140" + OpMemberName %type_MobileBasePass 15 "PrePadding_MobileBasePass_PlanarReflection_144" + OpMemberName %type_MobileBasePass 16 "PrePadding_MobileBasePass_PlanarReflection_148" + OpMemberName %type_MobileBasePass 17 "PrePadding_MobileBasePass_PlanarReflection_152" + OpMemberName %type_MobileBasePass 18 "PrePadding_MobileBasePass_PlanarReflection_156" + OpMemberName %type_MobileBasePass 19 "MobileBasePass_PlanarReflection_ReflectionPlane" + OpMemberName %type_MobileBasePass 20 "MobileBasePass_PlanarReflection_PlanarReflectionOrigin" + OpMemberName %type_MobileBasePass 21 "MobileBasePass_PlanarReflection_PlanarReflectionXAxis" + OpMemberName %type_MobileBasePass 22 "MobileBasePass_PlanarReflection_PlanarReflectionYAxis" + OpMemberName %type_MobileBasePass 23 "MobileBasePass_PlanarReflection_InverseTransposeMirrorMatrix" + OpMemberName %type_MobileBasePass 24 
"MobileBasePass_PlanarReflection_PlanarReflectionParameters" + OpMemberName %type_MobileBasePass 25 "PrePadding_MobileBasePass_PlanarReflection_284" + OpMemberName %type_MobileBasePass 26 "MobileBasePass_PlanarReflection_PlanarReflectionParameters2" + OpMemberName %type_MobileBasePass 27 "PrePadding_MobileBasePass_PlanarReflection_296" + OpMemberName %type_MobileBasePass 28 "PrePadding_MobileBasePass_PlanarReflection_300" + OpMemberName %type_MobileBasePass 29 "MobileBasePass_PlanarReflection_ProjectionWithExtraFOV" + OpMemberName %type_MobileBasePass 30 "MobileBasePass_PlanarReflection_PlanarReflectionScreenScaleBias" + OpMemberName %type_MobileBasePass 31 "MobileBasePass_PlanarReflection_PlanarReflectionScreenBound" + OpMemberName %type_MobileBasePass 32 "MobileBasePass_PlanarReflection_bIsStereo" + OpName %MobileBasePass "MobileBasePass" + OpName %type_Primitive "type.Primitive" + OpMemberName %type_Primitive 0 "Primitive_LocalToWorld" + OpMemberName %type_Primitive 1 "Primitive_InvNonUniformScaleAndDeterminantSign" + OpMemberName %type_Primitive 2 "Primitive_ObjectWorldPositionAndRadius" + OpMemberName %type_Primitive 3 "Primitive_WorldToLocal" + OpMemberName %type_Primitive 4 "Primitive_PreviousLocalToWorld" + OpMemberName %type_Primitive 5 "Primitive_PreviousWorldToLocal" + OpMemberName %type_Primitive 6 "Primitive_ActorWorldPosition" + OpMemberName %type_Primitive 7 "Primitive_UseSingleSampleShadowFromStationaryLights" + OpMemberName %type_Primitive 8 "Primitive_ObjectBounds" + OpMemberName %type_Primitive 9 "Primitive_LpvBiasMultiplier" + OpMemberName %type_Primitive 10 "Primitive_DecalReceiverMask" + OpMemberName %type_Primitive 11 "Primitive_PerObjectGBufferData" + OpMemberName %type_Primitive 12 "Primitive_UseVolumetricLightmapShadowFromStationaryLights" + OpMemberName %type_Primitive 13 "Primitive_UseEditorDepthTest" + OpMemberName %type_Primitive 14 "Primitive_ObjectOrientation" + OpMemberName %type_Primitive 15 "Primitive_NonUniformScale" + 
OpMemberName %type_Primitive 16 "Primitive_LocalObjectBoundsMin" + OpMemberName %type_Primitive 17 "PrePadding_Primitive_380" + OpMemberName %type_Primitive 18 "Primitive_LocalObjectBoundsMax" + OpMemberName %type_Primitive 19 "Primitive_LightingChannelMask" + OpMemberName %type_Primitive 20 "Primitive_LightmapDataIndex" + OpMemberName %type_Primitive 21 "Primitive_SingleCaptureIndex" + OpName %Primitive "Primitive" + OpName %type_LandscapeParameters "type.LandscapeParameters" + OpMemberName %type_LandscapeParameters 0 "LandscapeParameters_HeightmapUVScaleBias" + OpMemberName %type_LandscapeParameters 1 "LandscapeParameters_WeightmapUVScaleBias" + OpMemberName %type_LandscapeParameters 2 "LandscapeParameters_LandscapeLightmapScaleBias" + OpMemberName %type_LandscapeParameters 3 "LandscapeParameters_SubsectionSizeVertsLayerUVPan" + OpMemberName %type_LandscapeParameters 4 "LandscapeParameters_SubsectionOffsetParams" + OpMemberName %type_LandscapeParameters 5 "LandscapeParameters_LightmapSubsectionOffsetParams" + OpMemberName %type_LandscapeParameters 6 "LandscapeParameters_LocalToWorldNoScaling" + OpName %LandscapeParameters "LandscapeParameters" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "LodBias" + OpMemberName %type__Globals 1 "LodValues" + OpMemberName %type__Globals 2 "SectionLods" + OpMemberName %type__Globals 3 "NeighborSectionLod" + OpName %_Globals "$Globals" + OpName %in_var_ATTRIBUTE0 "in.var.ATTRIBUTE0" + OpName %in_var_ATTRIBUTE1 "in.var.ATTRIBUTE1" + OpName %out_var_TEXCOORD0 "out.var.TEXCOORD0" + OpName %out_var_TEXCOORD1 "out.var.TEXCOORD1" + OpName %out_var_TEXCOORD2 "out.var.TEXCOORD2" + OpName %out_var_TEXCOORD3 "out.var.TEXCOORD3" + OpName %out_var_TEXCOORD8 "out.var.TEXCOORD8" + OpName %Main "Main" + OpDecorateString %in_var_ATTRIBUTE0 UserSemantic "ATTRIBUTE0" + OpDecorateString %in_var_ATTRIBUTE1 UserSemantic "ATTRIBUTE1" + OpDecorateString %out_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString 
%out_var_TEXCOORD1 UserSemantic "TEXCOORD1" + OpDecorateString %out_var_TEXCOORD2 UserSemantic "TEXCOORD2" + OpDecorateString %out_var_TEXCOORD3 UserSemantic "TEXCOORD3" + OpDecorateString %out_var_TEXCOORD8 UserSemantic "TEXCOORD8" + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic "SV_POSITION" + OpDecorate %in_var_ATTRIBUTE0 Location 0 + OpDecorate %in_var_ATTRIBUTE1 Location 1 + OpDecorate %out_var_TEXCOORD0 Location 0 + OpDecorate %out_var_TEXCOORD1 Location 1 + OpDecorate %out_var_TEXCOORD2 Location 2 + OpDecorate %out_var_TEXCOORD3 Location 3 + OpDecorate %out_var_TEXCOORD8 Location 4 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %MobileBasePass DescriptorSet 0 + OpDecorate %MobileBasePass Binding 1 + OpDecorate %Primitive DescriptorSet 0 + OpDecorate %Primitive Binding 2 + OpDecorate %LandscapeParameters DescriptorSet 0 + OpDecorate %LandscapeParameters Binding 3 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 4 + OpDecorate %_arr_v4float_uint_2_0 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 
Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 
MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 
1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + 
OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate 
%type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpDecorate %_arr_mat4v4float_uint_2 ArrayStride 64 + OpMemberDecorate %type_MobileBasePass 0 Offset 0 + OpMemberDecorate %type_MobileBasePass 1 Offset 16 + OpMemberDecorate %type_MobileBasePass 2 Offset 32 + OpMemberDecorate %type_MobileBasePass 3 Offset 48 + OpMemberDecorate %type_MobileBasePass 4 Offset 64 + OpMemberDecorate %type_MobileBasePass 5 Offset 80 + OpMemberDecorate %type_MobileBasePass 6 Offset 96 + OpMemberDecorate %type_MobileBasePass 7 Offset 104 + OpMemberDecorate %type_MobileBasePass 8 Offset 108 + OpMemberDecorate %type_MobileBasePass 9 Offset 112 + OpMemberDecorate %type_MobileBasePass 10 Offset 124 + OpMemberDecorate %type_MobileBasePass 11 Offset 128 + OpMemberDecorate %type_MobileBasePass 12 Offset 132 + 
OpMemberDecorate %type_MobileBasePass 13 Offset 136 + OpMemberDecorate %type_MobileBasePass 14 Offset 140 + OpMemberDecorate %type_MobileBasePass 15 Offset 144 + OpMemberDecorate %type_MobileBasePass 16 Offset 148 + OpMemberDecorate %type_MobileBasePass 17 Offset 152 + OpMemberDecorate %type_MobileBasePass 18 Offset 156 + OpMemberDecorate %type_MobileBasePass 19 Offset 160 + OpMemberDecorate %type_MobileBasePass 20 Offset 176 + OpMemberDecorate %type_MobileBasePass 21 Offset 192 + OpMemberDecorate %type_MobileBasePass 22 Offset 208 + OpMemberDecorate %type_MobileBasePass 23 Offset 224 + OpMemberDecorate %type_MobileBasePass 23 MatrixStride 16 + OpMemberDecorate %type_MobileBasePass 23 ColMajor + OpMemberDecorate %type_MobileBasePass 24 Offset 272 + OpMemberDecorate %type_MobileBasePass 25 Offset 284 + OpMemberDecorate %type_MobileBasePass 26 Offset 288 + OpMemberDecorate %type_MobileBasePass 27 Offset 296 + OpMemberDecorate %type_MobileBasePass 28 Offset 300 + OpMemberDecorate %type_MobileBasePass 29 Offset 304 + OpMemberDecorate %type_MobileBasePass 29 MatrixStride 16 + OpMemberDecorate %type_MobileBasePass 29 ColMajor + OpMemberDecorate %type_MobileBasePass 30 Offset 432 + OpMemberDecorate %type_MobileBasePass 31 Offset 464 + OpMemberDecorate %type_MobileBasePass 32 Offset 472 + OpDecorate %type_MobileBasePass Block + OpMemberDecorate %type_Primitive 0 Offset 0 + OpMemberDecorate %type_Primitive 0 MatrixStride 16 + OpMemberDecorate %type_Primitive 0 ColMajor + OpMemberDecorate %type_Primitive 1 Offset 64 + OpMemberDecorate %type_Primitive 2 Offset 80 + OpMemberDecorate %type_Primitive 3 Offset 96 + OpMemberDecorate %type_Primitive 3 MatrixStride 16 + OpMemberDecorate %type_Primitive 3 ColMajor + OpMemberDecorate %type_Primitive 4 Offset 160 + OpMemberDecorate %type_Primitive 4 MatrixStride 16 + OpMemberDecorate %type_Primitive 4 ColMajor + OpMemberDecorate %type_Primitive 5 Offset 224 + OpMemberDecorate %type_Primitive 5 MatrixStride 16 + OpMemberDecorate 
%type_Primitive 5 ColMajor + OpMemberDecorate %type_Primitive 6 Offset 288 + OpMemberDecorate %type_Primitive 7 Offset 300 + OpMemberDecorate %type_Primitive 8 Offset 304 + OpMemberDecorate %type_Primitive 9 Offset 316 + OpMemberDecorate %type_Primitive 10 Offset 320 + OpMemberDecorate %type_Primitive 11 Offset 324 + OpMemberDecorate %type_Primitive 12 Offset 328 + OpMemberDecorate %type_Primitive 13 Offset 332 + OpMemberDecorate %type_Primitive 14 Offset 336 + OpMemberDecorate %type_Primitive 15 Offset 352 + OpMemberDecorate %type_Primitive 16 Offset 368 + OpMemberDecorate %type_Primitive 17 Offset 380 + OpMemberDecorate %type_Primitive 18 Offset 384 + OpMemberDecorate %type_Primitive 19 Offset 396 + OpMemberDecorate %type_Primitive 20 Offset 400 + OpMemberDecorate %type_Primitive 21 Offset 404 + OpDecorate %type_Primitive Block + OpMemberDecorate %type_LandscapeParameters 0 Offset 0 + OpMemberDecorate %type_LandscapeParameters 1 Offset 16 + OpMemberDecorate %type_LandscapeParameters 2 Offset 32 + OpMemberDecorate %type_LandscapeParameters 3 Offset 48 + OpMemberDecorate %type_LandscapeParameters 4 Offset 64 + OpMemberDecorate %type_LandscapeParameters 5 Offset 80 + OpMemberDecorate %type_LandscapeParameters 6 Offset 96 + OpMemberDecorate %type_LandscapeParameters 6 MatrixStride 16 + OpMemberDecorate %type_LandscapeParameters 6 ColMajor + OpDecorate %type_LandscapeParameters Block + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 32 + OpMemberDecorate %type__Globals 3 Offset 48 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 
+%float_0_00999999978 = OpConstant %float 0.00999999978 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %float_0 = OpConstant %float 0 + %40 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %float_255 = OpConstant %float 255 + %44 = OpConstantComposite %v4float %float_255 %float_255 %float_255 %float_255 + %float_0_5 = OpConstant %float 0.5 + %46 = OpConstantComposite %v2float %float_0_5 %float_0_5 + %float_2 = OpConstant %float 2 + %48 = OpConstantComposite %v2float %float_2 %float_2 + %float_1 = OpConstant %float 1 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %float_3 = OpConstant %float 3 + %float_0_25 = OpConstant %float 0.25 + %uint_3 = OpConstant %uint 3 + %float_4 = OpConstant %float 4 +%float_0_125 = OpConstant %float 0.125 + %float_5 = OpConstant %float 5 +%float_0_0625 = OpConstant %float 0.0625 +%float_0_03125 = OpConstant %float 0.03125 + %60 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %int_5 = OpConstant %int 5 + %int_4 = OpConstant %int 4 + %63 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %int_25 = OpConstant %int 25 + %int_27 = OpConstant %int 27 + %int_31 = OpConstant %int 31 + %67 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 +%float_32768 = OpConstant %float 32768 +%_arr_v4float_uint_2_0 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float 
%mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2_0 %_arr_v4float_uint_2_0 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%mat3v4float = OpTypeMatrix %v4float 3 +%_arr_mat4v4float_uint_2 = OpTypeArray %mat4v4float %uint_2 +%type_MobileBasePass = OpTypeStruct %v4float %v4float %v4float %v4float %v4float %v4float %v2float %float %float %v3float %float %float %float %float %float %float %float %float %float %v4float %v4float %v4float %v4float %mat3v4float %v3float %float %v2float %float %float %_arr_mat4v4float_uint_2 %_arr_v4float_uint_2_0 %v2float %uint +%_ptr_Uniform_type_MobileBasePass = OpTypePointer Uniform %type_MobileBasePass +%type_Primitive = OpTypeStruct %mat4v4float %v4float %v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %float %float %float %float %v4float %v4float %v3float %float %v3float %uint %uint %int +%_ptr_Uniform_type_Primitive = OpTypePointer Uniform %type_Primitive +%type_LandscapeParameters = OpTypeStruct %v4float %v4float 
%v4float %v4float %v4float %v4float %mat4v4float +%_ptr_Uniform_type_LandscapeParameters = OpTypePointer Uniform %type_LandscapeParameters +%type__Globals = OpTypeStruct %v4float %v4float %v4float %_arr_v4float_uint_4 +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input__arr_v4float_uint_2 = OpTypePointer Input %_arr_v4float_uint_2 +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %80 = OpTypeFunction %void +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%_ptr_Function__arr_v4float_uint_1 = OpTypePointer Function %_arr_v4float_uint_1 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %bool = OpTypeBool +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %View = OpVariable %_ptr_Uniform_type_View Uniform +%MobileBasePass = OpVariable %_ptr_Uniform_type_MobileBasePass Uniform + %Primitive = OpVariable %_ptr_Uniform_type_Primitive Uniform +%LandscapeParameters = OpVariable %_ptr_Uniform_type_LandscapeParameters Uniform + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%in_var_ATTRIBUTE0 = OpVariable %_ptr_Input_v4float Input +%in_var_ATTRIBUTE1 = OpVariable %_ptr_Input__arr_v4float_uint_2 Input +%out_var_TEXCOORD0 = OpVariable %_ptr_Output_v2float Output +%out_var_TEXCOORD1 = OpVariable %_ptr_Output_v2float Output +%out_var_TEXCOORD2 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD3 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD8 = OpVariable %_ptr_Output_v4float Output +%gl_Position = OpVariable %_ptr_Output_v4float Output +%float_0_0078125 = OpConstant %float 0.0078125 + %float_n127 = OpConstant %float -127 + %92 = OpConstantNull %v4float 
+%float_0_00392156886 = OpConstant %float 0.00392156886 + %94 = OpConstantComposite %v2float %float_0_00392156886 %float_0_00392156886 +%float_65280 = OpConstant %float 65280 + %Main = OpFunction %void None %80 + %96 = OpLabel + %97 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %98 = OpLoad %v4float %in_var_ATTRIBUTE0 + %99 = OpLoad %_arr_v4float_uint_2 %in_var_ATTRIBUTE1 + %100 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_0 + %101 = OpLoad %mat4v4float %100 + %102 = OpAccessChain %_ptr_Uniform_v3float %View %int_27 + %103 = OpLoad %v3float %102 + %104 = OpAccessChain %_ptr_Uniform_v3float %View %int_31 + %105 = OpLoad %v3float %104 + OpBranch %106 + %106 = OpLabel + %107 = OpPhi %int %int_0 %96 %108 %109 + %110 = OpSLessThan %bool %107 %int_1 + OpLoopMerge %111 %109 Unroll + OpBranchConditional %110 %109 %111 + %109 = OpLabel + %112 = OpAccessChain %_ptr_Function_v4float %97 %107 + OpStore %112 %40 + %108 = OpIAdd %int %107 %int_1 + OpBranch %106 + %111 = OpLabel + %113 = OpCompositeExtract %v4float %99 0 + %114 = OpCompositeExtract %v4float %99 1 + %115 = OpFMul %v4float %98 %44 + %116 = OpVectorShuffle %v2float %115 %115 2 3 + %117 = OpFMul %v2float %116 %46 + %118 = OpExtInst %v2float %1 Fract %117 + %119 = OpFMul %v2float %118 %48 + %120 = OpFSub %v2float %116 %119 + %121 = OpFMul %v2float %120 %94 + %122 = OpVectorShuffle %v2float %115 %92 0 1 + %123 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 %int_3 + %124 = OpLoad %float %123 + %125 = OpCompositeConstruct %v2float %124 %124 + %126 = OpFMul %v2float %122 %125 + %127 = OpCompositeExtract %float %126 1 + %128 = OpCompositeExtract %float %126 0 + %129 = OpFSub %float %float_1 %128 + %130 = OpFSub %float %float_1 %127 + %131 = OpCompositeConstruct %v4float %127 %128 %129 %130 + %132 = OpFMul %v4float %131 %67 + %133 = OpCompositeExtract %float %119 1 + %134 = OpFOrdGreaterThan %bool %133 %float_0_5 + OpSelectionMerge %135 None + OpBranchConditional %134 %136 %137 + %136 = OpLabel + 
%138 = OpCompositeExtract %float %119 0 + %139 = OpFOrdGreaterThan %bool %138 %float_0_5 + OpSelectionMerge %140 None + OpBranchConditional %139 %141 %142 + %141 = OpLabel + %143 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 %int_3 + %144 = OpLoad %float %143 + %145 = OpCompositeConstruct %v4float %144 %144 %144 %144 + %146 = OpFMul %v4float %132 %145 + %147 = OpFSub %v4float %60 %132 + %148 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_3 %int_3 + %149 = OpLoad %v4float %148 + %150 = OpFMul %v4float %147 %149 + %151 = OpFAdd %v4float %146 %150 + OpBranch %140 + %142 = OpLabel + %152 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 %int_2 + %153 = OpLoad %float %152 + %154 = OpCompositeConstruct %v4float %153 %153 %153 %153 + %155 = OpFMul %v4float %132 %154 + %156 = OpFSub %v4float %60 %132 + %157 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_3 %int_2 + %158 = OpLoad %v4float %157 + %159 = OpFMul %v4float %156 %158 + %160 = OpFAdd %v4float %155 %159 + OpBranch %140 + %140 = OpLabel + %161 = OpPhi %v4float %151 %141 %160 %142 + OpBranch %135 + %137 = OpLabel + %162 = OpCompositeExtract %float %119 0 + %163 = OpFOrdGreaterThan %bool %162 %float_0_5 + OpSelectionMerge %164 None + OpBranchConditional %163 %165 %166 + %165 = OpLabel + %167 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 %int_1 + %168 = OpLoad %float %167 + %169 = OpCompositeConstruct %v4float %168 %168 %168 %168 + %170 = OpFMul %v4float %132 %169 + %171 = OpFSub %v4float %60 %132 + %172 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_3 %int_1 + %173 = OpLoad %v4float %172 + %174 = OpFMul %v4float %171 %173 + %175 = OpFAdd %v4float %170 %174 + OpBranch %164 + %166 = OpLabel + %176 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 %int_0 + %177 = OpLoad %float %176 + %178 = OpCompositeConstruct %v4float %177 %177 %177 %177 + %179 = OpFMul %v4float %132 %178 + %180 = OpFSub %v4float %60 %132 + %181 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_3 %int_0 + %182 = 
OpLoad %v4float %181 + %183 = OpFMul %v4float %180 %182 + %184 = OpFAdd %v4float %179 %183 + OpBranch %164 + %164 = OpLabel + %185 = OpPhi %v4float %175 %165 %184 %166 + OpBranch %135 + %135 = OpLabel + %186 = OpPhi %v4float %161 %140 %185 %164 + %187 = OpFAdd %float %128 %127 + %188 = OpFOrdGreaterThan %bool %187 %float_1 + OpSelectionMerge %189 None + OpBranchConditional %188 %190 %191 + %190 = OpLabel + %192 = OpFOrdLessThan %bool %128 %127 + OpSelectionMerge %193 None + OpBranchConditional %192 %194 %195 + %194 = OpLabel + %196 = OpCompositeExtract %float %186 3 + OpBranch %193 + %195 = OpLabel + %197 = OpCompositeExtract %float %186 2 + OpBranch %193 + %193 = OpLabel + %198 = OpPhi %float %196 %194 %197 %195 + OpBranch %189 + %191 = OpLabel + %199 = OpFOrdLessThan %bool %128 %127 + OpSelectionMerge %200 None + OpBranchConditional %199 %201 %202 + %201 = OpLabel + %203 = OpCompositeExtract %float %186 1 + OpBranch %200 + %202 = OpLabel + %204 = OpCompositeExtract %float %186 0 + OpBranch %200 + %200 = OpLabel + %205 = OpPhi %float %203 %201 %204 %202 + OpBranch %189 + %189 = OpLabel + %206 = OpPhi %float %198 %193 %205 %200 + %207 = OpExtInst %float %1 Floor %206 + %208 = OpFSub %float %206 %207 + %209 = OpFOrdLessThan %bool %207 %float_1 + %210 = OpCompositeExtract %float %114 0 + %211 = OpCompositeExtract %float %114 1 + %212 = OpCompositeConstruct %v3float %float_1 %210 %211 + %213 = OpFOrdLessThan %bool %207 %float_2 + %214 = OpCompositeExtract %float %114 2 + %215 = OpCompositeConstruct %v3float %float_0_5 %211 %214 + %216 = OpFOrdLessThan %bool %207 %float_3 + %217 = OpCompositeExtract %float %114 3 + %218 = OpCompositeConstruct %v3float %float_0_25 %214 %217 + %219 = OpFOrdLessThan %bool %207 %float_4 + %220 = OpCompositeExtract %float %121 0 + %221 = OpCompositeConstruct %v3float %float_0_125 %217 %220 + %222 = OpFOrdLessThan %bool %207 %float_5 + %223 = OpCompositeExtract %float %121 1 + %224 = OpCompositeConstruct %v3float %float_0_0625 %220 %223 + 
%225 = OpCompositeConstruct %v3float %float_0_03125 %223 %223 + %226 = OpCompositeConstruct %v3bool %222 %222 %222 + %227 = OpSelect %v3float %226 %224 %225 + %228 = OpCompositeConstruct %v3bool %219 %219 %219 + %229 = OpSelect %v3float %228 %221 %227 + %230 = OpCompositeConstruct %v3bool %216 %216 %216 + %231 = OpSelect %v3float %230 %218 %229 + %232 = OpCompositeConstruct %v3bool %213 %213 %213 + %233 = OpSelect %v3float %232 %215 %231 + %234 = OpCompositeConstruct %v3bool %209 %209 %209 + %235 = OpSelect %v3float %234 %212 %233 + %236 = OpCompositeExtract %float %235 0 + %237 = OpCompositeExtract %float %235 1 + %238 = OpCompositeExtract %float %235 2 + %239 = OpCompositeExtract %float %113 0 + %240 = OpFMul %float %239 %float_65280 + %241 = OpCompositeExtract %float %113 1 + %242 = OpFMul %float %241 %float_255 + %243 = OpFAdd %float %240 %242 + %244 = OpFSub %float %243 %float_32768 + %245 = OpFMul %float %244 %float_0_0078125 + %246 = OpCompositeExtract %float %113 2 + %247 = OpFMul %float %246 %float_65280 + %248 = OpCompositeExtract %float %113 3 + %249 = OpFMul %float %248 %float_255 + %250 = OpFAdd %float %247 %249 + %251 = OpFSub %float %250 %float_32768 + %252 = OpFMul %float %251 %float_0_0078125 + %253 = OpExtInst %float %1 FMix %245 %252 %237 + %254 = OpExtInst %float %1 FMix %245 %252 %238 + %255 = OpCompositeConstruct %v2float %236 %236 + %256 = OpFMul %v2float %122 %255 + %257 = OpExtInst %v2float %1 Floor %256 + %258 = OpAccessChain %_ptr_Uniform_v4float %LandscapeParameters %int_3 + %259 = OpAccessChain %_ptr_Uniform_float %LandscapeParameters %int_3 %int_0 + %260 = OpLoad %float %259 + %261 = OpFMul %float %260 %236 + %262 = OpFSub %float %261 %float_1 + %263 = OpFMul %float %260 %float_0_5 + %264 = OpFMul %float %263 %236 + %265 = OpExtInst %float %1 FMax %264 %float_2 + %266 = OpFSub %float %265 %float_1 + %267 = OpCompositeConstruct %v2float %262 %266 + %268 = OpAccessChain %_ptr_Uniform_float %LandscapeParameters %int_3 %int_1 + %269 = 
OpLoad %float %268 + %270 = OpCompositeConstruct %v2float %269 %269 + %271 = OpFMul %v2float %267 %270 + %272 = OpCompositeExtract %float %271 0 + %273 = OpCompositeConstruct %v2float %272 %272 + %274 = OpFDiv %v2float %257 %273 + %275 = OpFMul %v2float %257 %46 + %276 = OpExtInst %v2float %1 Floor %275 + %277 = OpCompositeExtract %float %271 1 + %278 = OpCompositeConstruct %v2float %277 %277 + %279 = OpFDiv %v2float %276 %278 + %280 = OpCompositeExtract %float %274 0 + %281 = OpCompositeExtract %float %274 1 + %282 = OpCompositeConstruct %v3float %280 %281 %253 + %283 = OpCompositeExtract %float %279 0 + %284 = OpCompositeExtract %float %279 1 + %285 = OpCompositeConstruct %v3float %283 %284 %254 + %286 = OpCompositeConstruct %v3float %208 %208 %208 + %287 = OpExtInst %v3float %1 FMix %282 %285 %286 + %288 = OpVectorShuffle %v2float %119 %92 0 1 + %289 = OpAccessChain %_ptr_Uniform_v4float %LandscapeParameters %int_4 + %290 = OpLoad %v4float %289 + %291 = OpVectorShuffle %v2float %290 %290 3 3 + %292 = OpFMul %v2float %288 %291 + %293 = OpCompositeExtract %float %292 0 + %294 = OpCompositeExtract %float %292 1 + %295 = OpCompositeConstruct %v3float %293 %294 %float_0 + %296 = OpFAdd %v3float %287 %295 + %297 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_0 + %298 = OpLoad %v4float %297 + %299 = OpVectorShuffle %v3float %298 %298 0 1 2 + %300 = OpVectorShuffle %v3float %296 %296 0 0 0 + %301 = OpFMul %v3float %299 %300 + %302 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_1 + %303 = OpLoad %v4float %302 + %304 = OpVectorShuffle %v3float %303 %303 0 1 2 + %305 = OpVectorShuffle %v3float %296 %296 1 1 1 + %306 = OpFMul %v3float %304 %305 + %307 = OpFAdd %v3float %301 %306 + %308 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_2 + %309 = OpLoad %v4float %308 + %310 = OpVectorShuffle %v3float %309 %309 0 1 2 + %311 = OpVectorShuffle %v3float %296 %296 2 2 2 + %312 = OpFMul %v3float %310 %311 + %313 = OpFAdd %v3float %307 %312 
+ %314 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_3 + %315 = OpLoad %v4float %314 + %316 = OpVectorShuffle %v3float %315 %315 0 1 2 + %317 = OpFAdd %v3float %316 %105 + %318 = OpFAdd %v3float %313 %317 + %319 = OpCompositeExtract %float %318 0 + %320 = OpCompositeExtract %float %318 1 + %321 = OpCompositeExtract %float %318 2 + %322 = OpCompositeConstruct %v4float %319 %320 %321 %float_1 + %323 = OpVectorShuffle %v2float %287 %287 0 1 + %324 = OpLoad %v4float %258 + %325 = OpVectorShuffle %v2float %324 %324 2 3 + %326 = OpFAdd %v2float %323 %325 + %327 = OpFAdd %v2float %326 %292 + %328 = OpAccessChain %_ptr_Uniform_v4float %LandscapeParameters %int_1 + %329 = OpLoad %v4float %328 + %330 = OpVectorShuffle %v2float %329 %329 0 1 + %331 = OpFMul %v2float %323 %330 + %332 = OpVectorShuffle %v2float %329 %329 2 3 + %333 = OpFAdd %v2float %331 %332 + %334 = OpVectorShuffle %v2float %290 %290 2 2 + %335 = OpFMul %v2float %288 %334 + %336 = OpFAdd %v2float %333 %335 + %337 = OpVectorShuffle %v2float %327 %92 0 1 + %338 = OpVectorShuffle %v4float %322 %322 4 5 6 3 + %339 = OpMatrixTimesVector %v4float %101 %338 + %340 = OpVectorShuffle %v3float %322 %92 0 1 2 + %341 = OpFSub %v3float %340 %103 + %342 = OpAccessChain %_ptr_Uniform_v4float %MobileBasePass %int_2 + %343 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_2 %int_3 + %344 = OpLoad %float %343 + %345 = OpDot %float %341 %341 + %346 = OpExtInst %float %1 InverseSqrt %345 + %347 = OpFMul %float %345 %346 + %348 = OpCompositeConstruct %v3float %346 %346 %346 + %349 = OpFMul %v3float %341 %348 + %350 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_0 %int_0 + %351 = OpLoad %float %350 + %352 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_1 %int_0 + %353 = OpLoad %float %352 + %354 = OpCompositeExtract %float %341 2 + %355 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_0 %int_3 + %356 = OpLoad %float %355 + %357 = OpExtInst %float %1 FMax %float_0 %356 + %358 = 
OpFOrdGreaterThan %bool %357 %float_0 + OpSelectionMerge %359 None + OpBranchConditional %358 %360 %359 + %360 = OpLabel + %361 = OpFMul %float %357 %346 + %362 = OpFMul %float %361 %354 + %363 = OpAccessChain %_ptr_Uniform_float %View %int_25 %int_2 + %364 = OpLoad %float %363 + %365 = OpFAdd %float %364 %362 + %366 = OpFSub %float %354 %362 + %367 = OpFSub %float %float_1 %361 + %368 = OpFMul %float %367 %347 + %369 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_0 %int_1 + %370 = OpLoad %float %369 + %371 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_3 %int_1 + %372 = OpLoad %float %371 + %373 = OpFSub %float %365 %372 + %374 = OpFMul %float %370 %373 + %375 = OpExtInst %float %1 FMax %float_n127 %374 + %376 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_3 %int_0 + %377 = OpLoad %float %376 + %378 = OpFNegate %float %375 + %379 = OpExtInst %float %1 Exp2 %378 + %380 = OpFMul %float %377 %379 + %381 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_1 %int_1 + %382 = OpLoad %float %381 + %383 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_1 %int_3 + %384 = OpLoad %float %383 + %385 = OpFSub %float %365 %384 + %386 = OpFMul %float %382 %385 + %387 = OpExtInst %float %1 FMax %float_n127 %386 + %388 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_1 %int_2 + %389 = OpLoad %float %388 + %390 = OpFNegate %float %387 + %391 = OpExtInst %float %1 Exp2 %390 + %392 = OpFMul %float %389 %391 + OpBranch %359 + %359 = OpLabel + %393 = OpPhi %float %347 %189 %368 %360 + %394 = OpPhi %float %353 %189 %392 %360 + %395 = OpPhi %float %351 %189 %380 %360 + %396 = OpPhi %float %354 %189 %366 %360 + %397 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_0 %int_1 + %398 = OpLoad %float %397 + %399 = OpFMul %float %398 %396 + %400 = OpExtInst %float %1 FMax %float_n127 %399 + %401 = OpFNegate %float %400 + %402 = OpExtInst %float %1 Exp2 %401 + %403 = OpFSub %float %float_1 %402 + %404 = OpFDiv %float %403 %400 + %405 = 
OpExtInst %float %1 Log %float_2 + %406 = OpFMul %float %405 %405 + %407 = OpFMul %float %float_0_5 %406 + %408 = OpFMul %float %407 %400 + %409 = OpFSub %float %405 %408 + %410 = OpExtInst %float %1 FAbs %400 + %411 = OpFOrdGreaterThan %bool %410 %float_0_00999999978 + %412 = OpSelect %float %411 %404 %409 + %413 = OpFMul %float %395 %412 + %414 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_1 %int_1 + %415 = OpLoad %float %414 + %416 = OpFMul %float %415 %396 + %417 = OpExtInst %float %1 FMax %float_n127 %416 + %418 = OpFNegate %float %417 + %419 = OpExtInst %float %1 Exp2 %418 + %420 = OpFSub %float %float_1 %419 + %421 = OpFDiv %float %420 %417 + %422 = OpFMul %float %407 %417 + %423 = OpFSub %float %405 %422 + %424 = OpExtInst %float %1 FAbs %417 + %425 = OpFOrdGreaterThan %bool %424 %float_0_00999999978 + %426 = OpSelect %float %425 %421 %423 + %427 = OpFMul %float %394 %426 + %428 = OpFAdd %float %413 %427 + %429 = OpFMul %float %428 %393 + %430 = OpLoad %v4float %342 + %431 = OpVectorShuffle %v3float %430 %430 0 1 2 + %432 = OpAccessChain %_ptr_Uniform_v4float %MobileBasePass %int_4 + %433 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_4 %int_3 + %434 = OpLoad %float %433 + %435 = OpFOrdGreaterThanEqual %bool %434 %float_0 + OpSelectionMerge %436 DontFlatten + OpBranchConditional %435 %437 %436 + %437 = OpLabel + %438 = OpAccessChain %_ptr_Uniform_v4float %MobileBasePass %int_5 + %439 = OpLoad %v4float %438 + %440 = OpVectorShuffle %v3float %439 %439 0 1 2 + %441 = OpLoad %v4float %432 + %442 = OpVectorShuffle %v3float %441 %441 0 1 2 + %443 = OpDot %float %349 %442 + %444 = OpExtInst %float %1 FClamp %443 %float_0 %float_1 + %445 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_5 %int_3 + %446 = OpLoad %float %445 + %447 = OpExtInst %float %1 Pow %444 %446 + %448 = OpCompositeConstruct %v3float %447 %447 %447 + %449 = OpFMul %v3float %440 %448 + %450 = OpFSub %float %393 %434 + %451 = OpExtInst %float %1 FMax %450 %float_0 + 
%452 = OpFMul %float %428 %451 + %453 = OpFNegate %float %452 + %454 = OpExtInst %float %1 Exp2 %453 + %455 = OpExtInst %float %1 FClamp %454 %float_0 %float_1 + %456 = OpFSub %float %float_1 %455 + %457 = OpCompositeConstruct %v3float %456 %456 %456 + %458 = OpFMul %v3float %449 %457 + OpBranch %436 + %436 = OpLabel + %459 = OpPhi %v3float %63 %359 %458 %437 + %460 = OpFNegate %float %429 + %461 = OpExtInst %float %1 Exp2 %460 + %462 = OpExtInst %float %1 FClamp %461 %float_0 %float_1 + %463 = OpExtInst %float %1 FMax %462 %344 + %464 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_3 %int_3 + %465 = OpLoad %float %464 + %466 = OpFOrdGreaterThan %bool %465 %float_0 + %467 = OpFOrdGreaterThan %bool %347 %465 + %468 = OpLogicalAnd %bool %466 %467 + %469 = OpCompositeConstruct %v3bool %468 %468 %468 + %470 = OpSelect %v3float %469 %63 %459 + %471 = OpSelect %float %468 %float_1 %463 + %472 = OpFSub %float %float_1 %471 + %473 = OpCompositeConstruct %v3float %472 %472 %472 + %474 = OpFMul %v3float %431 %473 + %475 = OpFAdd %v3float %474 %470 + %476 = OpCompositeExtract %float %475 0 + %477 = OpCompositeExtract %float %475 1 + %478 = OpCompositeExtract %float %475 2 + %479 = OpCompositeConstruct %v4float %476 %477 %478 %471 + %480 = OpAccessChain %_ptr_Function_v4float %97 %int_0 + OpStore %480 %479 + %481 = OpCompositeExtract %float %339 3 + %482 = OpCompositeInsert %v4float %481 %338 3 + %483 = OpLoad %_arr_v4float_uint_1 %97 + %484 = OpCompositeExtract %v4float %483 0 + %485 = OpVectorShuffle %v4float %92 %484 0 1 4 5 + %486 = OpVectorShuffle %v4float %92 %484 0 1 6 7 + OpStore %out_var_TEXCOORD0 %337 + OpStore %out_var_TEXCOORD1 %336 + OpStore %out_var_TEXCOORD2 %485 + OpStore %out_var_TEXCOORD3 %486 + OpStore %out_var_TEXCOORD8 %482 + OpStore %gl_Position %339 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/vert/texture-buffer.asm.vert b/shaders-ue4/asm/vert/texture-buffer.asm.vert new file mode 100644 index 00000000000..6d52623a145 --- /dev/null 
+++ b/shaders-ue4/asm/vert/texture-buffer.asm.vert @@ -0,0 +1,1054 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 397 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability StorageImageExtendedFormats + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %Main "main" %gl_VertexIndex %gl_InstanceIndex %in_var_ATTRIBUTE0 %out_var_TEXCOORD6 %gl_Position + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName 
%type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" 
+ OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 
99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName %type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" 
+ OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 
"PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_Primitive "type.Primitive" + OpMemberName %type_Primitive 0 "Primitive_LocalToWorld" + OpMemberName %type_Primitive 1 "Primitive_InvNonUniformScaleAndDeterminantSign" + OpMemberName %type_Primitive 2 "Primitive_ObjectWorldPositionAndRadius" + OpMemberName %type_Primitive 3 "Primitive_WorldToLocal" + OpMemberName %type_Primitive 4 "Primitive_PreviousLocalToWorld" + OpMemberName %type_Primitive 5 "Primitive_PreviousWorldToLocal" + OpMemberName %type_Primitive 6 "Primitive_ActorWorldPosition" + OpMemberName %type_Primitive 7 "Primitive_UseSingleSampleShadowFromStationaryLights" + OpMemberName %type_Primitive 8 "Primitive_ObjectBounds" + OpMemberName %type_Primitive 9 "Primitive_LpvBiasMultiplier" + OpMemberName %type_Primitive 10 "Primitive_DecalReceiverMask" + OpMemberName %type_Primitive 11 "Primitive_PerObjectGBufferData" + OpMemberName %type_Primitive 12 "Primitive_UseVolumetricLightmapShadowFromStationaryLights" + OpMemberName %type_Primitive 13 "Primitive_UseEditorDepthTest" + OpMemberName %type_Primitive 14 "Primitive_ObjectOrientation" + OpMemberName %type_Primitive 15 "Primitive_NonUniformScale" + OpMemberName %type_Primitive 16 "Primitive_LocalObjectBoundsMin" + OpMemberName %type_Primitive 17 "PrePadding_Primitive_380" + OpMemberName %type_Primitive 18 "Primitive_LocalObjectBoundsMax" + OpMemberName %type_Primitive 19 "Primitive_LightingChannelMask" + OpMemberName %type_Primitive 20 "Primitive_LightmapDataIndex" + OpMemberName %type_Primitive 21 "Primitive_SingleCaptureIndex" + OpName %Primitive "Primitive" + OpName 
%type_MobileShadowDepthPass "type.MobileShadowDepthPass" + OpMemberName %type_MobileShadowDepthPass 0 "PrePadding_MobileShadowDepthPass_0" + OpMemberName %type_MobileShadowDepthPass 1 "PrePadding_MobileShadowDepthPass_4" + OpMemberName %type_MobileShadowDepthPass 2 "PrePadding_MobileShadowDepthPass_8" + OpMemberName %type_MobileShadowDepthPass 3 "PrePadding_MobileShadowDepthPass_12" + OpMemberName %type_MobileShadowDepthPass 4 "PrePadding_MobileShadowDepthPass_16" + OpMemberName %type_MobileShadowDepthPass 5 "PrePadding_MobileShadowDepthPass_20" + OpMemberName %type_MobileShadowDepthPass 6 "PrePadding_MobileShadowDepthPass_24" + OpMemberName %type_MobileShadowDepthPass 7 "PrePadding_MobileShadowDepthPass_28" + OpMemberName %type_MobileShadowDepthPass 8 "PrePadding_MobileShadowDepthPass_32" + OpMemberName %type_MobileShadowDepthPass 9 "PrePadding_MobileShadowDepthPass_36" + OpMemberName %type_MobileShadowDepthPass 10 "PrePadding_MobileShadowDepthPass_40" + OpMemberName %type_MobileShadowDepthPass 11 "PrePadding_MobileShadowDepthPass_44" + OpMemberName %type_MobileShadowDepthPass 12 "PrePadding_MobileShadowDepthPass_48" + OpMemberName %type_MobileShadowDepthPass 13 "PrePadding_MobileShadowDepthPass_52" + OpMemberName %type_MobileShadowDepthPass 14 "PrePadding_MobileShadowDepthPass_56" + OpMemberName %type_MobileShadowDepthPass 15 "PrePadding_MobileShadowDepthPass_60" + OpMemberName %type_MobileShadowDepthPass 16 "PrePadding_MobileShadowDepthPass_64" + OpMemberName %type_MobileShadowDepthPass 17 "PrePadding_MobileShadowDepthPass_68" + OpMemberName %type_MobileShadowDepthPass 18 "PrePadding_MobileShadowDepthPass_72" + OpMemberName %type_MobileShadowDepthPass 19 "PrePadding_MobileShadowDepthPass_76" + OpMemberName %type_MobileShadowDepthPass 20 "MobileShadowDepthPass_ProjectionMatrix" + OpMemberName %type_MobileShadowDepthPass 21 "MobileShadowDepthPass_ShadowParams" + OpMemberName %type_MobileShadowDepthPass 22 "MobileShadowDepthPass_bClampToNearPlane" + OpMemberName 
%type_MobileShadowDepthPass 23 "PrePadding_MobileShadowDepthPass_156" + OpMemberName %type_MobileShadowDepthPass 24 "MobileShadowDepthPass_ShadowViewProjectionMatrices" + OpName %MobileShadowDepthPass "MobileShadowDepthPass" + OpName %type_EmitterDynamicUniforms "type.EmitterDynamicUniforms" + OpMemberName %type_EmitterDynamicUniforms 0 "EmitterDynamicUniforms_LocalToWorldScale" + OpMemberName %type_EmitterDynamicUniforms 1 "EmitterDynamicUniforms_EmitterInstRandom" + OpMemberName %type_EmitterDynamicUniforms 2 "PrePadding_EmitterDynamicUniforms_12" + OpMemberName %type_EmitterDynamicUniforms 3 "EmitterDynamicUniforms_AxisLockRight" + OpMemberName %type_EmitterDynamicUniforms 4 "EmitterDynamicUniforms_AxisLockUp" + OpMemberName %type_EmitterDynamicUniforms 5 "EmitterDynamicUniforms_DynamicColor" + OpMemberName %type_EmitterDynamicUniforms 6 "EmitterDynamicUniforms_MacroUVParameters" + OpName %EmitterDynamicUniforms "EmitterDynamicUniforms" + OpName %type_EmitterUniforms "type.EmitterUniforms" + OpMemberName %type_EmitterUniforms 0 "EmitterUniforms_ColorCurve" + OpMemberName %type_EmitterUniforms 1 "EmitterUniforms_ColorScale" + OpMemberName %type_EmitterUniforms 2 "EmitterUniforms_ColorBias" + OpMemberName %type_EmitterUniforms 3 "EmitterUniforms_MiscCurve" + OpMemberName %type_EmitterUniforms 4 "EmitterUniforms_MiscScale" + OpMemberName %type_EmitterUniforms 5 "EmitterUniforms_MiscBias" + OpMemberName %type_EmitterUniforms 6 "EmitterUniforms_SizeBySpeed" + OpMemberName %type_EmitterUniforms 7 "EmitterUniforms_SubImageSize" + OpMemberName %type_EmitterUniforms 8 "EmitterUniforms_TangentSelector" + OpMemberName %type_EmitterUniforms 9 "EmitterUniforms_CameraFacingBlend" + OpMemberName %type_EmitterUniforms 10 "EmitterUniforms_RemoveHMDRoll" + OpMemberName %type_EmitterUniforms 11 "EmitterUniforms_RotationRateScale" + OpMemberName %type_EmitterUniforms 12 "EmitterUniforms_RotationBias" + OpMemberName %type_EmitterUniforms 13 "EmitterUniforms_CameraMotionBlurAmount" + 
OpMemberName %type_EmitterUniforms 14 "PrePadding_EmitterUniforms_172" + OpMemberName %type_EmitterUniforms 15 "EmitterUniforms_PivotOffset" + OpName %EmitterUniforms "EmitterUniforms" + OpName %type_buffer_image "type.buffer.image" + OpName %ParticleIndices "ParticleIndices" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "ParticleIndicesOffset" + OpName %_Globals "$Globals" + OpName %type_2d_image "type.2d.image" + OpName %PositionTexture "PositionTexture" + OpName %type_sampler "type.sampler" + OpName %PositionTextureSampler "PositionTextureSampler" + OpName %VelocityTexture "VelocityTexture" + OpName %VelocityTextureSampler "VelocityTextureSampler" + OpName %AttributesTexture "AttributesTexture" + OpName %AttributesTextureSampler "AttributesTextureSampler" + OpName %CurveTexture "CurveTexture" + OpName %CurveTextureSampler "CurveTextureSampler" + OpName %in_var_ATTRIBUTE0 "in.var.ATTRIBUTE0" + OpName %out_var_TEXCOORD6 "out.var.TEXCOORD6" + OpName %Main "Main" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorateString %gl_VertexIndex UserSemantic "SV_VertexID" + OpDecorate %gl_InstanceIndex BuiltIn InstanceIndex + OpDecorateString %gl_InstanceIndex UserSemantic "SV_InstanceID" + OpDecorateString %in_var_ATTRIBUTE0 UserSemantic "ATTRIBUTE0" + OpDecorateString %out_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic "SV_POSITION" + OpDecorate %in_var_ATTRIBUTE0 Location 0 + OpDecorate %out_var_TEXCOORD6 Location 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 1 + OpDecorate %Primitive DescriptorSet 0 + OpDecorate %Primitive Binding 2 + OpDecorate %MobileShadowDepthPass DescriptorSet 0 + OpDecorate %MobileShadowDepthPass Binding 3 + OpDecorate %EmitterDynamicUniforms DescriptorSet 0 + OpDecorate %EmitterDynamicUniforms Binding 4 + OpDecorate %EmitterUniforms DescriptorSet 0 + OpDecorate 
%EmitterUniforms Binding 5 + OpDecorate %ParticleIndices DescriptorSet 0 + OpDecorate %ParticleIndices Binding 0 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 6 + OpDecorate %PositionTexture DescriptorSet 0 + OpDecorate %PositionTexture Binding 1 + OpDecorate %PositionTextureSampler DescriptorSet 0 + OpDecorate %PositionTextureSampler Binding 0 + OpDecorate %VelocityTexture DescriptorSet 0 + OpDecorate %VelocityTexture Binding 2 + OpDecorate %VelocityTextureSampler DescriptorSet 0 + OpDecorate %VelocityTextureSampler Binding 1 + OpDecorate %AttributesTexture DescriptorSet 0 + OpDecorate %AttributesTexture Binding 3 + OpDecorate %AttributesTextureSampler DescriptorSet 0 + OpDecorate %AttributesTextureSampler Binding 2 + OpDecorate %CurveTexture DescriptorSet 0 + OpDecorate %CurveTexture Binding 4 + OpDecorate %CurveTextureSampler DescriptorSet 0 + OpDecorate %CurveTextureSampler Binding 3 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + 
OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + 
OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + 
OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate 
%type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 
2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpMemberDecorate %type_Primitive 0 Offset 0 + OpMemberDecorate %type_Primitive 0 MatrixStride 16 + OpMemberDecorate %type_Primitive 0 ColMajor + OpMemberDecorate %type_Primitive 1 Offset 64 + OpMemberDecorate %type_Primitive 2 Offset 80 + OpMemberDecorate %type_Primitive 3 Offset 96 + OpMemberDecorate %type_Primitive 3 MatrixStride 16 + OpMemberDecorate %type_Primitive 3 ColMajor + OpMemberDecorate %type_Primitive 4 Offset 160 + OpMemberDecorate %type_Primitive 4 MatrixStride 16 + OpMemberDecorate %type_Primitive 4 ColMajor + OpMemberDecorate %type_Primitive 5 Offset 224 + OpMemberDecorate %type_Primitive 5 MatrixStride 16 + OpMemberDecorate %type_Primitive 5 ColMajor + OpMemberDecorate %type_Primitive 6 Offset 288 + OpMemberDecorate %type_Primitive 7 Offset 300 + OpMemberDecorate %type_Primitive 8 Offset 304 + OpMemberDecorate %type_Primitive 9 
Offset 316 + OpMemberDecorate %type_Primitive 10 Offset 320 + OpMemberDecorate %type_Primitive 11 Offset 324 + OpMemberDecorate %type_Primitive 12 Offset 328 + OpMemberDecorate %type_Primitive 13 Offset 332 + OpMemberDecorate %type_Primitive 14 Offset 336 + OpMemberDecorate %type_Primitive 15 Offset 352 + OpMemberDecorate %type_Primitive 16 Offset 368 + OpMemberDecorate %type_Primitive 17 Offset 380 + OpMemberDecorate %type_Primitive 18 Offset 384 + OpMemberDecorate %type_Primitive 19 Offset 396 + OpMemberDecorate %type_Primitive 20 Offset 400 + OpMemberDecorate %type_Primitive 21 Offset 404 + OpDecorate %type_Primitive Block + OpDecorate %_arr_mat4v4float_uint_6 ArrayStride 64 + OpMemberDecorate %type_MobileShadowDepthPass 0 Offset 0 + OpMemberDecorate %type_MobileShadowDepthPass 1 Offset 4 + OpMemberDecorate %type_MobileShadowDepthPass 2 Offset 8 + OpMemberDecorate %type_MobileShadowDepthPass 3 Offset 12 + OpMemberDecorate %type_MobileShadowDepthPass 4 Offset 16 + OpMemberDecorate %type_MobileShadowDepthPass 5 Offset 20 + OpMemberDecorate %type_MobileShadowDepthPass 6 Offset 24 + OpMemberDecorate %type_MobileShadowDepthPass 7 Offset 28 + OpMemberDecorate %type_MobileShadowDepthPass 8 Offset 32 + OpMemberDecorate %type_MobileShadowDepthPass 9 Offset 36 + OpMemberDecorate %type_MobileShadowDepthPass 10 Offset 40 + OpMemberDecorate %type_MobileShadowDepthPass 11 Offset 44 + OpMemberDecorate %type_MobileShadowDepthPass 12 Offset 48 + OpMemberDecorate %type_MobileShadowDepthPass 13 Offset 52 + OpMemberDecorate %type_MobileShadowDepthPass 14 Offset 56 + OpMemberDecorate %type_MobileShadowDepthPass 15 Offset 60 + OpMemberDecorate %type_MobileShadowDepthPass 16 Offset 64 + OpMemberDecorate %type_MobileShadowDepthPass 17 Offset 68 + OpMemberDecorate %type_MobileShadowDepthPass 18 Offset 72 + OpMemberDecorate %type_MobileShadowDepthPass 19 Offset 76 + OpMemberDecorate %type_MobileShadowDepthPass 20 Offset 80 + OpMemberDecorate %type_MobileShadowDepthPass 20 MatrixStride 16 
+ OpMemberDecorate %type_MobileShadowDepthPass 20 ColMajor + OpMemberDecorate %type_MobileShadowDepthPass 21 Offset 144 + OpMemberDecorate %type_MobileShadowDepthPass 22 Offset 152 + OpMemberDecorate %type_MobileShadowDepthPass 23 Offset 156 + OpMemberDecorate %type_MobileShadowDepthPass 24 Offset 160 + OpMemberDecorate %type_MobileShadowDepthPass 24 MatrixStride 16 + OpMemberDecorate %type_MobileShadowDepthPass 24 ColMajor + OpDecorate %type_MobileShadowDepthPass Block + OpMemberDecorate %type_EmitterDynamicUniforms 0 Offset 0 + OpMemberDecorate %type_EmitterDynamicUniforms 1 Offset 8 + OpMemberDecorate %type_EmitterDynamicUniforms 2 Offset 12 + OpMemberDecorate %type_EmitterDynamicUniforms 3 Offset 16 + OpMemberDecorate %type_EmitterDynamicUniforms 4 Offset 32 + OpMemberDecorate %type_EmitterDynamicUniforms 5 Offset 48 + OpMemberDecorate %type_EmitterDynamicUniforms 6 Offset 64 + OpDecorate %type_EmitterDynamicUniforms Block + OpMemberDecorate %type_EmitterUniforms 0 Offset 0 + OpMemberDecorate %type_EmitterUniforms 1 Offset 16 + OpMemberDecorate %type_EmitterUniforms 2 Offset 32 + OpMemberDecorate %type_EmitterUniforms 3 Offset 48 + OpMemberDecorate %type_EmitterUniforms 4 Offset 64 + OpMemberDecorate %type_EmitterUniforms 5 Offset 80 + OpMemberDecorate %type_EmitterUniforms 6 Offset 96 + OpMemberDecorate %type_EmitterUniforms 7 Offset 112 + OpMemberDecorate %type_EmitterUniforms 8 Offset 128 + OpMemberDecorate %type_EmitterUniforms 9 Offset 144 + OpMemberDecorate %type_EmitterUniforms 10 Offset 156 + OpMemberDecorate %type_EmitterUniforms 11 Offset 160 + OpMemberDecorate %type_EmitterUniforms 12 Offset 164 + OpMemberDecorate %type_EmitterUniforms 13 Offset 168 + OpMemberDecorate %type_EmitterUniforms 14 Offset 172 + OpMemberDecorate %type_EmitterUniforms 15 Offset 176 + OpDecorate %type_EmitterUniforms Block + OpMemberDecorate %type__Globals 0 Offset 0 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = 
OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %float_0 = OpConstant %float 0 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint_16 = OpConstant %uint 16 + %int_3 = OpConstant %int 3 + %float_1 = OpConstant %float 1 +%float_9_99999975en05 = OpConstant %float 9.99999975e-05 + %54 = OpConstantComposite %v3float %float_0 %float_0 %float_9_99999975en05 + %int_2 = OpConstant %int 2 + %int_5 = OpConstant %int 5 + %int_4 = OpConstant %int 4 + %float_0_5 = OpConstant %float 0.5 + %float_n0_5 = OpConstant %float -0.5 + %float_2 = OpConstant %float 2 + %61 = OpConstantComposite %v2float %float_2 %float_2 + %int_6 = OpConstant %int 6 + %63 = OpConstantComposite %v2float %float_1 %float_1 + %int_11 = OpConstant %int 11 + %int_15 = OpConstant %int 15 + %int_8 = OpConstant %int 8 + %int_9 = OpConstant %int 9 + %int_10 = OpConstant %int 10 + %int_12 = OpConstant %int 12 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 +%mat3v3float = OpTypeMatrix %v3float 3 + %int_20 = OpConstant %int 20 + %int_22 = OpConstant %int 22 +%float_9_99999997en07 = OpConstant %float 9.99999997e-07 + %int_21 = OpConstant %int 21 + %int_17 = OpConstant %int 17 + %int_19 = OpConstant %int 19 + %int_27 = OpConstant %int 27 + %int_31 = OpConstant %int 31 + %uint_3 = OpConstant %uint 3 + %82 = OpConstantComposite %v3float %float_0 %float_0 %float_1 +%float_0_00999999978 = OpConstant %float 0.00999999978 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float 
%float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_Primitive = OpTypeStruct %mat4v4float %v4float %v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %float %float %float %float %v4float %v4float %v3float %float %v3float %uint %uint %int +%_ptr_Uniform_type_Primitive = OpTypePointer Uniform %type_Primitive + %uint_6 = OpConstant %uint 6 +%_arr_mat4v4float_uint_6 = OpTypeArray %mat4v4float %uint_6 +%type_MobileShadowDepthPass = OpTypeStruct %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %mat4v4float %v2float %float %float %_arr_mat4v4float_uint_6 
+%_ptr_Uniform_type_MobileShadowDepthPass = OpTypePointer Uniform %type_MobileShadowDepthPass +%type_EmitterDynamicUniforms = OpTypeStruct %v2float %float %float %v4float %v4float %v4float %v4float +%_ptr_Uniform_type_EmitterDynamicUniforms = OpTypePointer Uniform %type_EmitterDynamicUniforms +%type_EmitterUniforms = OpTypeStruct %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v3float %float %float %float %float %float %v2float +%_ptr_Uniform_type_EmitterUniforms = OpTypePointer Uniform %type_EmitterUniforms +%type_buffer_image = OpTypeImage %float Buffer 2 0 0 1 Rg32f +%_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +%type__Globals = OpTypeStruct %uint +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %99 = OpTypeFunction %void +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %bool = OpTypeBool +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%type_sampled_image = OpTypeSampledImage %type_2d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform + %Primitive = OpVariable %_ptr_Uniform_type_Primitive Uniform +%MobileShadowDepthPass = OpVariable %_ptr_Uniform_type_MobileShadowDepthPass Uniform +%EmitterDynamicUniforms = OpVariable 
%_ptr_Uniform_type_EmitterDynamicUniforms Uniform +%EmitterUniforms = OpVariable %_ptr_Uniform_type_EmitterUniforms Uniform +%ParticleIndices = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%PositionTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%PositionTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%VelocityTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%VelocityTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%AttributesTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%AttributesTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%CurveTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%CurveTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%gl_VertexIndex = OpVariable %_ptr_Input_uint Input +%gl_InstanceIndex = OpVariable %_ptr_Input_uint Input +%in_var_ATTRIBUTE0 = OpVariable %_ptr_Input_v2float Input +%out_var_TEXCOORD6 = OpVariable %_ptr_Output_float Output +%gl_Position = OpVariable %_ptr_Output_v4float Output +%float_6_28318548 = OpConstant %float 6.28318548 + %108 = OpConstantNull %v3float + %Main = OpFunction %void None %99 + %109 = OpLabel + %110 = OpLoad %uint %gl_VertexIndex + %111 = OpLoad %uint %gl_InstanceIndex + %112 = OpLoad %v2float %in_var_ATTRIBUTE0 + %113 = OpAccessChain %_ptr_Uniform_v3float %View %int_15 + %114 = OpLoad %v3float %113 + %115 = OpAccessChain %_ptr_Uniform_v3float %View %int_17 + %116 = OpLoad %v3float %115 + %117 = OpAccessChain %_ptr_Uniform_v3float %View %int_19 + %118 = OpLoad %v3float %117 + %119 = OpAccessChain %_ptr_Uniform_v3float %View %int_21 + %120 = OpLoad %v3float %119 + %121 = OpAccessChain %_ptr_Uniform_v3float %View %int_27 + %122 = OpLoad %v3float %121 + %123 = OpAccessChain %_ptr_Uniform_v3float %View 
%int_31 + %124 = OpLoad %v3float %123 + %125 = OpIMul %uint %111 %uint_16 + %126 = OpUDiv %uint %110 %uint_4 + %127 = OpIAdd %uint %125 %126 + %128 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 + %129 = OpLoad %uint %128 + %130 = OpIAdd %uint %129 %127 + %131 = OpLoad %type_buffer_image %ParticleIndices + %132 = OpImageFetch %v4float %131 %130 None + %133 = OpVectorShuffle %v2float %132 %132 0 1 + %134 = OpLoad %type_2d_image %PositionTexture + %135 = OpLoad %type_sampler %PositionTextureSampler + %136 = OpSampledImage %type_sampled_image %134 %135 + %137 = OpImageSampleExplicitLod %v4float %136 %133 Lod %float_0 + %138 = OpLoad %type_2d_image %VelocityTexture + %139 = OpLoad %type_sampler %VelocityTextureSampler + %140 = OpSampledImage %type_sampled_image %138 %139 + %141 = OpImageSampleExplicitLod %v4float %140 %133 Lod %float_0 + %142 = OpLoad %type_2d_image %AttributesTexture + %143 = OpLoad %type_sampler %AttributesTextureSampler + %144 = OpSampledImage %type_sampled_image %142 %143 + %145 = OpImageSampleExplicitLod %v4float %144 %133 Lod %float_0 + %146 = OpCompositeExtract %float %137 3 + %147 = OpExtInst %float %1 Step %146 %float_1 + %148 = OpVectorShuffle %v3float %141 %141 0 1 2 + %149 = OpAccessChain %_ptr_Uniform_mat4v4float %Primitive %int_0 + %150 = OpLoad %mat4v4float %149 + %151 = OpCompositeExtract %v4float %150 0 + %152 = OpVectorShuffle %v3float %151 %151 0 1 2 + %153 = OpCompositeExtract %v4float %150 1 + %154 = OpVectorShuffle %v3float %153 %153 0 1 2 + %155 = OpCompositeExtract %v4float %150 2 + %156 = OpVectorShuffle %v3float %155 %155 0 1 2 + %157 = OpCompositeConstruct %mat3v3float %152 %154 %156 + %158 = OpMatrixTimesVector %v3float %157 %148 + %159 = OpFAdd %v3float %158 %54 + %160 = OpExtInst %v3float %1 Normalize %159 + %161 = OpExtInst %float %1 Length %158 + %162 = OpAccessChain %_ptr_Uniform_v4float %EmitterUniforms %int_3 + %163 = OpLoad %v4float %162 + %164 = OpVectorShuffle %v2float %163 %163 0 1 + %165 = OpVectorShuffle 
%v2float %163 %163 2 3 + %166 = OpCompositeConstruct %v2float %146 %146 + %167 = OpFMul %v2float %165 %166 + %168 = OpFAdd %v2float %164 %167 + %169 = OpLoad %type_2d_image %CurveTexture + %170 = OpLoad %type_sampler %CurveTextureSampler + %171 = OpSampledImage %type_sampled_image %169 %170 + %172 = OpImageSampleExplicitLod %v4float %171 %168 Lod %float_0 + %173 = OpAccessChain %_ptr_Uniform_v4float %EmitterUniforms %int_4 + %174 = OpLoad %v4float %173 + %175 = OpFMul %v4float %172 %174 + %176 = OpAccessChain %_ptr_Uniform_v4float %EmitterUniforms %int_5 + %177 = OpLoad %v4float %176 + %178 = OpFAdd %v4float %175 %177 + %179 = OpCompositeExtract %float %145 0 + %180 = OpFOrdLessThan %bool %179 %float_0_5 + %181 = OpSelect %float %180 %float_0 %float_n0_5 + %182 = OpCompositeExtract %float %145 1 + %183 = OpFOrdLessThan %bool %182 %float_0_5 + %184 = OpSelect %float %183 %float_0 %float_n0_5 + %185 = OpCompositeConstruct %v2float %181 %184 + %186 = OpVectorShuffle %v2float %145 %145 0 1 + %187 = OpFAdd %v2float %186 %185 + %188 = OpFMul %v2float %187 %61 + %189 = OpVectorShuffle %v2float %178 %178 0 1 + %190 = OpAccessChain %_ptr_Uniform_v2float %EmitterDynamicUniforms %int_0 + %191 = OpLoad %v2float %190 + %192 = OpFMul %v2float %189 %191 + %193 = OpAccessChain %_ptr_Uniform_v4float %EmitterUniforms %int_6 + %194 = OpLoad %v4float %193 + %195 = OpVectorShuffle %v2float %194 %194 0 1 + %196 = OpCompositeConstruct %v2float %161 %161 + %197 = OpFMul %v2float %195 %196 + %198 = OpExtInst %v2float %1 FMax %197 %63 + %199 = OpVectorShuffle %v2float %194 %194 2 3 + %200 = OpExtInst %v2float %1 FMin %198 %199 + %201 = OpFMul %v2float %188 %192 + %202 = OpFMul %v2float %201 %200 + %203 = OpCompositeConstruct %v2float %147 %147 + %204 = OpFMul %v2float %202 %203 + %205 = OpCompositeExtract %float %145 3 + %206 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_11 + %207 = OpLoad %float %206 + %208 = OpFMul %float %205 %207 + %209 = OpCompositeExtract %float %145 2 + 
%210 = OpFMul %float %208 %146 + %211 = OpFAdd %float %209 %210 + %212 = OpFMul %float %211 %float_6_28318548 + %213 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_0 + %214 = OpLoad %v4float %213 + %215 = OpVectorShuffle %v3float %214 %214 0 1 2 + %216 = OpVectorShuffle %v3float %137 %108 0 0 0 + %217 = OpFMul %v3float %215 %216 + %218 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_1 + %219 = OpLoad %v4float %218 + %220 = OpVectorShuffle %v3float %219 %219 0 1 2 + %221 = OpVectorShuffle %v3float %137 %108 1 1 1 + %222 = OpFMul %v3float %220 %221 + %223 = OpFAdd %v3float %217 %222 + %224 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_2 + %225 = OpLoad %v4float %224 + %226 = OpVectorShuffle %v3float %225 %225 0 1 2 + %227 = OpVectorShuffle %v3float %137 %108 2 2 2 + %228 = OpFMul %v3float %226 %227 + %229 = OpFAdd %v3float %223 %228 + %230 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_3 + %231 = OpLoad %v4float %230 + %232 = OpVectorShuffle %v3float %231 %231 0 1 2 + %233 = OpFAdd %v3float %232 %124 + %234 = OpFAdd %v3float %229 %233 + %235 = OpCompositeExtract %float %234 0 + %236 = OpCompositeExtract %float %234 1 + %237 = OpCompositeExtract %float %234 2 + %238 = OpCompositeConstruct %v4float %235 %236 %237 %float_1 + %239 = OpVectorShuffle %v3float %238 %238 0 1 2 + %240 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_10 + %241 = OpLoad %float %240 + %242 = OpCompositeConstruct %v3float %241 %241 %241 + %243 = OpExtInst %v3float %1 FMix %116 %120 %242 + %244 = OpExtInst %v3float %1 FMix %114 %118 %242 + %245 = OpAccessChain %_ptr_Uniform_v4float %EmitterDynamicUniforms %int_3 + %246 = OpLoad %v4float %245 + %247 = OpVectorShuffle %v3float %246 %246 0 1 2 + %248 = OpAccessChain %_ptr_Uniform_float %EmitterDynamicUniforms %int_3 %int_3 + %249 = OpLoad %float %248 + %250 = OpCompositeConstruct %v3float %249 %249 %249 + %251 = OpExtInst %v3float %1 FMix %243 %247 %250 + %252 = OpFNegate %v3float 
%244 + %253 = OpAccessChain %_ptr_Uniform_v4float %EmitterDynamicUniforms %int_4 + %254 = OpLoad %v4float %253 + %255 = OpVectorShuffle %v3float %254 %254 0 1 2 + %256 = OpAccessChain %_ptr_Uniform_float %EmitterDynamicUniforms %int_4 %int_3 + %257 = OpLoad %float %256 + %258 = OpCompositeConstruct %v3float %257 %257 %257 + %259 = OpExtInst %v3float %1 FMix %252 %255 %258 + %260 = OpFSub %v3float %122 %239 + %261 = OpDot %float %260 %260 + %262 = OpExtInst %float %1 FMax %261 %float_0_00999999978 + %263 = OpExtInst %float %1 Sqrt %262 + %264 = OpCompositeConstruct %v3float %263 %263 %263 + %265 = OpFDiv %v3float %260 %264 + %266 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_9 %int_0 + %267 = OpLoad %float %266 + %268 = OpFOrdGreaterThan %bool %267 %float_0 + OpSelectionMerge %269 DontFlatten + OpBranchConditional %268 %270 %271 + %270 = OpLabel + %272 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_9 %int_1 + %273 = OpLoad %float %272 + %274 = OpFMul %float %261 %273 + %275 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_9 %int_2 + %276 = OpLoad %float %275 + %277 = OpFSub %float %274 %276 + %278 = OpExtInst %float %1 FClamp %277 %float_0 %float_1 + %279 = OpExtInst %v3float %1 Cross %265 %82 + %280 = OpDot %float %279 %279 + %281 = OpExtInst %float %1 FMax %280 %float_0_00999999978 + %282 = OpExtInst %float %1 Sqrt %281 + %283 = OpCompositeConstruct %v3float %282 %282 %282 + %284 = OpFDiv %v3float %279 %283 + %285 = OpExtInst %v3float %1 Cross %265 %284 + %286 = OpCompositeConstruct %v3float %278 %278 %278 + %287 = OpExtInst %v3float %1 FMix %251 %284 %286 + %288 = OpExtInst %v3float %1 Normalize %287 + %289 = OpExtInst %v3float %1 FMix %259 %285 %286 + %290 = OpExtInst %v3float %1 Normalize %289 + OpBranch %269 + %271 = OpLabel + %291 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_8 %int_1 + %292 = OpLoad %float %291 + %293 = OpFOrdGreaterThan %bool %292 %float_0 + OpSelectionMerge %294 Flatten + OpBranchConditional %293 
%295 %296 + %295 = OpLabel + %297 = OpExtInst %v3float %1 Cross %265 %160 + %298 = OpDot %float %297 %297 + %299 = OpExtInst %float %1 FMax %298 %float_0_00999999978 + %300 = OpExtInst %float %1 Sqrt %299 + %301 = OpCompositeConstruct %v3float %300 %300 %300 + %302 = OpFDiv %v3float %297 %301 + %303 = OpFNegate %v3float %160 + OpBranch %294 + %296 = OpLabel + %304 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_8 %int_2 + %305 = OpLoad %float %304 + %306 = OpFOrdGreaterThan %bool %305 %float_0 + OpSelectionMerge %307 None + OpBranchConditional %306 %308 %309 + %308 = OpLabel + %310 = OpExtInst %v3float %1 Cross %247 %265 + %311 = OpDot %float %310 %310 + %312 = OpExtInst %float %1 FMax %311 %float_0_00999999978 + %313 = OpExtInst %float %1 Sqrt %312 + %314 = OpCompositeConstruct %v3float %313 %313 %313 + %315 = OpFDiv %v3float %310 %314 + %316 = OpFNegate %v3float %315 + OpBranch %307 + %309 = OpLabel + %317 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_8 %int_3 + %318 = OpLoad %float %317 + %319 = OpFOrdGreaterThan %bool %318 %float_0 + OpSelectionMerge %320 None + OpBranchConditional %319 %321 %320 + %321 = OpLabel + %322 = OpExtInst %v3float %1 Cross %265 %82 + %323 = OpDot %float %322 %322 + %324 = OpExtInst %float %1 FMax %323 %float_0_00999999978 + %325 = OpExtInst %float %1 Sqrt %324 + %326 = OpCompositeConstruct %v3float %325 %325 %325 + %327 = OpFDiv %v3float %322 %326 + %328 = OpExtInst %v3float %1 Cross %265 %327 + OpBranch %320 + %320 = OpLabel + %329 = OpPhi %v3float %251 %309 %327 %321 + %330 = OpPhi %v3float %259 %309 %328 %321 + OpBranch %307 + %307 = OpLabel + %331 = OpPhi %v3float %247 %308 %329 %320 + %332 = OpPhi %v3float %316 %308 %330 %320 + OpBranch %294 + %294 = OpLabel + %333 = OpPhi %v3float %302 %295 %331 %307 + %334 = OpPhi %v3float %303 %295 %332 %307 + OpBranch %269 + %269 = OpLabel + %335 = OpPhi %v3float %288 %270 %333 %294 + %336 = OpPhi %v3float %290 %270 %334 %294 + %337 = OpAccessChain %_ptr_Uniform_float 
%EmitterUniforms %int_12 + %338 = OpLoad %float %337 + %339 = OpFAdd %float %212 %338 + %340 = OpExtInst %float %1 Sin %339 + %341 = OpExtInst %float %1 Cos %339 + %342 = OpCompositeConstruct %v3float %340 %340 %340 + %343 = OpFMul %v3float %342 %336 + %344 = OpCompositeConstruct %v3float %341 %341 %341 + %345 = OpFMul %v3float %344 %335 + %346 = OpFAdd %v3float %343 %345 + %347 = OpFMul %v3float %344 %336 + %348 = OpFMul %v3float %342 %335 + %349 = OpFSub %v3float %347 %348 + %350 = OpCompositeExtract %float %204 0 + %351 = OpCompositeExtract %float %112 0 + %352 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_15 %int_0 + %353 = OpLoad %float %352 + %354 = OpFAdd %float %351 %353 + %355 = OpFMul %float %350 %354 + %356 = OpCompositeConstruct %v3float %355 %355 %355 + %357 = OpFMul %v3float %356 %346 + %358 = OpCompositeExtract %float %204 1 + %359 = OpCompositeExtract %float %112 1 + %360 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_15 %int_1 + %361 = OpLoad %float %360 + %362 = OpFAdd %float %359 %361 + %363 = OpFMul %float %358 %362 + %364 = OpCompositeConstruct %v3float %363 %363 %363 + %365 = OpFMul %v3float %364 %349 + %366 = OpFAdd %v3float %357 %365 + %367 = OpFAdd %v3float %239 %366 + %368 = OpCompositeExtract %float %367 0 + %369 = OpCompositeExtract %float %367 1 + %370 = OpCompositeExtract %float %367 2 + %371 = OpCompositeConstruct %v4float %368 %369 %370 %float_1 + %372 = OpVectorShuffle %v4float %371 %371 4 5 6 3 + %373 = OpAccessChain %_ptr_Uniform_mat4v4float %MobileShadowDepthPass %int_20 + %374 = OpLoad %mat4v4float %373 + %375 = OpMatrixTimesVector %v4float %374 %372 + %376 = OpAccessChain %_ptr_Uniform_float %MobileShadowDepthPass %int_22 + %377 = OpLoad %float %376 + %378 = OpFOrdGreaterThan %bool %377 %float_0 + %379 = OpCompositeExtract %float %375 2 + %380 = OpFOrdLessThan %bool %379 %float_0 + %381 = OpLogicalAnd %bool %378 %380 + OpSelectionMerge %382 None + OpBranchConditional %381 %383 %382 + %383 = OpLabel + %384 
= OpCompositeInsert %v4float %float_9_99999997en07 %375 2 + %385 = OpCompositeInsert %v4float %float_1 %384 3 + OpBranch %382 + %382 = OpLabel + %386 = OpPhi %v4float %375 %269 %385 %383 + %387 = OpAccessChain %_ptr_Uniform_float %MobileShadowDepthPass %int_21 %int_0 + %388 = OpLoad %float %387 + %389 = OpAccessChain %_ptr_Uniform_float %MobileShadowDepthPass %int_21 %int_1 + %390 = OpLoad %float %389 + %391 = OpCompositeExtract %float %386 2 + %392 = OpFMul %float %391 %390 + %393 = OpFAdd %float %392 %388 + %394 = OpCompositeExtract %float %386 3 + %395 = OpFMul %float %393 %394 + %396 = OpCompositeInsert %v4float %395 %386 2 + OpStore %out_var_TEXCOORD6 %float_0 + OpStore %gl_Position %396 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/bitcast_icmp.asm.comp b/shaders/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..b7b4e0b2e1e --- /dev/null +++ b/shaders/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,101 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %bool = OpTypeBool + %bvec4 = OpTypeVector %bool 4 + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = 
OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + %uzero = OpConstant %uint 0 + %uone = OpConstant %uint 1 + %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone + %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + + %result_slt = OpSLessThan %bvec4 %input0 %input1 + %result_sle = OpSLessThanEqual %bvec4 %input0 %input1 + %result_ult = OpULessThan %bvec4 %input0 %input1 + %result_ule = OpULessThanEqual %bvec4 %input0 %input1 + %result_sgt = OpSGreaterThan %bvec4 %input0 %input1 + %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1 + %result_ugt = OpUGreaterThan %bvec4 %input0 %input1 + %result_uge = OpUGreaterThanEqual %bvec4 %input0 %input1 + + %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_slt + + %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sle + + %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ult + + %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ule + + %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sgt + + %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse + OpStore 
%output_ptr_uvec4 %int_sge + + %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ugt + + %int_uge = OpSelect %uvec4 %result_uge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_uge + + + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/control-flow-hints.asm.comp b/shaders/asm/comp/control-flow-hints.asm.comp new file mode 100644 index 00000000000..74a15955c25 --- /dev/null +++ b/shaders/asm/comp/control-flow-hints.asm.comp @@ -0,0 +1,146 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 85 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource HLSL 500 + OpName %main "main" + OpName %_main_ "@main(" + OpName %i "i" + OpName %bar "bar" + OpMemberName %bar 0 "@data" + OpName %bar_0 "bar" + OpName %foo "foo" + OpName %i_0 "i" + OpName %v "v" + OpName %w "w" + OpName %value "value" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %bar 0 Offset 0 + OpDecorate %bar BufferBlock + OpDecorate %bar_0 DescriptorSet 0 + OpDecorate %bar_0 Binding 0 + OpDecorate %foo DescriptorSet 0 + OpDecorate %foo Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %int_16 = OpConstant %int 16 + %bool = OpTypeBool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %bar = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_bar = OpTypePointer Uniform %bar + %bar_0 = OpVariable %_ptr_Uniform_bar Uniform + %foo = OpVariable %_ptr_Uniform_bar Uniform +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_1 = OpConstant %int 1 + %int_15 = OpConstant %int 15 +%_ptr_Function_float = OpTypePointer Function %float + %int_10 = OpConstant %int 10 + %uint = OpTypeInt 32 0 + %uint_0 = 
OpConstant %uint 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %float_10 = OpConstant %float 10 + %int_20 = OpConstant %int 20 + %float_5 = OpConstant %float 5 + %72 = OpConstantComposite %v4float %float_5 %float_5 %float_5 %float_5 + %float_20 = OpConstant %float 20 + %float_40 = OpConstant %float 40 + %main = OpFunction %void None %3 + %5 = OpLabel + %84 = OpFunctionCall %void %_main_ + OpReturn + OpFunctionEnd + %_main_ = OpFunction %void None %3 + %7 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %i_0 = OpVariable %_ptr_Function_int Function + %v = OpVariable %_ptr_Function_float Function + %w = OpVariable %_ptr_Function_float Function + %value = OpVariable %_ptr_Function_float Function + OpStore %i %int_0 + OpBranch %12 + %12 = OpLabel + OpLoopMerge %14 %15 Unroll + OpBranch %16 + %16 = OpLabel + %17 = OpLoad %int %i + %20 = OpSLessThan %bool %17 %int_16 + OpBranchConditional %20 %13 %14 + %13 = OpLabel + %27 = OpLoad %int %i + %29 = OpLoad %int %i + %31 = OpAccessChain %_ptr_Uniform_v4float %foo %int_0 %29 + %32 = OpLoad %v4float %31 + %33 = OpAccessChain %_ptr_Uniform_v4float %bar_0 %int_0 %27 + OpStore %33 %32 + OpBranch %15 + %15 = OpLabel + %34 = OpLoad %int %i + %36 = OpIAdd %int %34 %int_1 + OpStore %i %36 + OpBranch %12 + %14 = OpLabel + OpStore %i_0 %int_0 + OpBranch %38 + %38 = OpLabel + OpLoopMerge %40 %41 DontUnroll + OpBranch %42 + %42 = OpLabel + %43 = OpLoad %int %i_0 + %44 = OpSLessThan %bool %43 %int_16 + OpBranchConditional %44 %39 %40 + %39 = OpLabel + %46 = OpLoad %int %i_0 + %47 = OpISub %int %int_15 %46 + %48 = OpLoad %int %i_0 + %49 = OpAccessChain %_ptr_Uniform_v4float %foo %int_0 %48 + %50 = OpLoad %v4float %49 + %51 = OpAccessChain %_ptr_Uniform_v4float %bar_0 %int_0 %47 + OpStore %51 %50 + OpBranch %41 + %41 = OpLabel + %52 = OpLoad %int %i_0 + %53 = OpIAdd %int %52 %int_1 + OpStore %i_0 %53 + OpBranch %38 + %40 = OpLabel + %60 = OpAccessChain %_ptr_Uniform_float %bar_0 %int_0 %int_10 %uint_0 + %61 = OpLoad 
%float %60 + OpStore %v %61 + %63 = OpAccessChain %_ptr_Uniform_float %foo %int_0 %int_10 %uint_0 + %64 = OpLoad %float %63 + OpStore %w %64 + %65 = OpLoad %float %v + %67 = OpFOrdGreaterThan %bool %65 %float_10 + OpSelectionMerge %69 DontFlatten + OpBranchConditional %67 %68 %69 + %68 = OpLabel + %73 = OpAccessChain %_ptr_Uniform_v4float %foo %int_0 %int_20 + OpStore %73 %72 + OpBranch %69 + %69 = OpLabel + OpStore %value %float_20 + %76 = OpLoad %float %w + %78 = OpFOrdGreaterThan %bool %76 %float_40 + OpSelectionMerge %80 Flatten + OpBranchConditional %78 %79 %80 + %79 = OpLabel + OpStore %value %float_20 + OpBranch %80 + %80 = OpLabel + %81 = OpLoad %float %value + %82 = OpCompositeConstruct %v4float %81 %81 %81 %81 + %83 = OpAccessChain %_ptr_Uniform_v4float %foo %int_0 %int_20 + OpStore %83 %82 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000000..6c060eedad9 --- /dev/null +++ b/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,203 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 139 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a1" + OpMemberName %SSBO 1 "a2" + OpMemberName %SSBO 2 "a3" + OpMemberName %SSBO 3 "a4" + OpMemberName %SSBO 4 "b1" + OpMemberName %SSBO 5 "b2" + OpMemberName %SSBO 6 "b3" + OpMemberName %SSBO 7 "b4" + OpMemberName %SSBO 8 "c1" + OpMemberName %SSBO 9 "c2" + OpMemberName %SSBO 10 "c3" + OpMemberName %SSBO 11 "c4" + OpName %_ "" + OpName %i "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 8 + OpMemberDecorate %SSBO 2 Offset 16 + OpMemberDecorate %SSBO 3 Offset 32 + OpMemberDecorate %SSBO 4 Offset 48 + 
OpMemberDecorate %SSBO 5 Offset 56 + OpMemberDecorate %SSBO 6 Offset 64 + OpMemberDecorate %SSBO 7 Offset 80 + OpMemberDecorate %SSBO 8 Offset 96 + OpMemberDecorate %SSBO 9 Offset 104 + OpMemberDecorate %SSBO 10 Offset 112 + OpMemberDecorate %SSBO 11 Offset 128 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %7 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_8 = OpConstant %int 8 + %int_1 = OpConstant %int 1 + %int_5 = OpConstant %int 5 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %int_9 = OpConstant %int 9 + %int_2 = OpConstant %int 2 + %int_6 = OpConstant %int 6 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_10 = OpConstant %int 10 + %int_3 = OpConstant %int 3 + %int_7 = OpConstant %int 7 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_11 = OpConstant %int 11 +%_ptr_Function_int = OpTypePointer Function %int + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %main = OpFunction %void None %7 + %35 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %37 = OpLoad %float %36 + %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %39 = OpLoad %float %38 + %40 = OpExtInst %float %1 NMin %37 %39 + %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %41 %40 + %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %43 = OpLoad %v2float %42 + %44 = OpAccessChain %_ptr_Uniform_v2float %_ 
%int_9 + %45 = OpLoad %v2float %44 + %46 = OpExtInst %v2float %1 NMin %43 %45 + %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %47 %46 + %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %49 = OpLoad %v3float %48 + %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %51 = OpLoad %v3float %50 + %52 = OpExtInst %v3float %1 NMin %49 %51 + %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %53 %52 + %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %55 = OpLoad %v4float %54 + %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %57 = OpLoad %v4float %56 + %58 = OpExtInst %v4float %1 NMin %55 %57 + %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %59 %58 + %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %61 = OpLoad %float %60 + %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %63 = OpLoad %float %62 + %64 = OpExtInst %float %1 NMax %61 %63 + %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %65 %64 + %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %67 = OpLoad %v2float %66 + %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %69 = OpLoad %v2float %68 + %70 = OpExtInst %v2float %1 NMax %67 %69 + %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %71 %70 + %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %73 = OpLoad %v3float %72 + %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %75 = OpLoad %v3float %74 + %76 = OpExtInst %v3float %1 NMax %73 %75 + %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %77 %76 + %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %79 = OpLoad %v4float %78 + %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %81 = OpLoad %v4float %80 + %82 = OpExtInst %v4float %1 NMax %79 %81 + %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %83 %82 + %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %87 = OpLoad %float %86 + %88 = OpAccessChain 
%_ptr_Uniform_float %_ %int_8 + %89 = OpLoad %float %88 + %90 = OpExtInst %float %1 NClamp %85 %87 %89 + %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %91 %90 + %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + %93 = OpLoad %v2float %92 + %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %95 = OpLoad %v2float %94 + %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %97 = OpLoad %v2float %96 + %98 = OpExtInst %v2float %1 NClamp %93 %95 %97 + %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %99 %98 + %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + %101 = OpLoad %v3float %100 + %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %103 = OpLoad %v3float %102 + %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %105 = OpLoad %v3float %104 + %106 = OpExtInst %v3float %1 NClamp %101 %103 %105 + %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %107 %106 + %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + %109 = OpLoad %v4float %108 + %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %111 = OpLoad %v4float %110 + %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %113 = OpLoad %v4float %112 + %114 = OpExtInst %v4float %1 NClamp %109 %111 %113 + %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %115 %114 + OpStore %i %int_0 + OpBranch %116 + %116 = OpLabel + OpLoopMerge %117 %118 None + OpBranch %119 + %119 = OpLabel + %120 = OpLoad %int %i + %121 = OpSLessThan %bool %120 %int_2 + OpBranchConditional %121 %122 %117 + %122 = OpLabel + %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %124 = OpLoad %v2float %123 + %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %126 = OpLoad %v2float %125 + %127 = OpExtInst %v2float %1 NMin %124 %126 + %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %128 %127 + OpBranch %118 + %118 = OpLabel + %129 = OpLoad %int %i + %130 = OpIAdd %int %129 %int_1 + OpStore %i %130 + %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %132 
= OpLoad %float %131 + %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0 + %134 = OpLoad %float %133 + %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1 + %136 = OpLoad %float %135 + %137 = OpExtInst %float %1 NClamp %132 %134 %136 + %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %138 %137 + OpBranch %116 + %117 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/switch-break-ladder.asm.comp b/shaders/asm/comp/switch-break-ladder.asm.invalid.comp similarity index 100% rename from shaders/asm/comp/switch-break-ladder.asm.comp rename to shaders/asm/comp/switch-break-ladder.asm.invalid.comp diff --git a/shaders/asm/comp/undefined-constant-composite.asm.comp b/shaders/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 00000000000..9de0501fe21 --- /dev/null +++ b/shaders/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,102 @@ +; +; The shader below is based on the following GLSL shader: +; +; #version 450 +; +; struct Pair { +; int first; +; int second; +; }; +; +; const Pair constant_pair = { 100, 200 }; +; +; layout(set=0, binding=0, std430) buffer InputBlock { +; int array[10]; +; } inputValues; +; +; layout(set=0, binding=1, std430) buffer OutputBlock { +; int array[10]; +; } outputValues; +; +; int add_second (int value, Pair pair) { +; return value + pair.second; +; } +; +; void main() { +; uint idx = gl_GlobalInvocationID.x; +; outputValues.array[idx] = add_second(inputValues.array[idx], constant_pair); +; } +; +; However, the first element of constant_pair has been modified to be undefined. 
+; + OpCapability Shader + %std450 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_arr_int_uint_10 ArrayStride 4 + OpMemberDecorate %OutputBlock 0 Offset 0 + OpDecorate %OutputBlock BufferBlock + OpDecorate %outputValues DescriptorSet 0 + OpDecorate %outputValues Binding 1 + OpMemberDecorate %InputBlock 0 Offset 0 + OpDecorate %InputBlock BufferBlock + OpDecorate %inputValues DescriptorSet 0 + OpDecorate %inputValues Binding 0 + %void = OpTypeVoid + %void_func = OpTypeFunction %void + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_200 = OpConstant %int 200 + %uint_0 = OpConstant %uint 0 + %uint_10 = OpConstant %uint 10 + %_ptr_Function_int = OpTypePointer Function %int + %Pair = OpTypeStruct %int %int + %_ptr_Function_Pair = OpTypePointer Function %Pair + %add_second_func_type = OpTypeFunction %int %_ptr_Function_int %_ptr_Function_Pair + %_ptr_Function_uint = OpTypePointer Function %uint + %_ptr_Input_v3uint = OpTypePointer Input %v3uint + %_ptr_Input_uint = OpTypePointer Input %uint + %_arr_int_uint_10 = OpTypeArray %int %uint_10 + %OutputBlock = OpTypeStruct %_arr_int_uint_10 +%_ptr_Uniform_OutputBlock = OpTypePointer Uniform %OutputBlock + %outputValues = OpVariable %_ptr_Uniform_OutputBlock Uniform + %InputBlock = OpTypeStruct %_arr_int_uint_10 + %_ptr_Uniform_InputBlock = OpTypePointer Uniform %InputBlock + %inputValues = OpVariable %_ptr_Uniform_InputBlock Uniform + ; Replaced %int_100 with an undefined int. + %undef_int = OpUndef %int + ; Composed a constant Pair with the undefined int in the first member. 
+ %const_Pair = OpConstantComposite %Pair %undef_int %int_200 + %_ptr_Uniform_int = OpTypePointer Uniform %int + %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %main = OpFunction %void None %void_func + %main_label = OpLabel + %param_1 = OpVariable %_ptr_Function_int Function + %param_2 = OpVariable %_ptr_Function_Pair Function + %gidx_ptr = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %gidx = OpLoad %uint %gidx_ptr + %input_value_ptr = OpAccessChain %_ptr_Uniform_int %inputValues %int_0 %gidx + %input_value = OpLoad %int %input_value_ptr + OpStore %param_1 %input_value + OpStore %param_2 %const_Pair + %retval = OpFunctionCall %int %add_second %param_1 %param_2 + %output_value_ptr = OpAccessChain %_ptr_Uniform_int %outputValues %int_0 %gidx + OpStore %output_value_ptr %retval + OpReturn + OpFunctionEnd + %add_second = OpFunction %int None %add_second_func_type + %value_ptr = OpFunctionParameter %_ptr_Function_int + %pair = OpFunctionParameter %_ptr_Function_Pair + %add_second_label = OpLabel + %value = OpLoad %int %value_ptr + ; Access the second struct member, which is defined. 
+ %pair_second_ptr = OpAccessChain %_ptr_Function_int %pair %int_1 + %pair_second = OpLoad %int %pair_second_ptr + %add_result = OpIAdd %int %value %pair_second + OpReturnValue %add_result + OpFunctionEnd diff --git a/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag b/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag new file mode 100644 index 00000000000..a3d64c09d7e --- /dev/null +++ b/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag @@ -0,0 +1,163 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 2 +; Bound: 113 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %xIn_1 %_entryPointOutput + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 500 + OpName %main "main" + OpName %sample_fetch_t21_vi3_ "sample_fetch(t21;vi3;" + OpName %tex "tex" + OpName %UV "UV" + OpName %sample_sampler_t21_vf2_ "sample_sampler(t21;vf2;" + OpName %tex_0 "tex" + OpName %UV_0 "UV" + OpName %_main_vf4_ "@main(vf4;" + OpName %xIn "xIn" + OpName %Sampler "Sampler" + OpName %coord "coord" + OpName %value "value" + OpName %SampledImage "SampledImage" + OpName %param "param" + OpName %param_0 "param" + OpName %param_1 "param" + OpName %param_2 "param" + OpName %xIn_0 "xIn" + OpName %xIn_1 "xIn" + OpName %_entryPointOutput "@entryPointOutput" + OpName %param_3 "param" + OpDecorate %Sampler DescriptorSet 0 + OpDecorate %Sampler Binding 0 + OpDecorate %SampledImage DescriptorSet 0 + OpDecorate %SampledImage Binding 0 + OpDecorate %xIn_1 BuiltIn FragCoord + OpDecorate %_entryPointOutput Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %7 = OpTypeImage %float 2D 0 0 0 1 Unknown +%_ptr_Function_7 = OpTypePointer Function %7 + %int = OpTypeInt 32 1 + %v3int = OpTypeVector %int 3 +%_ptr_Function_v3int = OpTypePointer Function %v3int + %v4float = OpTypeVector %float 4 + %13 = 
OpTypeFunction %v4float %_ptr_Function_7 %_ptr_Function_v3int + %v2float = OpTypeVector %float 2 +%_ptr_Function_v2float = OpTypePointer Function %v2float + %20 = OpTypeFunction %v4float %_ptr_Function_7 %_ptr_Function_v2float +%_ptr_Function_v4float = OpTypePointer Function %v4float + %26 = OpTypeFunction %v4float %_ptr_Function_v4float + %v2int = OpTypeVector %int 2 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_ptr_Function_int = OpTypePointer Function %int + %43 = OpTypeSampler +%_ptr_UniformConstant_43 = OpTypePointer UniformConstant %43 + %Sampler = OpVariable %_ptr_UniformConstant_43 UniformConstant + %47 = OpTypeSampledImage %7 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %float_1280 = OpConstant %float 1280 + %uint_1 = OpConstant %uint 1 + %float_720 = OpConstant %float 720 + %int_0 = OpConstant %int 0 +%_ptr_UniformConstant_7 = OpTypePointer UniformConstant %7 +%SampledImage = OpVariable %_ptr_UniformConstant_7 UniformConstant +%_ptr_Input_v4float = OpTypePointer Input %v4float + %xIn_1 = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %xIn_0 = OpVariable %_ptr_Function_v4float Function + %param_3 = OpVariable %_ptr_Function_v4float Function + %107 = OpLoad %v4float %xIn_1 + OpStore %xIn_0 %107 + %111 = OpLoad %v4float %xIn_0 + OpStore %param_3 %111 + %112 = OpFunctionCall %v4float %_main_vf4_ %param_3 + OpStore %_entryPointOutput %112 + OpReturn + OpFunctionEnd +%sample_fetch_t21_vi3_ = OpFunction %v4float None %13 + %tex = OpFunctionParameter %_ptr_Function_7 + %UV = OpFunctionParameter %_ptr_Function_v3int + %17 = OpLabel + %30 = OpLoad %7 %tex + %32 = OpLoad %v3int %UV + %33 = OpVectorShuffle %v2int %32 %32 0 1 + %37 = OpAccessChain %_ptr_Function_int %UV %uint_2 + %38 = OpLoad %int %37 + %39 = OpImageFetch %v4float %30 %33 Lod %38 + 
OpReturnValue %39 + OpFunctionEnd +%sample_sampler_t21_vf2_ = OpFunction %v4float None %20 + %tex_0 = OpFunctionParameter %_ptr_Function_7 + %UV_0 = OpFunctionParameter %_ptr_Function_v2float + %24 = OpLabel + %42 = OpLoad %7 %tex_0 + %46 = OpLoad %43 %Sampler + %48 = OpSampledImage %47 %42 %46 + %49 = OpLoad %v2float %UV_0 + %50 = OpImageSampleImplicitLod %v4float %48 %49 + OpReturnValue %50 + OpFunctionEnd + %_main_vf4_ = OpFunction %v4float None %26 + %xIn = OpFunctionParameter %_ptr_Function_v4float + %29 = OpLabel + %coord = OpVariable %_ptr_Function_v3int Function + %value = OpVariable %_ptr_Function_v4float Function + %param = OpVariable %_ptr_Function_7 Function + %param_0 = OpVariable %_ptr_Function_v3int Function + %param_1 = OpVariable %_ptr_Function_7 Function + %param_2 = OpVariable %_ptr_Function_v2float Function + %56 = OpAccessChain %_ptr_Function_float %xIn %uint_0 + %57 = OpLoad %float %56 + %59 = OpFMul %float %57 %float_1280 + %60 = OpConvertFToS %int %59 + %62 = OpAccessChain %_ptr_Function_float %xIn %uint_1 + %63 = OpLoad %float %62 + %65 = OpFMul %float %63 %float_720 + %66 = OpConvertFToS %int %65 + %68 = OpCompositeConstruct %v3int %60 %66 %int_0 + OpStore %coord %68 + %73 = OpLoad %7 %SampledImage + OpStore %param %73 + %75 = OpLoad %v3int %coord + OpStore %param_0 %75 + %76 = OpFunctionCall %v4float %sample_fetch_t21_vi3_ %param %param_0 + OpStore %value %76 + %77 = OpLoad %7 %SampledImage + %78 = OpLoad %v3int %coord + %79 = OpVectorShuffle %v2int %78 %78 0 1 + %80 = OpAccessChain %_ptr_Function_int %coord %uint_2 + %81 = OpLoad %int %80 + %82 = OpImageFetch %v4float %77 %79 Lod %81 + %83 = OpLoad %v4float %value + %84 = OpFAdd %v4float %83 %82 + OpStore %value %84 + %86 = OpLoad %7 %SampledImage + OpStore %param_1 %86 + %88 = OpLoad %v4float %xIn + %89 = OpVectorShuffle %v2float %88 %88 0 1 + OpStore %param_2 %89 + %90 = OpFunctionCall %v4float %sample_sampler_t21_vf2_ %param_1 %param_2 + %91 = OpLoad %v4float %value + %92 = OpFAdd 
%v4float %91 %90 + OpStore %value %92 + %93 = OpLoad %7 %SampledImage + %94 = OpLoad %43 %Sampler + %95 = OpSampledImage %47 %93 %94 + %96 = OpLoad %v4float %xIn + %97 = OpVectorShuffle %v2float %96 %96 0 1 + %98 = OpImageSampleImplicitLod %v4float %95 %97 + %99 = OpLoad %v4float %value + %100 = OpFAdd %v4float %99 %98 + OpStore %value %100 + %101 = OpLoad %v4float %value + OpReturnValue %101 + OpFunctionEnd diff --git a/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag b/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag new file mode 100644 index 00000000000..a232bd48987 --- /dev/null +++ b/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 36 +; Schema: 0 + OpCapability Shader + OpCapability ImageQuery + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %b "b" + OpName %uSampler2D "uSampler2D" + OpName %c "c" + OpName %uSampler2DMS "uSampler2DMS" + OpName %l1 "l1" + OpName %s0 "s0" + OpDecorate %uSampler2D DescriptorSet 0 + OpDecorate %uSampler2D Binding 0 + OpDecorate %uSampler2DMS DescriptorSet 0 + OpDecorate %uSampler2DMS Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %v2int = OpTypeVector %int 2 +%_ptr_Function_v2int = OpTypePointer Function %v2int + %float = OpTypeFloat 32 + %11 = OpTypeImage %float 2D 0 0 0 1 Unknown +%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %11 + %uSampler2D = OpVariable %_ptr_UniformConstant_12 UniformConstant + %int_0 = OpConstant %int 0 + %20 = OpTypeImage %float 2D 0 0 1 1 Unknown +%_ptr_UniformConstant_21 = OpTypePointer UniformConstant %20 +%uSampler2DMS = OpVariable %_ptr_UniformConstant_21 UniformConstant +%_ptr_Function_int = OpTypePointer Function %int + %main = OpFunction %void 
None %3 + %5 = OpLabel + %b = OpVariable %_ptr_Function_v2int Function + %c = OpVariable %_ptr_Function_v2int Function + %l1 = OpVariable %_ptr_Function_int Function + %s0 = OpVariable %_ptr_Function_int Function + %15 = OpLoad %11 %uSampler2D + %18 = OpImageQuerySizeLod %v2int %15 %int_0 + OpStore %b %18 + %24 = OpLoad %20 %uSampler2DMS + %26 = OpImageQuerySize %v2int %24 + OpStore %c %26 + %29 = OpLoad %11 %uSampler2D + %31 = OpImageQueryLevels %int %29 + OpStore %l1 %31 + %33 = OpLoad %20 %uSampler2DMS + %35 = OpImageQuerySamples %int %33 + OpStore %s0 %35 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag b/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag new file mode 100644 index 00000000000..628a9f5ba91 --- /dev/null +++ b/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag @@ -0,0 +1,42 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 10 +; Bound: 21 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 320 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %v "v" + OpDecorate %v RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%_ptr_Function_v3float = OpTypePointer Function %v3float + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %bool = OpTypeBool + %false = OpConstantFalse %bool + %float_99 = OpConstant %float 99 + %uint = OpTypeInt 32 0 +%uint_spec_3 = OpSpecConstant %uint 3 +%_ptr_Function_float = OpTypePointer Function %float + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable %_ptr_Function_v3float Function + OpStore %v %11 + OpSelectionMerge %15 None + OpBranchConditional %false %14 
%15 + %14 = OpLabel + %20 = OpAccessChain %_ptr_Function_float %v %uint_spec_3 + OpStore %20 %float_99 + OpBranch %15 + %15 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/out-of-bounds-access.asm.frag b/shaders/asm/frag/out-of-bounds-access.asm.frag new file mode 100644 index 00000000000..542b74b2fd9 --- /dev/null +++ b/shaders/asm/frag/out-of-bounds-access.asm.frag @@ -0,0 +1,47 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 10 +; Bound: 21 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 320 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %v "v" + OpDecorate %v RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%_ptr_Function_v3float = OpTypePointer Function %v3float + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %bool = OpTypeBool + %false = OpConstantFalse %bool + %float_99 = OpConstant %float 99 + %float_88 = OpConstant %float 88 + %uint = OpTypeInt 32 0 + %uint_3 = OpConstant %uint 3 + %sint = OpTypeInt 32 1 + %sint_3 = OpConstant %sint -1 +%_ptr_Function_float = OpTypePointer Function %float + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable %_ptr_Function_v3float Function + OpStore %v %11 + OpSelectionMerge %15 None + OpBranchConditional %false %14 %15 + %14 = OpLabel + %20 = OpAccessChain %_ptr_Function_float %v %uint_3 + OpStore %20 %float_99 + %99 = OpAccessChain %_ptr_Function_float %v %sint_3 + OpStore %99 %float_88 + OpBranch %15 + %15 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/pack-and-unpack-uint2.asm.frag b/shaders/asm/frag/pack-and-unpack-uint2.asm.frag new file mode 100644 index 
00000000000..43d0970e8d5 --- /dev/null +++ b/shaders/asm/frag/pack-and-unpack-uint2.asm.frag @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 34 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_ARB_gpu_shader_int64" + OpName %main "main" + OpName %packed "packed" + OpName %unpacked "unpacked" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %ulong = OpTypeInt 64 0 +%_ptr_Function_ulong = OpTypePointer Function %ulong + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %uint_18 = OpConstant %uint 18 + %uint_52 = OpConstant %uint 52 + %13 = OpConstantComposite %v2uint %uint_18 %uint_52 +%_ptr_Function_v2uint = OpTypePointer Function %v2uint + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %uint_0 = OpConstant %uint 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_1 = OpConstant %uint 1 + %float_1 = OpConstant %float 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %packed = OpVariable %_ptr_Function_ulong Function + %unpacked = OpVariable %_ptr_Function_v2uint Function + %14 = OpBitcast %ulong %13 + OpStore %packed %14 + %17 = OpLoad %ulong %packed + %18 = OpBitcast %v2uint %17 + OpStore %unpacked %18 + %25 = OpAccessChain %_ptr_Function_uint %unpacked %uint_0 + %26 = OpLoad %uint %25 + %27 = OpConvertUToF %float %26 + %29 = OpAccessChain %_ptr_Function_uint %unpacked %uint_1 + %30 = OpLoad %uint %29 + %31 = OpConvertUToF %float %30 + %33 = OpCompositeConstruct %v4float %27 %31 %float_1 %float_1 + OpStore %FragColor %33 + OpReturn + OpFunctionEnd diff --git 
a/shaders/asm/frag/switch-preserve-sign-extension.asm.frag b/shaders/asm/frag/switch-preserve-sign-extension.asm.frag new file mode 100644 index 00000000000..97140ee5536 --- /dev/null +++ b/shaders/asm/frag/switch-preserve-sign-extension.asm.frag @@ -0,0 +1,44 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 10 +; Bound: 19 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %sw "sw" + OpName %result "result" + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_42 = OpConstant %int 42 + %int_0 = OpConstant %int 0 + %int_420 = OpConstant %int 420 + %main = OpFunction %void None %3 + %5 = OpLabel + %sw = OpVariable %_ptr_Function_int Function + %result = OpVariable %_ptr_Function_int Function + OpStore %sw %int_42 + OpStore %result %int_0 + %12 = OpLoad %int %sw + OpSelectionMerge %16 None + OpSwitch %12 %16 -42 %13 420 %14 -1234 %15 + %13 = OpLabel + OpStore %result %int_42 + OpBranch %14 + %14 = OpLabel + OpStore %result %int_420 + OpBranch %15 + %15 = OpLabel + OpStore %result %int_420 + OpBranch %16 + %16 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag b/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag index ae7a972d7b2..e7e6f37ea27 100644 --- a/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag +++ b/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag @@ -5,6 +5,7 @@ ; Schema: 0 OpCapability Shader OpCapability StorageInputOutput16 + OpCapability Float16 OpExtension "SPV_KHR_16bit_storage" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 diff --git a/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert 
b/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert index b566a3d1a0f..1ff67798bdd 100644 --- a/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert +++ b/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert @@ -49,8 +49,10 @@ %28 = OpConstant %17 2 %33 = OpConstant %12 20 %34 = OpConstant %12 30 + %int_3 = OpConstant %12 -3 + %bar = OpSpecConstantOp %12 SRem %13 %int_3 %35 = OpTypeVector %12 4 - %36 = OpSpecConstantComposite %35 %33 %34 %15 %15 + %36 = OpSpecConstantComposite %35 %33 %34 %15 %bar %40 = OpTypeVector %12 2 %41 = OpSpecConstantOp %40 VectorShuffle %36 %36 1 0 %foo = OpSpecConstantOp %12 CompositeExtract %36 1 diff --git a/shaders/desktop-only/frag/image-size.frag b/shaders/desktop-only/frag/image-size.frag new file mode 100644 index 00000000000..ffd0bfa20d3 --- /dev/null +++ b/shaders/desktop-only/frag/image-size.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(r32f, set = 0, binding = 0) uniform image2D uImage1; +layout(r32f, set = 0, binding = 1) uniform image2D uImage2; + +void main() +{ + FragColor = vec4(imageSize(uImage1), imageSize(uImage2)); +} diff --git a/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag b/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag new file mode 100644 index 00000000000..ffd0bfa20d3 --- /dev/null +++ b/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(r32f, set = 0, binding = 0) uniform image2D uImage1; +layout(r32f, set = 0, binding = 1) uniform image2D uImage2; + +void main() +{ + FragColor = vec4(imageSize(uImage1), imageSize(uImage2)); +} diff --git a/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vert b/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert similarity index 100% rename from shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vert rename to 
shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert diff --git a/shaders/desktop-only/vert/shader-draw-parameters.desktop.vert b/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert similarity index 100% rename from shaders/desktop-only/vert/shader-draw-parameters.desktop.vert rename to shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert diff --git a/shaders/frag/avoid-expression-lowering-to-loop.frag b/shaders/frag/avoid-expression-lowering-to-loop.frag new file mode 100644 index 00000000000..3473875a40a --- /dev/null +++ b/shaders/frag/avoid-expression-lowering-to-loop.frag @@ -0,0 +1,23 @@ +#version 310 es +precision mediump float; +precision mediump int; + +layout(binding = 0) uniform mediump sampler2D tex; +layout(binding = 1) uniform Count +{ + float count; +}; + +layout(location = 0) in highp vec4 vertex; +layout(location = 0) out vec4 fragColor; + +void main() { + + highp float size = 1.0 / float(textureSize(tex, 0).x); + float r = 0.0; + float d = dFdx(vertex.x); + for (float i = 0.0; i < count ; i += 1.0) + r += size * d; + + fragColor = vec4(r); +} diff --git a/shaders/frag/barycentric-khr.frag b/shaders/frag/barycentric-khr.frag new file mode 100644 index 00000000000..fcaca04e23c --- /dev/null +++ b/shaders/frag/barycentric-khr.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_EXT_fragment_shader_barycentric : require + +layout(location = 0) out vec2 value; +layout(location = 0) pervertexEXT in vec2 vUV[3]; +layout(location = 3) pervertexEXT in vec2 vUV2[3]; + +void main () { + value = gl_BaryCoordEXT.x * vUV[0] + gl_BaryCoordEXT.y * vUV[1] + gl_BaryCoordEXT.z * vUV[2]; + value += gl_BaryCoordNoPerspEXT.x * vUV2[0] + gl_BaryCoordNoPerspEXT.y * vUV2[1] + gl_BaryCoordNoPerspEXT.z * vUV2[2]; +} diff --git a/shaders/frag/barycentric-nv.frag b/shaders/frag/barycentric-nv.frag index 340408370b9..38d651a780b 100644 --- a/shaders/frag/barycentric-nv.frag +++ b/shaders/frag/barycentric-nv.frag @@ -2,17 +2,10 @@ 
#extension GL_NV_fragment_shader_barycentric : require layout(location = 0) out vec2 value; - -layout(set = 0, binding = 0) readonly buffer Vertices -{ - vec2 uvs[]; -}; +layout(location = 0) pervertexNV in vec2 vUV[3]; +layout(location = 1) pervertexNV in vec2 vUV2[3]; void main () { - int prim = gl_PrimitiveID; - vec2 uv0 = uvs[3 * prim + 0]; - vec2 uv1 = uvs[3 * prim + 1]; - vec2 uv2 = uvs[3 * prim + 2]; - value = gl_BaryCoordNV.x * uv0 + gl_BaryCoordNV.y * uv1 + gl_BaryCoordNV.z * uv2; - value += gl_BaryCoordNoPerspNV.x * uv0 + gl_BaryCoordNoPerspNV.y * uv1 + gl_BaryCoordNoPerspNV.z * uv2; + value = gl_BaryCoordNV.x * vUV[0] + gl_BaryCoordNV.y * vUV[1] + gl_BaryCoordNV.z * vUV[2]; + value += gl_BaryCoordNoPerspNV.x * vUV2[0] + gl_BaryCoordNoPerspNV.y * vUV2[1] + gl_BaryCoordNoPerspNV.z * vUV2[2]; } diff --git a/shaders/frag/modf-pointer-function-analysis.frag b/shaders/frag/modf-pointer-function-analysis.frag new file mode 100644 index 00000000000..21e51262f0c --- /dev/null +++ b/shaders/frag/modf-pointer-function-analysis.frag @@ -0,0 +1,25 @@ +#version 450 + +layout(location = 0) in vec4 v; +layout(location = 0) out vec4 vo0; +layout(location = 1) out vec4 vo1; + +vec4 modf_inner(out vec4 tmp) +{ + return modf(v, tmp); +} + +float modf_inner_partial(inout vec4 tmp) +{ + return modf(v.x, tmp.x); +} + +void main() +{ + vec4 tmp; + vo0 = modf_inner(tmp); + vo1 = tmp; + + vo0.x += modf_inner_partial(tmp); + vo1.x += tmp.x; +} diff --git a/shaders/frag/pixel-interlock-ordered.frag b/shaders/frag/pixel-interlock-ordered.frag new file mode 100644 index 00000000000..4439f0672b5 --- /dev/null +++ b/shaders/frag/pixel-interlock-ordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + 
beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, 0xff); + endInvocationInterlockARB(); +} diff --git a/shaders/frag/pixel-interlock-unordered.frag b/shaders/frag/pixel-interlock-unordered.frag new file mode 100644 index 00000000000..f8fd468c1bd --- /dev/null +++ b/shaders/frag/pixel-interlock-unordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_unordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, 0xff); + endInvocationInterlockARB(); +} diff --git a/shaders/frag/post-depth-coverage-es.frag b/shaders/frag/post-depth-coverage-es.frag new file mode 100644 index 00000000000..ecc57e4a8f2 --- /dev/null +++ b/shaders/frag/post-depth-coverage-es.frag @@ -0,0 +1,13 @@ +#version 310 es +#extension GL_EXT_post_depth_coverage : require +#extension GL_OES_sample_variables : require +precision mediump float; + +layout(early_fragment_tests, post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(gl_SampleMaskIn[0]); +} diff --git a/shaders/frag/post-depth-coverage.frag b/shaders/frag/post-depth-coverage.frag new file mode 100644 index 00000000000..4f134b4f3bb --- /dev/null +++ b/shaders/frag/post-depth-coverage.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_ARB_post_depth_coverage : require + +layout(post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(gl_SampleMaskIn[0]); +} diff --git a/shaders/frag/round-even.frag b/shaders/frag/round-even.frag new file mode 100644 
index 00000000000..594ac162939 --- /dev/null +++ b/shaders/frag/round-even.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in float vB; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = roundEven(vA); + FragColor *= roundEven(vB); +} diff --git a/shaders/frag/round.frag b/shaders/frag/round.frag new file mode 100644 index 00000000000..c87b0abbff5 --- /dev/null +++ b/shaders/frag/round.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in float vB; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = round(vA); + FragColor *= round(vB); +} diff --git a/shaders/frag/sample-interlock-ordered.frag b/shaders/frag/sample-interlock-ordered.frag new file mode 100644 index 00000000000..fa80dc9f82b --- /dev/null +++ b/shaders/frag/sample-interlock-ordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(sample_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, gl_SampleMaskIn[0]); + endInvocationInterlockARB(); +} diff --git a/shaders/frag/sample-interlock-unordered.frag b/shaders/frag/sample-interlock-unordered.frag new file mode 100644 index 00000000000..6fe5437f3aa --- /dev/null +++ b/shaders/frag/sample-interlock-unordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(sample_interlock_unordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + 
beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, 0xff); + endInvocationInterlockARB(); +} diff --git a/shaders/frag/struct-type-unrelated-alias.frag b/shaders/frag/struct-type-unrelated-alias.frag new file mode 100644 index 00000000000..d1c7905225d --- /dev/null +++ b/shaders/frag/struct-type-unrelated-alias.frag @@ -0,0 +1,19 @@ +#version 450 + +layout(location = 0) out float FragColor; + +struct T +{ + float a; +}; + +void main() +{ + T foo; + struct T { float b; }; + T bar; + + foo.a = 10.0; + bar.b = 20.0; + FragColor = foo.a + bar.b; +} diff --git a/shaders/frag/switch-unreachable-break.frag b/shaders/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..b0421e60ef3 --- /dev/null +++ b/shaders/frag/switch-unreachable-break.frag @@ -0,0 +1,32 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vInput; + +layout(set = 0, binding = 0) uniform UBO +{ + int cond; + int cond2; +}; + +void main() +{ + bool frog = false; + switch (cond) + { + case 1: + if (cond2 < 50) + break; + else + discard; + + break; + + default: + frog = true; + break; + } + + FragColor = frog ? vec4(10.0) : vec4(20.0); +} + diff --git a/shaders/frag/ubo-load-row-major-workaround.frag b/shaders/frag/ubo-load-row-major-workaround.frag new file mode 100644 index 00000000000..03205ee8276 --- /dev/null +++ b/shaders/frag/ubo-load-row-major-workaround.frag @@ -0,0 +1,44 @@ +#version 450 + +struct RowMajor +{ + mat4 B; +}; + +struct NestedRowMajor +{ + RowMajor rm; +}; + +layout(set = 0, binding = 0, row_major) uniform UBO +{ + mat4 A; + layout(column_major) mat4 C; // This should also be worked around. 
+}; + + +layout(set = 0, binding = 1, row_major) uniform UBO2 +{ + RowMajor rm; +}; + +layout(set = 0, binding = 2, row_major) uniform UBO3 +{ + NestedRowMajor rm2; +}; + +layout(set = 0, binding = 3) uniform UBONoWorkaround +{ + mat4 D; +}; + +layout(location = 0) in vec4 Clip; +layout(location = 0) out vec4 FragColor; + +void main() +{ + NestedRowMajor rm2_loaded = rm2; + FragColor = rm2_loaded.rm.B * rm.B * A * C * Clip; + FragColor += D * Clip; + FragColor += A[1] * Clip; +} diff --git a/shaders/geom/geometry-passthrough.geom b/shaders/geom/geometry-passthrough.geom new file mode 100644 index 00000000000..7f1997c76a0 --- /dev/null +++ b/shaders/geom/geometry-passthrough.geom @@ -0,0 +1,28 @@ +#version 450 +#extension GL_NV_geometry_shader_passthrough : require + +layout(triangles) in; + +layout(passthrough) in gl_PerVertex +{ + vec4 gl_Position; +} gl_in[]; + +layout(passthrough, location = 0) in VertexBlock +{ + int a; + int b; +} v1[]; + +layout(location = 2) in VertexBlock2 +{ + int a; + layout(passthrough) int b; +} v2[]; + +layout(passthrough, location = 4) in vec4 vPoint[]; + +void main() +{ + gl_Layer = gl_InvocationID + v1[0].a + v2[1].b; +} diff --git a/shaders/geom/multi-stream.geom b/shaders/geom/multi-stream.geom new file mode 100644 index 00000000000..19b3bbb9c22 --- /dev/null +++ b/shaders/geom/multi-stream.geom @@ -0,0 +1,15 @@ +#version 450 + +layout(triangles) in; +layout(points, max_vertices = 2) out; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + EmitStreamVertex(0); + EndStreamPrimitive(0); + gl_Position = gl_in[0].gl_Position + 2; + EmitStreamVertex(1); + EndStreamPrimitive(1); +} + diff --git a/shaders/geom/transform-feedback-streams.geom b/shaders/geom/transform-feedback-streams.geom new file mode 100644 index 00000000000..1e628907567 --- /dev/null +++ b/shaders/geom/transform-feedback-streams.geom @@ -0,0 +1,24 @@ +#version 450 +layout(max_vertices = 2, points) out; +layout(points) in; +layout(stream = 1, xfb_stride = 32, 
xfb_offset = 16, xfb_buffer = 2, location = 0) out vec4 vFoo; + +layout(stream = 1, xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(stream = 2, xfb_buffer = 3) out VertOut +{ + layout(xfb_stride = 16, xfb_offset = 0, location = 1) vec4 vBar; +}; + +void main() +{ + gl_Position = vec4(1.0); + vFoo = vec4(3.0); + EmitStreamVertex(1); + vBar = vec4(5.0); + EmitStreamVertex(2); +} diff --git a/shaders/legacy/fragment/explicit-lod.legacy.vert b/shaders/legacy/fragment/explicit-lod.legacy.vert new file mode 100644 index 00000000000..d2cbd5a4f94 --- /dev/null +++ b/shaders/legacy/fragment/explicit-lod.legacy.vert @@ -0,0 +1,12 @@ +#version 310 es + +precision mediump float; + +layout(binding = 0) uniform sampler2D tex; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = textureLod(tex, vec2(0.4, 0.6), 3.0); +} diff --git a/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag b/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag new file mode 100644 index 00000000000..b1e7d1b4f30 --- /dev/null +++ b/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag @@ -0,0 +1,37 @@ +#version 450 + +struct Foo +{ + vec4 a; + vec4 b; +}; + +struct Bar +{ + vec4 a; + vec4 b; +}; + +struct Baz +{ + Foo foo; + Bar bar; +}; + +layout(location = 0) in VertexIn +{ + Foo a; + Bar b; +}; + +layout(location = 4) in Baz baz; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + Baz bazzy = baz; + Foo bazzy_foo = baz.foo; + Bar bazzy_bar = baz.bar; + FragColor = a.a + b.b + bazzy.foo.b + bazzy_foo.a + bazzy_bar.b; +} diff --git a/shaders/legacy/fragment/round.legacy.frag b/shaders/legacy/fragment/round.legacy.frag new file mode 100644 index 00000000000..c87b0abbff5 --- /dev/null +++ b/shaders/legacy/fragment/round.legacy.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in float vB; +layout(location = 0) out 
vec4 FragColor; + +void main() +{ + FragColor = round(vA); + FragColor *= round(vB); +} diff --git a/shaders/legacy/fragment/switch.legacy.frag b/shaders/legacy/fragment/switch.legacy.frag new file mode 100644 index 00000000000..d5117981731 --- /dev/null +++ b/shaders/legacy/fragment/switch.legacy.frag @@ -0,0 +1,43 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in float vIndexF; + +void main() +{ + int vIndex = int(vIndexF); + vec4 v = vec4(0.0); + switch (vIndex) + { + case 2: + v = vec4(0, 2, 3, 4); + break; + case 4: + case 5: + v = vec4(1, 2, 3, 4); + break; + case 8: + case 9: + v = vec4(40, 20, 30, 40); + break; + case 10: + v = vec4(10.0); + case 11: + v += 1.0; + case 12: + v += 2.0; + break; + default: + v = vec4(10, 20, 30, 40); + break; + } + + vec4 w = vec4(20.0); + switch (vIndex) + { + case 10: + case 20: + w = vec4(40.0); + } + FragColor = v + w; +} diff --git a/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert b/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert new file mode 100644 index 00000000000..f6ad932eea1 --- /dev/null +++ b/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert @@ -0,0 +1,15 @@ +#version 450 + +struct Foo +{ + float a[4]; +}; + +layout(location = 0) out Foo foo; + +void main() +{ + gl_Position = vec4(1.0); + for (int i = 0; i < 4; i++) + foo.a[i] = float(i + 2); +} diff --git a/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert b/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert new file mode 100644 index 00000000000..57e914c1feb --- /dev/null +++ b/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert @@ -0,0 +1,40 @@ +#version 450 + +struct Foo +{ + vec4 a; + vec4 b; +}; + +struct Bar +{ + vec4 a; + vec4 b; +}; + +struct Baz +{ + Foo foo; + Bar bar; +}; + +layout(location = 0) out VertexIn +{ + Foo a; + Bar b; +}; + +layout(location = 4) out Baz baz; + +void main() +{ + a.a = vec4(10.0); + a.b = vec4(20.0); + 
b.a = vec4(30.0); + b.b = vec4(40.0); + a = Foo(vec4(50.0), vec4(60.0)); + b = Bar(vec4(50.0), vec4(60.0)); + baz.foo = Foo(vec4(100.0), vec4(200.0)); + baz.bar = Bar(vec4(300.0), vec4(400.0)); + baz = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))); +} diff --git a/shaders/legacy/vert/switch-nested.legacy.vert b/shaders/legacy/vert/switch-nested.legacy.vert new file mode 100644 index 00000000000..6726c1c6930 --- /dev/null +++ b/shaders/legacy/vert/switch-nested.legacy.vert @@ -0,0 +1,28 @@ +#version 450 + +layout(set = 0, binding = 0) uniform UBO +{ + int func_arg; + int inner_func_arg; +}; + +vec4 test_inner_func(bool b) +{ + if (b) + return vec4(1.0); + else + return vec4(0.0); +} + +vec4 test_func(bool b) +{ + if (b) + return test_inner_func(inner_func_arg != 0); + else + return vec4(0.0); +} + +void main() +{ + gl_Position = test_func(func_arg != 0); +} diff --git a/shaders/legacy/vert/transpose.legacy.vert b/shaders/legacy/vert/transpose.legacy.vert index 84f618262ac..588c28d53db 100644 --- a/shaders/legacy/vert/transpose.legacy.vert +++ b/shaders/legacy/vert/transpose.legacy.vert @@ -15,6 +15,18 @@ void main() vec4 c1 = M * (MVPColMajor * Position); vec4 c2 = M * (Position * MVPRowMajor); vec4 c3 = M * (Position * MVPColMajor); - gl_Position = c0 + c1 + c2 + c3; + + vec4 c4 = transpose(MVPRowMajor) * Position; + vec4 c5 = transpose(MVPColMajor) * Position; + vec4 c6 = Position * transpose(MVPRowMajor); + vec4 c7 = Position * transpose(MVPColMajor); + + // Multiplying by scalar forces resolution of the transposition + vec4 c8 = (MVPRowMajor * 2.0) * Position; + vec4 c9 = (transpose(MVPColMajor) * 2.0) * Position; + vec4 c10 = Position * (MVPRowMajor * 2.0); + vec4 c11 = Position * (transpose(MVPColMajor) * 2.0); + + gl_Position = c0 + c1 + c2 + c3 + c4 + c5 + c6 + c7 + c8 + c9 + c10 + c11; } diff --git a/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh new file mode 
100644 index 00000000000..0f1beef75b9 --- /dev/null +++ b/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(lines, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0, 1) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = 
int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} diff --git a/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh b/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..3d037bcd5ea --- /dev/null +++ b/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(points, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + 
prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} diff --git a/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..944525aa505 --- /dev/null +++ b/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(triangles, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + 
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0, 1, 2) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} diff --git a/shaders/tese/load-array-of-array.tese b/shaders/tese/load-array-of-array.tese new file mode 100644 index 00000000000..7383f7086eb --- /dev/null +++ b/shaders/tese/load-array-of-array.tese @@ -0,0 +1,10 @@ +#version 450 +layout(ccw, quads) in; + +layout(location = 0) in vec4 vTexCoord[][1]; + +void main() +{ + vec4 tmp[gl_MaxPatchVertices][1] = vTexCoord; + gl_Position = tmp[0][0] + tmp[2][0] + tmp[3][0]; +} diff --git a/shaders/tese/patch-input-array.tese b/shaders/tese/patch-input-array.tese new file mode 100644 index 00000000000..741b2c3b9bb --- /dev/null +++ b/shaders/tese/patch-input-array.tese @@ -0,0 +1,9 @@ +#version 450 + +layout(quads) in; +layout(location = 0) patch in float P[4]; + +void main() +{ + gl_Position = vec4(P[0], P[1], P[2], P[3]); +} diff --git a/shaders/vert/no-contraction.vert b/shaders/vert/no-contraction.vert new file mode 100644 index 
00000000000..206fbf0de80 --- /dev/null +++ b/shaders/vert/no-contraction.vert @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in vec4 vB; +layout(location = 2) in vec4 vC; + +void main() +{ + precise vec4 mul = vA * vB; + precise vec4 add = vA + vB; + precise vec4 sub = vA - vB; + precise vec4 mad = vA * vB + vC; + precise vec4 summed = mul + add + sub + mad; + gl_Position = summed; +} diff --git a/shaders/vert/row-major-workaround.vert b/shaders/vert/row-major-workaround.vert new file mode 100644 index 00000000000..edb8a842ebc --- /dev/null +++ b/shaders/vert/row-major-workaround.vert @@ -0,0 +1,28 @@ +#version 310 es + +layout(binding = 0) uniform Buffer +{ + layout(row_major) highp mat4 HP; + layout(row_major) mediump mat4 MP; +}; + +layout(binding = 1) uniform Buffer2 +{ + layout(row_major) mediump mat4 MP2; +}; + + +layout(location = 0) in vec4 Hin; +layout(location = 1) in mediump vec4 Min; +layout(location = 0) out vec4 H; +layout(location = 1) out mediump vec4 M; +layout(location = 2) out mediump vec4 M2; + +void main() +{ + gl_Position = vec4(1.0); + H = HP * Hin; + M = MP * Min; + M2 = MP2 * Min; +} + diff --git a/shaders/vert/transform-feedback-decorations.vert b/shaders/vert/transform-feedback-decorations.vert new file mode 100644 index 00000000000..b825dd112d8 --- /dev/null +++ b/shaders/vert/transform-feedback-decorations.vert @@ -0,0 +1,20 @@ +#version 450 +layout(xfb_stride = 32, xfb_offset = 16, xfb_buffer = 2, location = 0) out vec4 vFoo; + +layout(xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(xfb_buffer = 3) out VertOut +{ + layout(xfb_stride = 16, xfb_offset = 0, location = 1) vec4 vBar; +}; + +void main() +{ + gl_Position = vec4(1.0); + vFoo = vec4(3.0); + vBar = vec4(5.0); +} diff --git a/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp b/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp 
index a1da941fdb1..f0421b292f5 100644 --- a/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp +++ b/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp @@ -2,7 +2,7 @@ #extension GL_EXT_buffer_reference : require layout(local_size_x = 1) in; -layout(buffer_reference) buffer Block +layout(buffer_reference, buffer_reference_align = 4) buffer Block { float v; }; diff --git a/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp b/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp new file mode 100644 index 00000000000..cf0a8e49331 --- /dev/null +++ b/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp @@ -0,0 +1,24 @@ +#version 450 +#extension GL_EXT_buffer_reference : require + +layout(buffer_reference) buffer Foo +{ + uint v; +}; + +layout(buffer_reference, buffer_reference_align = 8) buffer Bar +{ + uint a; + uint b; + Foo foo; +}; + +layout(push_constant) uniform Push +{ + Bar bar; +}; + +void main() +{ + atomicAdd(bar.b, 1u); +} diff --git a/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp b/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp new file mode 100644 index 00000000000..41b44519dde --- /dev/null +++ b/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp @@ -0,0 +1,25 @@ +#version 450 +#extension GL_EXT_buffer_reference : require + +layout(buffer_reference) buffer Foo +{ + uint v; +}; + +layout(buffer_reference, buffer_reference_align = 8) buffer Bar +{ + uint a; + uint b; + Foo foo; +}; + +layout(push_constant) uniform Push +{ + Bar bar; +}; + +void main() +{ + uint v = bar.b; + atomicAdd(bar.a, v); +} diff --git a/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp b/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp new file mode 100644 index 00000000000..1afb6f772e2 --- /dev/null +++ b/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp @@ -0,0 
+1,19 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require + +layout(buffer_reference) buffer PtrInt +{ + int value; +}; + +layout(set = 0, binding = 0) buffer Buf +{ + uvec2 ptr; + PtrInt ptrint; +}; + +void main() +{ + ptr = uvec2(ptrint); +} diff --git a/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp b/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp new file mode 100644 index 00000000000..b3880823682 --- /dev/null +++ b/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp @@ -0,0 +1,18 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require + +layout(buffer_reference) buffer PtrInt +{ + int value; +}; + +layout(set = 0, binding = 0) buffer Buf +{ + uvec2 ptr; +}; + +void main() +{ + PtrInt(ptr).value = 10; +} diff --git a/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp b/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp new file mode 100644 index 00000000000..c65463168a1 --- /dev/null +++ b/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp @@ -0,0 +1,31 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 64) in; + +layout(std430, buffer_reference) readonly buffer RO +{ + vec4 v[]; +}; + +layout(std430, buffer_reference) restrict buffer RW +{ + vec4 v[]; +}; + +layout(std430, buffer_reference) coherent writeonly buffer WO +{ + vec4 v[]; +}; + +layout(push_constant, std430) uniform Registers +{ + RO ro; + RW rw; + WO wo; +} registers; + +void main() +{ + registers.rw.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; + registers.wo.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; +} diff --git a/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp b/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp new file mode 100644 index 00000000000..987cb878e79 --- /dev/null +++ 
b/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp @@ -0,0 +1,18 @@ +#version 450 +#extension GL_EXT_shader_atomic_float : require + +shared float shared_v; +layout(set = 0, binding = 0, r32f) uniform image2D uImage; + +layout(set = 0, binding = 1) buffer SSBO +{ + float v; +}; + +void main() +{ + float value = atomicAdd(shared_v, 2.0); + atomicAdd(v, value); + imageAtomicAdd(uImage, ivec2(gl_GlobalInvocationID.xy), value); + value = imageAtomicExchange(uImage, ivec2(gl_GlobalInvocationID.xy), value); +} diff --git a/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp b/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp new file mode 100644 index 00000000000..c7dc397e35f --- /dev/null +++ b/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp @@ -0,0 +1,58 @@ +#version 460 +#extension GL_EXT_ray_query : require +#extension GL_EXT_ray_tracing : require +#extension GL_EXT_ray_flags_primitive_culling : require +layout(primitive_culling); + +layout(set = 0, binding = 0) uniform accelerationStructureEXT AS; + +layout(set = 0, binding = 1) uniform Params +{ + uint ray_flags; + uint cull_mask; + vec3 origin; + float tmin; + vec3 dir; + float tmax; + float thit; + uvec2 bda; +}; + +rayQueryEXT q2[2]; + +void main() +{ + rayQueryEXT q; + bool res; + uint type; + float fval; + vec3 fvals; + int ival; + mat4x3 matrices; + + rayQueryInitializeEXT(q, AS, ray_flags, cull_mask, origin, tmin, dir, tmax); + rayQueryInitializeEXT(q2[1], accelerationStructureEXT(bda), ray_flags, cull_mask, origin, tmin, dir, tmax); + + res = rayQueryProceedEXT(q); + rayQueryTerminateEXT(q2[0]); + rayQueryGenerateIntersectionEXT(q, thit); + rayQueryConfirmIntersectionEXT(q2[1]); + fval = rayQueryGetRayTMinEXT(q); + type = rayQueryGetRayFlagsEXT(q2[0]); + fvals = rayQueryGetWorldRayDirectionEXT(q); + fvals = rayQueryGetWorldRayOriginEXT(q); + type = rayQueryGetIntersectionTypeEXT(q2[1], true); + res = rayQueryGetIntersectionCandidateAABBOpaqueEXT(q2[1]); + fval = 
rayQueryGetIntersectionTEXT(q2[1], false); + ival = rayQueryGetIntersectionInstanceCustomIndexEXT(q, true); + ival = rayQueryGetIntersectionInstanceIdEXT(q2[0], false); + type = rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(q, true); + ival = rayQueryGetIntersectionGeometryIndexEXT(q2[1], false); + ival = rayQueryGetIntersectionPrimitiveIndexEXT(q, true); + fvals.xy = rayQueryGetIntersectionBarycentricsEXT(q2[0], false); + res = rayQueryGetIntersectionFrontFaceEXT(q, true); + fvals = rayQueryGetIntersectionObjectRayDirectionEXT(q, false); + fvals = rayQueryGetIntersectionObjectRayOriginEXT(q2[0], true); + matrices = rayQueryGetIntersectionObjectToWorldEXT(q, false); + matrices = rayQueryGetIntersectionWorldToObjectEXT(q2[1], true); +} diff --git a/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag b/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag new file mode 100644 index 00000000000..ba57b8c5afa --- /dev/null +++ b/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag @@ -0,0 +1,41 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 19 +; Schema: 0 + OpCapability Shader + OpCapability DemoteToHelperInvocationEXT + OpExtension "SPV_EXT_demote_to_helper_invocation" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_demote_to_helper_invocation" + OpName %main "main" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %19 = 
OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %9 = OpIsHelperInvocationEXT %bool + OpDemoteToHelperInvocationEXT + %10 = OpLogicalNot %bool %9 + OpSelectionMerge %12 None + OpBranchConditional %10 %11 %12 + %11 = OpLabel + OpStore %FragColor %19 + OpBranch %12 + %12 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag b/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag new file mode 100644 index 00000000000..8b8bb61ff7b --- /dev/null +++ b/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag @@ -0,0 +1,8 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void main() +{ + demote; + bool helper = helperInvocationEXT(); +} diff --git a/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag b/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag index f59b07c07e7..136133eb300 100644 --- a/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag +++ b/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag @@ -1,28 +1,52 @@ #version 450 #extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_samplerless_texture_functions : require layout(binding = 0) uniform texture2D uSamplers[]; +layout(binding = 0) uniform texture2DMS uSamplersMS[]; layout(binding = 4) uniform sampler2D uCombinedSamplers[]; layout(binding = 1) uniform sampler uSamps[]; layout(location = 0) flat in int vIndex; layout(location = 1) in vec2 vUV; layout(location = 0) out vec4 FragColor; +layout(r32f, binding = 5) uniform image2D uImages[]; +layout(r32ui, binding = 5) uniform uimage2D uImagesU32[]; + layout(set = 0, binding = 2) uniform UBO { vec4 v[64]; } ubos[]; -layout(set = 0, binding = 3) readonly buffer SSBO +layout(set = 0, binding = 3) buffer SSBO { + uint counter; vec4 v[]; } ssbos[]; void main() { int i = vIndex; - FragColor = texture(sampler2D(uSamplers[nonuniformEXT(i + 10)], uSamps[nonuniformEXT(i + 40)]), vUV); + 
FragColor = texture(nonuniformEXT(sampler2D(uSamplers[i + 10], uSamps[i + 40])), vUV); FragColor = texture(uCombinedSamplers[nonuniformEXT(i + 10)], vUV); FragColor += ubos[nonuniformEXT(i + 20)].v[nonuniformEXT(i + 40)]; FragColor += ssbos[nonuniformEXT(i + 50)].v[nonuniformEXT(i + 60)]; + ssbos[nonuniformEXT(i + 60)].v[nonuniformEXT(i + 70)] = vec4(20.0); + + FragColor = texelFetch(uSamplers[nonuniformEXT(i + 10)], ivec2(vUV), 0); + atomicAdd(ssbos[nonuniformEXT(i + 100)].counter, 100u); + + vec2 queried = textureQueryLod(nonuniformEXT(sampler2D(uSamplers[i + 10], uSamps[i + 40])), vUV); + queried += textureQueryLod(uCombinedSamplers[nonuniformEXT(i + 10)], vUV); + FragColor.xy += queried; + + FragColor.x += float(textureQueryLevels(uSamplers[nonuniformEXT(i + 20)])); + FragColor.y += float(textureSamples(uSamplersMS[nonuniformEXT(i + 20)])); + FragColor.xy += vec2(textureSize(uSamplers[nonuniformEXT(i + 20)], 0)); + + FragColor += imageLoad(uImages[nonuniformEXT(i + 50)], ivec2(vUV)); + FragColor.xy += vec2(imageSize(uImages[nonuniformEXT(i + 20)])); + imageStore(uImages[nonuniformEXT(i + 60)], ivec2(vUV), vec4(50.0)); + + imageAtomicAdd(uImagesU32[nonuniformEXT(i + 70)], ivec2(vUV), 40u); } diff --git a/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit b/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit new file mode 100644 index 00000000000..dab437780d0 --- /dev/null +++ b/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +rayPayloadInEXT float payload; + +void in_func() +{ + if (payload > 0.0) + ignoreIntersectionEXT; + else + terminateRayEXT; +} + +void main() +{ + in_func(); +} diff --git a/shaders/vulkan/rahit/terminators.nocompat.vk.rahit b/shaders/vulkan/rahit/terminators.nocompat.vk.rahit new file mode 100644 index 00000000000..943be33c571 --- /dev/null +++ b/shaders/vulkan/rahit/terminators.nocompat.vk.rahit @@ -0,0 +1,17 @@ +#version 460 
+#extension GL_NV_ray_tracing : require + +rayPayloadInNV float payload; + +void in_func() +{ + if (payload > 0.0) + ignoreIntersectionNV(); + else + terminateRayNV(); +} + +void main() +{ + in_func(); +} diff --git a/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall b/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall new file mode 100644 index 00000000000..53c594b6a13 --- /dev/null +++ b/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 4) callableDataInEXT float c; + +void main() +{ + executeCallableEXT(10, 4); +} diff --git a/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..fd82f5bf93e --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,11 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT Foo { float a; float b; } payload; +hitAttributeEXT Foo2 { float a; float b; } hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} diff --git a/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit new file mode 100644 index 00000000000..8d367e3b87f --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit @@ -0,0 +1,11 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV Foo { float a; float b; } payload; +hitAttributeNV Foo2 { float a; float b; } hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} diff --git a/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..7b8d53dd8bf --- /dev/null +++ 
b/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,16 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT Foo { float a; float b; } payload; +hitAttributeEXT Foo2 { float a; float b; } hit; + +void in_function() +{ + payload.a = hit.a; + payload.b = hit.b; +} + +void main() +{ + in_function(); +} diff --git a/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit new file mode 100644 index 00000000000..23a5c6519d0 --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit @@ -0,0 +1,16 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV Foo { float a; float b; } payload; +hitAttributeNV Foo2 { float a; float b; } hit; + +void in_function() +{ + payload.a = hit.a; + payload.b = hit.b; +} + +void main() +{ + in_function(); +} diff --git a/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..145687739ff --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec2 payload; +hitAttributeEXT vec2 hit; + +void main() +{ + payload = hit; +} diff --git a/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit new file mode 100644 index 00000000000..cdbda9cb8a2 --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec2 payload; +hitAttributeNV vec2 hit; + +void main() +{ + payload = hit; +} diff --git 
a/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..f391f1ebfcc --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,12 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo { float a; float b; }; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo hit; + +void main() +{ + payload = hit; +} diff --git a/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit new file mode 100644 index 00000000000..625e125bc90 --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit @@ -0,0 +1,12 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo { float a; float b; }; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo hit; + +void main() +{ + payload = hit; +} diff --git a/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..1bf150000a2 --- /dev/null +++ b/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_HitKindEXT; +} diff --git a/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit new file mode 100644 index 00000000000..39a088f46a2 --- /dev/null +++ b/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_HitKindNV; +} diff --git a/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit new file mode 100644 index 
00000000000..5b4fc4dd506 --- /dev/null +++ b/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_HitTEXT; +} diff --git a/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit new file mode 100644 index 00000000000..16d6f06ea98 --- /dev/null +++ b/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_HitTNV; +} diff --git a/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..2c286465541 --- /dev/null +++ b/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_IncomingRayFlagsEXT; +} diff --git a/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit b/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit new file mode 100644 index 00000000000..a1726d0ef45 --- /dev/null +++ b/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_IncomingRayFlagsNV; +} diff --git a/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..330dfcebb5e --- /dev/null +++ b/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void 
main() +{ + payload = gl_InstanceCustomIndexEXT; +} diff --git a/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit b/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit new file mode 100644 index 00000000000..02ae3430926 --- /dev/null +++ b/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_InstanceCustomIndexNV; +} diff --git a/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..23d1e02c75a --- /dev/null +++ b/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_InstanceID; +} diff --git a/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit b/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit new file mode 100644 index 00000000000..d6f99668117 --- /dev/null +++ b/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_InstanceID; +} diff --git a/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..4d45134cb97 --- /dev/null +++ b/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectRayDirectionEXT; +} diff --git a/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit b/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit new file mode 100644 index 
00000000000..257175b510f --- /dev/null +++ b/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectRayDirectionNV; +} diff --git a/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..0964b7b5668 --- /dev/null +++ b/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectRayOriginEXT; +} diff --git a/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit b/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit new file mode 100644 index 00000000000..8b71e7d9c4b --- /dev/null +++ b/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectRayOriginNV; +} diff --git a/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..4377443c2ba --- /dev/null +++ b/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectToWorldEXT * vec4(payload, 1.0); +} diff --git a/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit b/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit new file mode 100644 index 00000000000..53b1406fe71 --- /dev/null +++ b/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + 
+layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectToWorldNV * vec4(payload, 1.0); +} diff --git a/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..12ed5a7d62d --- /dev/null +++ b/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,19 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Payload +{ + vec4 a; +}; + +layout(location = 0) rayPayloadInEXT Payload payload; + +void write_incoming_payload_in_function() +{ + payload.a = vec4(10.0); +} + +void main() +{ + write_incoming_payload_in_function(); +} diff --git a/shaders/vulkan/rchit/payloads.nocompat.vk.rchit b/shaders/vulkan/rchit/payloads.nocompat.vk.rchit new file mode 100644 index 00000000000..61a86663b4a --- /dev/null +++ b/shaders/vulkan/rchit/payloads.nocompat.vk.rchit @@ -0,0 +1,19 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Payload +{ + vec4 a; +}; + +layout(location = 0) rayPayloadInNV Payload payload; + +void write_incoming_payload_in_function() +{ + payload.a = vec4(10.0); +} + +void main() +{ + write_incoming_payload_in_function(); +} diff --git a/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..f3798c1f42c --- /dev/null +++ b/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_PrimitiveID; +} diff --git a/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit b/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit new file mode 100644 index 00000000000..fdfa1ffa32b --- /dev/null +++ b/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 
0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_PrimitiveID; +} diff --git a/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..2f688baf9e9 --- /dev/null +++ b/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTmaxEXT; +} diff --git a/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit b/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit new file mode 100644 index 00000000000..c0e1387b3c8 --- /dev/null +++ b/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_RayTmaxNV; +} diff --git a/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..3f8ddf05f6b --- /dev/null +++ b/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTminEXT; +} diff --git a/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit b/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit new file mode 100644 index 00000000000..896f4ffb4fb --- /dev/null +++ b/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_RayTminNV; +} diff --git a/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..d48995b821e --- /dev/null +++ 
b/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = 1.0 + float(gl_InstanceID); +} diff --git a/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit b/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit index 107f9751849..44c814d708e 100644 --- a/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit +++ b/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit @@ -5,5 +5,5 @@ layout(location = 0) rayPayloadInNV float payload; void main() { - payload = 1.0; + payload = 1.0 + float(gl_InstanceID); } diff --git a/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..191905a312e --- /dev/null +++ b/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldRayDirectionEXT; +} diff --git a/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit b/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit new file mode 100644 index 00000000000..43d14f2825f --- /dev/null +++ b/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldRayDirectionNV; +} diff --git a/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..2c86a7234c9 --- /dev/null +++ b/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void 
main() +{ + payload = gl_WorldRayOriginEXT; +} diff --git a/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit b/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit new file mode 100644 index 00000000000..8b03e7dedf1 --- /dev/null +++ b/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldRayOriginNV; +} diff --git a/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..eb64bd0b416 --- /dev/null +++ b/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldToObjectEXT * vec4(payload, 1.0); +} diff --git a/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit b/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit new file mode 100644 index 00000000000..dc67c4a90fa --- /dev/null +++ b/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldToObjectNV * vec4(payload, 1.0); +} diff --git a/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen b/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen new file mode 100644 index 00000000000..8d6f1f4a933 --- /dev/null +++ b/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen @@ -0,0 +1,16 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 3) rayPayloadEXT vec4 payload; + +layout(push_constant) uniform Registers +{ + uvec2 ptr; +}; + +void main() +{ + vec3 origin = vec3(0.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(accelerationStructureEXT(ptr), 
gl_RayFlagsOpaqueEXT, 0xFF, 0u, 0u, 0u, origin, 0.0, direction, 100.0f, 3); +} diff --git a/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen b/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen new file mode 100644 index 00000000000..9a4380e0a6e --- /dev/null +++ b/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen @@ -0,0 +1,16 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(set = 0, binding = 1, rgba32f) uniform writeonly image2D image; +layout(location = 3) rayPayloadEXT vec4 payload; +layout(location = 4) callableDataEXT float blend; + +void main() +{ + vec3 origin = vec3(0.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, gl_RayFlagsOpaqueEXT, 0xFF, 0u, 0u, 0u, origin, 0.0, direction, 100.0f, 3); + executeCallableEXT(0u, 4); + imageStore(image, ivec2(gl_LaunchIDEXT.xy), payload + vec4(blend)); +} diff --git a/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen b/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen new file mode 100644 index 00000000000..6f9983e97f6 --- /dev/null +++ b/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchIDEXT.xy), vec4(1.0)); +} diff --git a/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen b/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen new file mode 100644 index 00000000000..b89792e3628 --- /dev/null +++ b/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchIDNV.xy), vec4(1.0)); +} diff --git a/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen 
b/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen new file mode 100644 index 00000000000..955f57dc404 --- /dev/null +++ b/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchSizeEXT.xy) - 1, vec4(1.0)); +} diff --git a/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen b/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen new file mode 100644 index 00000000000..1e1ff55937a --- /dev/null +++ b/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchSizeNV.xy) - 1, vec4(1.0)); +} diff --git a/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen b/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen new file mode 100644 index 00000000000..b73a2463dec --- /dev/null +++ b/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen @@ -0,0 +1,49 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0, rgba8) uniform image2D image; +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; + +struct Payload +{ + float a, b; +}; + +// Plain payload +layout(location = 0) rayPayloadEXT float payload1; +// Struct payload +layout(location = 1) rayPayloadEXT Payload payload2; + +// This is syntactic sugar with the struct formulation (pretty sure), spec is kinda vague. +layout(location = 2) rayPayloadEXT Block +{ + float a, b; + Payload c, d; +}; + +vec4 trace_in_function() +{ + vec4 result = vec4(0.0); + // Test that we can write to a payload in a function. 
+ vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 1); + result += payload2.a; + result += payload2.b; + return result; +} + +void main() +{ + vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + vec4 result = vec4(payload1); + + result += trace_in_function(); + + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 2); + result += a + b + c.a + c.b + d.a + d.b; + + imageStore(image, ivec2(gl_LaunchIDEXT.xy), result); +} diff --git a/shaders/vulkan/rgen/payloads.nocompat.vk.rgen b/shaders/vulkan/rgen/payloads.nocompat.vk.rgen new file mode 100644 index 00000000000..11c12d44f69 --- /dev/null +++ b/shaders/vulkan/rgen/payloads.nocompat.vk.rgen @@ -0,0 +1,49 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0, rgba8) uniform image2D image; +layout(set = 0, binding = 1) uniform accelerationStructureNV as; + +struct Payload +{ + float a, b; +}; + +// Plain payload +layout(location = 0) rayPayloadNV float payload1; +// Struct payload +layout(location = 1) rayPayloadNV Payload payload2; + +// This is syntactic sugar with the struct formulation (pretty sure), spec is kinda vague. +layout(location = 2) rayPayloadNV Block +{ + float a, b; + Payload c, d; +}; + +vec4 trace_in_function() +{ + vec4 result = vec4(0.0); + // Test that we can write to a payload in a function. 
+ vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceNV(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 1); + result += payload2.a; + result += payload2.b; + return result; +} + +void main() +{ + vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceNV(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + vec4 result = vec4(payload1); + + result += trace_in_function(); + + traceNV(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 2); + result += a + b + c.a + c.b + d.a + d.b; + + imageStore(image, ivec2(gl_LaunchIDNV.xy), result); +} diff --git a/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen b/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen new file mode 100644 index 00000000000..6763f55365c --- /dev/null +++ b/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen @@ -0,0 +1,18 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +float pure_call(vec2 launchID, vec2 launchSize) +{ + vec3 origin = vec3(launchID.x / launchSize.x, launchID.y / launchSize.y, 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + return 0.0; +} + +void main() +{ + pure_call(vec2(gl_LaunchIDEXT.xy), vec2(gl_LaunchSizeEXT.xy)); +} diff --git a/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen b/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen new file mode 100644 index 00000000000..40f16489010 --- /dev/null +++ b/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen @@ -0,0 +1,16 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0, rgba8) uniform image2D image; +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +void main() +{ + vec4 col = vec4(0.0, 0.0, 0.0, 
1.0); + vec3 origin = vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + col.y = payload; + imageStore(image, ivec2(gl_LaunchIDEXT.xy), col); +} diff --git a/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen b/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen new file mode 100644 index 00000000000..39c9bf27867 --- /dev/null +++ b/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen @@ -0,0 +1,16 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(shaderRecordEXT, std430) buffer sbt +{ + vec3 direction; + float tmax; +}; + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +void main() +{ + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(0.0), 0.0, direction, tmax, 0); +} diff --git a/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint b/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint new file mode 100644 index 00000000000..b930e5c69fa --- /dev/null +++ b/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint @@ -0,0 +1,12 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +void in_func() +{ + reportIntersectionEXT(0.5, 10); +} + +void main() +{ + in_func(); +} diff --git a/shaders/vulkan/rint/report-intersection.nocompat.vk.rint b/shaders/vulkan/rint/report-intersection.nocompat.vk.rint new file mode 100644 index 00000000000..ee384fc8da0 --- /dev/null +++ b/shaders/vulkan/rint/report-intersection.nocompat.vk.rint @@ -0,0 +1,12 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +void in_func() +{ + reportIntersectionNV(0.5, 10); +} + +void main() +{ + in_func(); +} diff --git a/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss 
b/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss new file mode 100644 index 00000000000..ee873cb96e5 --- /dev/null +++ b/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = 0.0; +} diff --git a/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss b/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss new file mode 100644 index 00000000000..265a8f8b21a --- /dev/null +++ b/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss @@ -0,0 +1,12 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 3) rayPayloadInEXT float p; + +void main() +{ + vec3 origin = vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 3); +} diff --git a/shaders/vulkan/vert/device-group.nocompat.vk.vert b/shaders/vulkan/vert/device-group.nocompat.vk.vert new file mode 100644 index 00000000000..16ed51b15ef --- /dev/null +++ b/shaders/vulkan/vert/device-group.nocompat.vk.vert @@ -0,0 +1,7 @@ +#version 450 core +#extension GL_EXT_device_group : require + +void main() +{ + gl_Position = vec4(gl_DeviceIndex); +} diff --git a/spirv.h b/spirv.h index 8da27dd2638..5b6e8aaf475 100644 --- a/spirv.h +++ b/spirv.h @@ -1,5 +1,5 @@ /* -** Copyright (c) 2014-2019 The Khronos Group Inc. +** Copyright (c) 2014-2020 The Khronos Group Inc. 
** ** Permission is hereby granted, free of charge, to any person obtaining a copy ** of this software and/or associated documentation files (the "Materials"), @@ -31,7 +31,7 @@ /* ** Enumeration tokens for SPIR-V, in various styles: -** C, C++, C++11, JSON, Lua, Python, C#, D +** C, C++, C++11, JSON, Lua, Python, C#, D, Beef ** ** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL ** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL @@ -41,6 +41,8 @@ ** - C# will use enum classes in the Specification class located in the "Spv" namespace, ** e.g.: Spv.Specification.SourceLanguage.GLSL ** - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +** - Beef will use enum classes in the Specification class located in the "Spv" namespace, +** e.g.: Spv.Specification.SourceLanguage.GLSL ** ** Some tokens act like mask values, which can be OR'd together, ** while others are mutually exclusive. The mask-like ones have @@ -53,12 +55,12 @@ typedef unsigned int SpvId; -#define SPV_VERSION 0x10300 -#define SPV_REVISION 6 +#define SPV_VERSION 0x10600 +#define SPV_REVISION 1 static const unsigned int SpvMagicNumber = 0x07230203; -static const unsigned int SpvVersion = 0x00010300; -static const unsigned int SpvRevision = 6; +static const unsigned int SpvVersion = 0x00010600; +static const unsigned int SpvRevision = 1; static const unsigned int SpvOpCodeMask = 0xffff; static const unsigned int SpvWordCountShift = 16; @@ -69,6 +71,8 @@ typedef enum SpvSourceLanguage_ { SpvSourceLanguageOpenCL_C = 3, SpvSourceLanguageOpenCL_CPP = 4, SpvSourceLanguageHLSL = 5, + SpvSourceLanguageCPP_for_OpenCL = 6, + SpvSourceLanguageSYCL = 7, SpvSourceLanguageMax = 0x7fffffff, } SpvSourceLanguage; @@ -82,12 +86,20 @@ typedef enum SpvExecutionModel_ { SpvExecutionModelKernel = 6, SpvExecutionModelTaskNV = 5267, SpvExecutionModelMeshNV = 5268, + SpvExecutionModelRayGenerationKHR = 5313, SpvExecutionModelRayGenerationNV = 5313, + 
SpvExecutionModelIntersectionKHR = 5314, SpvExecutionModelIntersectionNV = 5314, + SpvExecutionModelAnyHitKHR = 5315, SpvExecutionModelAnyHitNV = 5315, + SpvExecutionModelClosestHitKHR = 5316, SpvExecutionModelClosestHitNV = 5316, + SpvExecutionModelMissKHR = 5317, SpvExecutionModelMissNV = 5317, + SpvExecutionModelCallableKHR = 5318, SpvExecutionModelCallableNV = 5318, + SpvExecutionModelTaskEXT = 5364, + SpvExecutionModelMeshEXT = 5365, SpvExecutionModelMax = 0x7fffffff, } SpvExecutionModel; @@ -95,6 +107,7 @@ typedef enum SpvAddressingModel_ { SpvAddressingModelLogical = 0, SpvAddressingModelPhysical32 = 1, SpvAddressingModelPhysical64 = 2, + SpvAddressingModelPhysicalStorageBuffer64 = 5348, SpvAddressingModelPhysicalStorageBuffer64EXT = 5348, SpvAddressingModelMax = 0x7fffffff, } SpvAddressingModel; @@ -103,6 +116,7 @@ typedef enum SpvMemoryModel_ { SpvMemoryModelSimple = 0, SpvMemoryModelGLSL450 = 1, SpvMemoryModelOpenCL = 2, + SpvMemoryModelVulkan = 3, SpvMemoryModelVulkanKHR = 3, SpvMemoryModelMax = 0x7fffffff, } SpvMemoryModel; @@ -146,18 +160,46 @@ typedef enum SpvExecutionMode_ { SpvExecutionModeSubgroupsPerWorkgroupId = 37, SpvExecutionModeLocalSizeId = 38, SpvExecutionModeLocalSizeHintId = 39, + SpvExecutionModeSubgroupUniformControlFlowKHR = 4421, SpvExecutionModePostDepthCoverage = 4446, SpvExecutionModeDenormPreserve = 4459, SpvExecutionModeDenormFlushToZero = 4460, SpvExecutionModeSignedZeroInfNanPreserve = 4461, SpvExecutionModeRoundingModeRTE = 4462, SpvExecutionModeRoundingModeRTZ = 4463, + SpvExecutionModeEarlyAndLateFragmentTestsAMD = 5017, SpvExecutionModeStencilRefReplacingEXT = 5027, + SpvExecutionModeStencilRefUnchangedFrontAMD = 5079, + SpvExecutionModeStencilRefGreaterFrontAMD = 5080, + SpvExecutionModeStencilRefLessFrontAMD = 5081, + SpvExecutionModeStencilRefUnchangedBackAMD = 5082, + SpvExecutionModeStencilRefGreaterBackAMD = 5083, + SpvExecutionModeStencilRefLessBackAMD = 5084, + SpvExecutionModeOutputLinesEXT = 5269, 
SpvExecutionModeOutputLinesNV = 5269, + SpvExecutionModeOutputPrimitivesEXT = 5270, SpvExecutionModeOutputPrimitivesNV = 5270, SpvExecutionModeDerivativeGroupQuadsNV = 5289, SpvExecutionModeDerivativeGroupLinearNV = 5290, + SpvExecutionModeOutputTrianglesEXT = 5298, SpvExecutionModeOutputTrianglesNV = 5298, + SpvExecutionModePixelInterlockOrderedEXT = 5366, + SpvExecutionModePixelInterlockUnorderedEXT = 5367, + SpvExecutionModeSampleInterlockOrderedEXT = 5368, + SpvExecutionModeSampleInterlockUnorderedEXT = 5369, + SpvExecutionModeShadingRateInterlockOrderedEXT = 5370, + SpvExecutionModeShadingRateInterlockUnorderedEXT = 5371, + SpvExecutionModeSharedLocalMemorySizeINTEL = 5618, + SpvExecutionModeRoundingModeRTPINTEL = 5620, + SpvExecutionModeRoundingModeRTNINTEL = 5621, + SpvExecutionModeFloatingPointModeALTINTEL = 5622, + SpvExecutionModeFloatingPointModeIEEEINTEL = 5623, + SpvExecutionModeMaxWorkgroupSizeINTEL = 5893, + SpvExecutionModeMaxWorkDimINTEL = 5894, + SpvExecutionModeNoGlobalOffsetINTEL = 5895, + SpvExecutionModeNumSIMDWorkitemsINTEL = 5896, + SpvExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + SpvExecutionModeNamedBarrierCountINTEL = 6417, SpvExecutionModeMax = 0x7fffffff, } SpvExecutionMode; @@ -175,13 +217,24 @@ typedef enum SpvStorageClass_ { SpvStorageClassAtomicCounter = 10, SpvStorageClassImage = 11, SpvStorageClassStorageBuffer = 12, + SpvStorageClassCallableDataKHR = 5328, SpvStorageClassCallableDataNV = 5328, + SpvStorageClassIncomingCallableDataKHR = 5329, SpvStorageClassIncomingCallableDataNV = 5329, + SpvStorageClassRayPayloadKHR = 5338, SpvStorageClassRayPayloadNV = 5338, + SpvStorageClassHitAttributeKHR = 5339, SpvStorageClassHitAttributeNV = 5339, + SpvStorageClassIncomingRayPayloadKHR = 5342, SpvStorageClassIncomingRayPayloadNV = 5342, + SpvStorageClassShaderRecordBufferKHR = 5343, SpvStorageClassShaderRecordBufferNV = 5343, + SpvStorageClassPhysicalStorageBuffer = 5349, SpvStorageClassPhysicalStorageBufferEXT = 5349, + 
SpvStorageClassTaskPayloadWorkgroupEXT = 5402, + SpvStorageClassCodeSectionINTEL = 5605, + SpvStorageClassDeviceOnlyINTEL = 5936, + SpvStorageClassHostOnlyINTEL = 5937, SpvStorageClassMax = 0x7fffffff, } SpvStorageClass; @@ -252,6 +305,8 @@ typedef enum SpvImageFormat_ { SpvImageFormatRg8ui = 37, SpvImageFormatR16ui = 38, SpvImageFormatR8ui = 39, + SpvImageFormatR64ui = 40, + SpvImageFormatR64i = 41, SpvImageFormatMax = 0x7fffffff, } SpvImageFormat; @@ -309,10 +364,18 @@ typedef enum SpvImageOperandsShift_ { SpvImageOperandsConstOffsetsShift = 5, SpvImageOperandsSampleShift = 6, SpvImageOperandsMinLodShift = 7, + SpvImageOperandsMakeTexelAvailableShift = 8, SpvImageOperandsMakeTexelAvailableKHRShift = 8, + SpvImageOperandsMakeTexelVisibleShift = 9, SpvImageOperandsMakeTexelVisibleKHRShift = 9, + SpvImageOperandsNonPrivateTexelShift = 10, SpvImageOperandsNonPrivateTexelKHRShift = 10, + SpvImageOperandsVolatileTexelShift = 11, SpvImageOperandsVolatileTexelKHRShift = 11, + SpvImageOperandsSignExtendShift = 12, + SpvImageOperandsZeroExtendShift = 13, + SpvImageOperandsNontemporalShift = 14, + SpvImageOperandsOffsetsShift = 16, SpvImageOperandsMax = 0x7fffffff, } SpvImageOperandsShift; @@ -326,10 +389,18 @@ typedef enum SpvImageOperandsMask_ { SpvImageOperandsConstOffsetsMask = 0x00000020, SpvImageOperandsSampleMask = 0x00000040, SpvImageOperandsMinLodMask = 0x00000080, + SpvImageOperandsMakeTexelAvailableMask = 0x00000100, SpvImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + SpvImageOperandsMakeTexelVisibleMask = 0x00000200, SpvImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + SpvImageOperandsNonPrivateTexelMask = 0x00000400, SpvImageOperandsNonPrivateTexelKHRMask = 0x00000400, + SpvImageOperandsVolatileTexelMask = 0x00000800, SpvImageOperandsVolatileTexelKHRMask = 0x00000800, + SpvImageOperandsSignExtendMask = 0x00001000, + SpvImageOperandsZeroExtendMask = 0x00002000, + SpvImageOperandsNontemporalMask = 0x00004000, + SpvImageOperandsOffsetsMask = 0x00010000, } 
SpvImageOperandsMask; typedef enum SpvFPFastMathModeShift_ { @@ -338,6 +409,8 @@ typedef enum SpvFPFastMathModeShift_ { SpvFPFastMathModeNSZShift = 2, SpvFPFastMathModeAllowRecipShift = 3, SpvFPFastMathModeFastShift = 4, + SpvFPFastMathModeAllowContractFastINTELShift = 16, + SpvFPFastMathModeAllowReassocINTELShift = 17, SpvFPFastMathModeMax = 0x7fffffff, } SpvFPFastMathModeShift; @@ -348,6 +421,8 @@ typedef enum SpvFPFastMathModeMask_ { SpvFPFastMathModeNSZMask = 0x00000004, SpvFPFastMathModeAllowRecipMask = 0x00000008, SpvFPFastMathModeFastMask = 0x00000010, + SpvFPFastMathModeAllowContractFastINTELMask = 0x00010000, + SpvFPFastMathModeAllowReassocINTELMask = 0x00020000, } SpvFPFastMathModeMask; typedef enum SpvFPRoundingMode_ { @@ -361,6 +436,7 @@ typedef enum SpvFPRoundingMode_ { typedef enum SpvLinkageType_ { SpvLinkageTypeExport = 0, SpvLinkageTypeImport = 1, + SpvLinkageTypeLinkOnceODR = 2, SpvLinkageTypeMax = 0x7fffffff, } SpvLinkageType; @@ -410,6 +486,7 @@ typedef enum SpvDecoration_ { SpvDecorationNonWritable = 24, SpvDecorationNonReadable = 25, SpvDecorationUniform = 26, + SpvDecorationUniformId = 27, SpvDecorationSaturatedConversion = 28, SpvDecorationStream = 29, SpvDecorationLocation = 30, @@ -437,15 +514,64 @@ typedef enum SpvDecoration_ { SpvDecorationPassthroughNV = 5250, SpvDecorationViewportRelativeNV = 5252, SpvDecorationSecondaryViewportRelativeNV = 5256, + SpvDecorationPerPrimitiveEXT = 5271, SpvDecorationPerPrimitiveNV = 5271, SpvDecorationPerViewNV = 5272, SpvDecorationPerTaskNV = 5273, + SpvDecorationPerVertexKHR = 5285, SpvDecorationPerVertexNV = 5285, + SpvDecorationNonUniform = 5300, SpvDecorationNonUniformEXT = 5300, + SpvDecorationRestrictPointer = 5355, SpvDecorationRestrictPointerEXT = 5355, + SpvDecorationAliasedPointer = 5356, SpvDecorationAliasedPointerEXT = 5356, + SpvDecorationBindlessSamplerNV = 5398, + SpvDecorationBindlessImageNV = 5399, + SpvDecorationBoundSamplerNV = 5400, + SpvDecorationBoundImageNV = 5401, + 
SpvDecorationSIMTCallINTEL = 5599, + SpvDecorationReferencedIndirectlyINTEL = 5602, + SpvDecorationClobberINTEL = 5607, + SpvDecorationSideEffectsINTEL = 5608, + SpvDecorationVectorComputeVariableINTEL = 5624, + SpvDecorationFuncParamIOKindINTEL = 5625, + SpvDecorationVectorComputeFunctionINTEL = 5626, + SpvDecorationStackCallINTEL = 5627, + SpvDecorationGlobalVariableOffsetINTEL = 5628, + SpvDecorationCounterBuffer = 5634, SpvDecorationHlslCounterBufferGOOGLE = 5634, SpvDecorationHlslSemanticGOOGLE = 5635, + SpvDecorationUserSemantic = 5635, + SpvDecorationUserTypeGOOGLE = 5636, + SpvDecorationFunctionRoundingModeINTEL = 5822, + SpvDecorationFunctionDenormModeINTEL = 5823, + SpvDecorationRegisterINTEL = 5825, + SpvDecorationMemoryINTEL = 5826, + SpvDecorationNumbanksINTEL = 5827, + SpvDecorationBankwidthINTEL = 5828, + SpvDecorationMaxPrivateCopiesINTEL = 5829, + SpvDecorationSinglepumpINTEL = 5830, + SpvDecorationDoublepumpINTEL = 5831, + SpvDecorationMaxReplicatesINTEL = 5832, + SpvDecorationSimpleDualPortINTEL = 5833, + SpvDecorationMergeINTEL = 5834, + SpvDecorationBankBitsINTEL = 5835, + SpvDecorationForcePow2DepthINTEL = 5836, + SpvDecorationBurstCoalesceINTEL = 5899, + SpvDecorationCacheSizeINTEL = 5900, + SpvDecorationDontStaticallyCoalesceINTEL = 5901, + SpvDecorationPrefetchINTEL = 5902, + SpvDecorationStallEnableINTEL = 5905, + SpvDecorationFuseLoopsInFunctionINTEL = 5907, + SpvDecorationAliasScopeINTEL = 5914, + SpvDecorationNoAliasINTEL = 5915, + SpvDecorationBufferLocationINTEL = 5921, + SpvDecorationIOPipeStorageINTEL = 5944, + SpvDecorationFunctionFloatingPointModeINTEL = 6080, + SpvDecorationSingleElementVectorINTEL = 6085, + SpvDecorationVectorComputeCallableFunctionINTEL = 6087, + SpvDecorationMediaBlockIOINTEL = 6140, SpvDecorationMax = 0x7fffffff, } SpvDecoration; @@ -504,8 +630,10 @@ typedef enum SpvBuiltIn_ { SpvBuiltInBaseVertex = 4424, SpvBuiltInBaseInstance = 4425, SpvBuiltInDrawIndex = 4426, + SpvBuiltInPrimitiveShadingRateKHR = 4432, 
SpvBuiltInDeviceIndex = 4438, SpvBuiltInViewIndex = 4440, + SpvBuiltInShadingRateKHR = 4444, SpvBuiltInBaryCoordNoPerspAMD = 4992, SpvBuiltInBaryCoordNoPerspCentroidAMD = 4993, SpvBuiltInBaryCoordNoPerspSampleAMD = 4994, @@ -528,26 +656,52 @@ typedef enum SpvBuiltIn_ { SpvBuiltInLayerPerViewNV = 5279, SpvBuiltInMeshViewCountNV = 5280, SpvBuiltInMeshViewIndicesNV = 5281, + SpvBuiltInBaryCoordKHR = 5286, SpvBuiltInBaryCoordNV = 5286, + SpvBuiltInBaryCoordNoPerspKHR = 5287, SpvBuiltInBaryCoordNoPerspNV = 5287, SpvBuiltInFragSizeEXT = 5292, SpvBuiltInFragmentSizeNV = 5292, SpvBuiltInFragInvocationCountEXT = 5293, SpvBuiltInInvocationsPerPixelNV = 5293, + SpvBuiltInPrimitivePointIndicesEXT = 5294, + SpvBuiltInPrimitiveLineIndicesEXT = 5295, + SpvBuiltInPrimitiveTriangleIndicesEXT = 5296, + SpvBuiltInCullPrimitiveEXT = 5299, + SpvBuiltInLaunchIdKHR = 5319, SpvBuiltInLaunchIdNV = 5319, + SpvBuiltInLaunchSizeKHR = 5320, SpvBuiltInLaunchSizeNV = 5320, + SpvBuiltInWorldRayOriginKHR = 5321, SpvBuiltInWorldRayOriginNV = 5321, + SpvBuiltInWorldRayDirectionKHR = 5322, SpvBuiltInWorldRayDirectionNV = 5322, + SpvBuiltInObjectRayOriginKHR = 5323, SpvBuiltInObjectRayOriginNV = 5323, + SpvBuiltInObjectRayDirectionKHR = 5324, SpvBuiltInObjectRayDirectionNV = 5324, + SpvBuiltInRayTminKHR = 5325, SpvBuiltInRayTminNV = 5325, + SpvBuiltInRayTmaxKHR = 5326, SpvBuiltInRayTmaxNV = 5326, + SpvBuiltInInstanceCustomIndexKHR = 5327, SpvBuiltInInstanceCustomIndexNV = 5327, + SpvBuiltInObjectToWorldKHR = 5330, SpvBuiltInObjectToWorldNV = 5330, + SpvBuiltInWorldToObjectKHR = 5331, SpvBuiltInWorldToObjectNV = 5331, SpvBuiltInHitTNV = 5332, + SpvBuiltInHitKindKHR = 5333, SpvBuiltInHitKindNV = 5333, + SpvBuiltInCurrentRayTimeNV = 5334, + SpvBuiltInIncomingRayFlagsKHR = 5351, SpvBuiltInIncomingRayFlagsNV = 5351, + SpvBuiltInRayGeometryIndexKHR = 5352, + SpvBuiltInWarpsPerSMNV = 5374, + SpvBuiltInSMCountNV = 5375, + SpvBuiltInWarpIDNV = 5376, + SpvBuiltInSMIDNV = 5377, + SpvBuiltInCullMaskKHR = 6021, 
SpvBuiltInMax = 0x7fffffff, } SpvBuiltIn; @@ -568,6 +722,19 @@ typedef enum SpvLoopControlShift_ { SpvLoopControlDontUnrollShift = 1, SpvLoopControlDependencyInfiniteShift = 2, SpvLoopControlDependencyLengthShift = 3, + SpvLoopControlMinIterationsShift = 4, + SpvLoopControlMaxIterationsShift = 5, + SpvLoopControlIterationMultipleShift = 6, + SpvLoopControlPeelCountShift = 7, + SpvLoopControlPartialCountShift = 8, + SpvLoopControlInitiationIntervalINTELShift = 16, + SpvLoopControlMaxConcurrencyINTELShift = 17, + SpvLoopControlDependencyArrayINTELShift = 18, + SpvLoopControlPipelineEnableINTELShift = 19, + SpvLoopControlLoopCoalesceINTELShift = 20, + SpvLoopControlMaxInterleavingINTELShift = 21, + SpvLoopControlSpeculatedIterationsINTELShift = 22, + SpvLoopControlNoFusionINTELShift = 23, SpvLoopControlMax = 0x7fffffff, } SpvLoopControlShift; @@ -577,6 +744,19 @@ typedef enum SpvLoopControlMask_ { SpvLoopControlDontUnrollMask = 0x00000002, SpvLoopControlDependencyInfiniteMask = 0x00000004, SpvLoopControlDependencyLengthMask = 0x00000008, + SpvLoopControlMinIterationsMask = 0x00000010, + SpvLoopControlMaxIterationsMask = 0x00000020, + SpvLoopControlIterationMultipleMask = 0x00000040, + SpvLoopControlPeelCountMask = 0x00000080, + SpvLoopControlPartialCountMask = 0x00000100, + SpvLoopControlInitiationIntervalINTELMask = 0x00010000, + SpvLoopControlMaxConcurrencyINTELMask = 0x00020000, + SpvLoopControlDependencyArrayINTELMask = 0x00040000, + SpvLoopControlPipelineEnableINTELMask = 0x00080000, + SpvLoopControlLoopCoalesceINTELMask = 0x00100000, + SpvLoopControlMaxInterleavingINTELMask = 0x00200000, + SpvLoopControlSpeculatedIterationsINTELMask = 0x00400000, + SpvLoopControlNoFusionINTELMask = 0x00800000, } SpvLoopControlMask; typedef enum SpvFunctionControlShift_ { @@ -584,6 +764,7 @@ typedef enum SpvFunctionControlShift_ { SpvFunctionControlDontInlineShift = 1, SpvFunctionControlPureShift = 2, SpvFunctionControlConstShift = 3, + SpvFunctionControlOptNoneINTELShift = 16, 
SpvFunctionControlMax = 0x7fffffff, } SpvFunctionControlShift; @@ -593,6 +774,7 @@ typedef enum SpvFunctionControlMask_ { SpvFunctionControlDontInlineMask = 0x00000002, SpvFunctionControlPureMask = 0x00000004, SpvFunctionControlConstMask = 0x00000008, + SpvFunctionControlOptNoneINTELMask = 0x00010000, } SpvFunctionControlMask; typedef enum SpvMemorySemanticsShift_ { @@ -606,9 +788,13 @@ typedef enum SpvMemorySemanticsShift_ { SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, SpvMemorySemanticsAtomicCounterMemoryShift = 10, SpvMemorySemanticsImageMemoryShift = 11, + SpvMemorySemanticsOutputMemoryShift = 12, SpvMemorySemanticsOutputMemoryKHRShift = 12, + SpvMemorySemanticsMakeAvailableShift = 13, SpvMemorySemanticsMakeAvailableKHRShift = 13, + SpvMemorySemanticsMakeVisibleShift = 14, SpvMemorySemanticsMakeVisibleKHRShift = 14, + SpvMemorySemanticsVolatileShift = 15, SpvMemorySemanticsMax = 0x7fffffff, } SpvMemorySemanticsShift; @@ -624,18 +810,27 @@ typedef enum SpvMemorySemanticsMask_ { SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, SpvMemorySemanticsImageMemoryMask = 0x00000800, + SpvMemorySemanticsOutputMemoryMask = 0x00001000, SpvMemorySemanticsOutputMemoryKHRMask = 0x00001000, + SpvMemorySemanticsMakeAvailableMask = 0x00002000, SpvMemorySemanticsMakeAvailableKHRMask = 0x00002000, + SpvMemorySemanticsMakeVisibleMask = 0x00004000, SpvMemorySemanticsMakeVisibleKHRMask = 0x00004000, + SpvMemorySemanticsVolatileMask = 0x00008000, } SpvMemorySemanticsMask; typedef enum SpvMemoryAccessShift_ { SpvMemoryAccessVolatileShift = 0, SpvMemoryAccessAlignedShift = 1, SpvMemoryAccessNontemporalShift = 2, + SpvMemoryAccessMakePointerAvailableShift = 3, SpvMemoryAccessMakePointerAvailableKHRShift = 3, + SpvMemoryAccessMakePointerVisibleShift = 4, SpvMemoryAccessMakePointerVisibleKHRShift = 4, + SpvMemoryAccessNonPrivatePointerShift = 5, SpvMemoryAccessNonPrivatePointerKHRShift = 5, + 
SpvMemoryAccessAliasScopeINTELMaskShift = 16, + SpvMemoryAccessNoAliasINTELMaskShift = 17, SpvMemoryAccessMax = 0x7fffffff, } SpvMemoryAccessShift; @@ -644,9 +839,14 @@ typedef enum SpvMemoryAccessMask_ { SpvMemoryAccessVolatileMask = 0x00000001, SpvMemoryAccessAlignedMask = 0x00000002, SpvMemoryAccessNontemporalMask = 0x00000004, + SpvMemoryAccessMakePointerAvailableMask = 0x00000008, SpvMemoryAccessMakePointerAvailableKHRMask = 0x00000008, + SpvMemoryAccessMakePointerVisibleMask = 0x00000010, SpvMemoryAccessMakePointerVisibleKHRMask = 0x00000010, + SpvMemoryAccessNonPrivatePointerMask = 0x00000020, SpvMemoryAccessNonPrivatePointerKHRMask = 0x00000020, + SpvMemoryAccessAliasScopeINTELMaskMask = 0x00010000, + SpvMemoryAccessNoAliasINTELMaskMask = 0x00020000, } SpvMemoryAccessMask; typedef enum SpvScope_ { @@ -655,7 +855,9 @@ typedef enum SpvScope_ { SpvScopeWorkgroup = 2, SpvScopeSubgroup = 3, SpvScopeInvocation = 4, + SpvScopeQueueFamily = 5, SpvScopeQueueFamilyKHR = 5, + SpvScopeShaderCallKHR = 6, SpvScopeMax = 0x7fffffff, } SpvScope; @@ -755,8 +957,15 @@ typedef enum SpvCapability_ { SpvCapabilityGroupNonUniformShuffleRelative = 66, SpvCapabilityGroupNonUniformClustered = 67, SpvCapabilityGroupNonUniformQuad = 68, + SpvCapabilityShaderLayer = 69, + SpvCapabilityShaderViewportIndex = 70, + SpvCapabilityUniformDecoration = 71, + SpvCapabilityFragmentShadingRateKHR = 4422, SpvCapabilitySubgroupBallotKHR = 4423, SpvCapabilityDrawParameters = 4427, + SpvCapabilityWorkgroupMemoryExplicitLayoutKHR = 4428, + SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR = 4429, + SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR = 4430, SpvCapabilitySubgroupVoteKHR = 4431, SpvCapabilityStorageBuffer16BitAccess = 4433, SpvCapabilityStorageUniformBufferBlock16 = 4433, @@ -778,11 +987,17 @@ typedef enum SpvCapability_ { SpvCapabilitySignedZeroInfNanPreserve = 4466, SpvCapabilityRoundingModeRTE = 4467, SpvCapabilityRoundingModeRTZ = 4468, + 
SpvCapabilityRayQueryProvisionalKHR = 4471, + SpvCapabilityRayQueryKHR = 4472, + SpvCapabilityRayTraversalPrimitiveCullingKHR = 4478, + SpvCapabilityRayTracingKHR = 4479, SpvCapabilityFloat16ImageAMD = 5008, SpvCapabilityImageGatherBiasLodAMD = 5009, SpvCapabilityFragmentMaskAMD = 5010, SpvCapabilityStencilExportEXT = 5013, SpvCapabilityImageReadWriteLodAMD = 5015, + SpvCapabilityInt64ImageEXT = 5016, + SpvCapabilityShaderClockKHR = 5055, SpvCapabilitySampleMaskOverrideCoverageNV = 5249, SpvCapabilityGeometryShaderPassthroughNV = 5251, SpvCapabilityShaderViewportIndexLayerEXT = 5254, @@ -793,36 +1008,217 @@ typedef enum SpvCapability_ { SpvCapabilityFragmentFullyCoveredEXT = 5265, SpvCapabilityMeshShadingNV = 5266, SpvCapabilityImageFootprintNV = 5282, + SpvCapabilityMeshShadingEXT = 5283, + SpvCapabilityFragmentBarycentricKHR = 5284, SpvCapabilityFragmentBarycentricNV = 5284, SpvCapabilityComputeDerivativeGroupQuadsNV = 5288, SpvCapabilityFragmentDensityEXT = 5291, SpvCapabilityShadingRateNV = 5291, SpvCapabilityGroupNonUniformPartitionedNV = 5297, + SpvCapabilityShaderNonUniform = 5301, SpvCapabilityShaderNonUniformEXT = 5301, + SpvCapabilityRuntimeDescriptorArray = 5302, SpvCapabilityRuntimeDescriptorArrayEXT = 5302, + SpvCapabilityInputAttachmentArrayDynamicIndexing = 5303, SpvCapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + SpvCapabilityUniformTexelBufferArrayDynamicIndexing = 5304, SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + SpvCapabilityStorageTexelBufferArrayDynamicIndexing = 5305, SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + SpvCapabilityUniformBufferArrayNonUniformIndexing = 5306, SpvCapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + SpvCapabilitySampledImageArrayNonUniformIndexing = 5307, SpvCapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + SpvCapabilityStorageBufferArrayNonUniformIndexing = 5308, SpvCapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + 
SpvCapabilityStorageImageArrayNonUniformIndexing = 5309, SpvCapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + SpvCapabilityInputAttachmentArrayNonUniformIndexing = 5310, SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + SpvCapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + SpvCapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, SpvCapabilityRayTracingNV = 5340, + SpvCapabilityRayTracingMotionBlurNV = 5341, + SpvCapabilityVulkanMemoryModel = 5345, SpvCapabilityVulkanMemoryModelKHR = 5345, + SpvCapabilityVulkanMemoryModelDeviceScope = 5346, SpvCapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + SpvCapabilityPhysicalStorageBufferAddresses = 5347, SpvCapabilityPhysicalStorageBufferAddressesEXT = 5347, SpvCapabilityComputeDerivativeGroupLinearNV = 5350, + SpvCapabilityRayTracingProvisionalKHR = 5353, SpvCapabilityCooperativeMatrixNV = 5357, + SpvCapabilityFragmentShaderSampleInterlockEXT = 5363, + SpvCapabilityFragmentShaderShadingRateInterlockEXT = 5372, + SpvCapabilityShaderSMBuiltinsNV = 5373, + SpvCapabilityFragmentShaderPixelInterlockEXT = 5378, + SpvCapabilityDemoteToHelperInvocation = 5379, + SpvCapabilityDemoteToHelperInvocationEXT = 5379, + SpvCapabilityBindlessTextureNV = 5390, SpvCapabilitySubgroupShuffleINTEL = 5568, SpvCapabilitySubgroupBufferBlockIOINTEL = 5569, SpvCapabilitySubgroupImageBlockIOINTEL = 5570, SpvCapabilitySubgroupImageMediaBlockIOINTEL = 5579, + SpvCapabilityRoundToInfinityINTEL = 5582, + SpvCapabilityFloatingPointModeINTEL = 5583, + SpvCapabilityIntegerFunctions2INTEL = 5584, + SpvCapabilityFunctionPointersINTEL = 5603, + SpvCapabilityIndirectReferencesINTEL = 5604, + SpvCapabilityAsmINTEL = 5606, + SpvCapabilityAtomicFloat32MinMaxEXT = 5612, + SpvCapabilityAtomicFloat64MinMaxEXT = 5613, + SpvCapabilityAtomicFloat16MinMaxEXT = 5616, + SpvCapabilityVectorComputeINTEL = 
5617, + SpvCapabilityVectorAnyINTEL = 5619, + SpvCapabilityExpectAssumeKHR = 5629, + SpvCapabilitySubgroupAvcMotionEstimationINTEL = 5696, + SpvCapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + SpvCapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, + SpvCapabilityVariableLengthArrayINTEL = 5817, + SpvCapabilityFunctionFloatControlINTEL = 5821, + SpvCapabilityFPGAMemoryAttributesINTEL = 5824, + SpvCapabilityFPFastMathModeINTEL = 5837, + SpvCapabilityArbitraryPrecisionIntegersINTEL = 5844, + SpvCapabilityArbitraryPrecisionFloatingPointINTEL = 5845, + SpvCapabilityUnstructuredLoopControlsINTEL = 5886, + SpvCapabilityFPGALoopControlsINTEL = 5888, + SpvCapabilityKernelAttributesINTEL = 5892, + SpvCapabilityFPGAKernelAttributesINTEL = 5897, + SpvCapabilityFPGAMemoryAccessesINTEL = 5898, + SpvCapabilityFPGAClusterAttributesINTEL = 5904, + SpvCapabilityLoopFuseINTEL = 5906, + SpvCapabilityMemoryAccessAliasingINTEL = 5910, + SpvCapabilityFPGABufferLocationINTEL = 5920, + SpvCapabilityArbitraryPrecisionFixedPointINTEL = 5922, + SpvCapabilityUSMStorageClassesINTEL = 5935, + SpvCapabilityIOPipesINTEL = 5943, + SpvCapabilityBlockingPipesINTEL = 5945, + SpvCapabilityFPGARegINTEL = 5948, + SpvCapabilityDotProductInputAll = 6016, + SpvCapabilityDotProductInputAllKHR = 6016, + SpvCapabilityDotProductInput4x8Bit = 6017, + SpvCapabilityDotProductInput4x8BitKHR = 6017, + SpvCapabilityDotProductInput4x8BitPacked = 6018, + SpvCapabilityDotProductInput4x8BitPackedKHR = 6018, + SpvCapabilityDotProduct = 6019, + SpvCapabilityDotProductKHR = 6019, + SpvCapabilityRayCullMaskKHR = 6020, + SpvCapabilityBitInstructions = 6025, + SpvCapabilityGroupNonUniformRotateKHR = 6026, + SpvCapabilityAtomicFloat32AddEXT = 6033, + SpvCapabilityAtomicFloat64AddEXT = 6034, + SpvCapabilityLongConstantCompositeINTEL = 6089, + SpvCapabilityOptNoneINTEL = 6094, + SpvCapabilityAtomicFloat16AddEXT = 6095, + SpvCapabilityDebugInfoModuleINTEL = 6114, + SpvCapabilitySplitBarrierINTEL = 6141, + 
SpvCapabilityGroupUniformArithmeticKHR = 6400, SpvCapabilityMax = 0x7fffffff, } SpvCapability; +typedef enum SpvRayFlagsShift_ { + SpvRayFlagsOpaqueKHRShift = 0, + SpvRayFlagsNoOpaqueKHRShift = 1, + SpvRayFlagsTerminateOnFirstHitKHRShift = 2, + SpvRayFlagsSkipClosestHitShaderKHRShift = 3, + SpvRayFlagsCullBackFacingTrianglesKHRShift = 4, + SpvRayFlagsCullFrontFacingTrianglesKHRShift = 5, + SpvRayFlagsCullOpaqueKHRShift = 6, + SpvRayFlagsCullNoOpaqueKHRShift = 7, + SpvRayFlagsSkipTrianglesKHRShift = 8, + SpvRayFlagsSkipAABBsKHRShift = 9, + SpvRayFlagsMax = 0x7fffffff, +} SpvRayFlagsShift; + +typedef enum SpvRayFlagsMask_ { + SpvRayFlagsMaskNone = 0, + SpvRayFlagsOpaqueKHRMask = 0x00000001, + SpvRayFlagsNoOpaqueKHRMask = 0x00000002, + SpvRayFlagsTerminateOnFirstHitKHRMask = 0x00000004, + SpvRayFlagsSkipClosestHitShaderKHRMask = 0x00000008, + SpvRayFlagsCullBackFacingTrianglesKHRMask = 0x00000010, + SpvRayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020, + SpvRayFlagsCullOpaqueKHRMask = 0x00000040, + SpvRayFlagsCullNoOpaqueKHRMask = 0x00000080, + SpvRayFlagsSkipTrianglesKHRMask = 0x00000100, + SpvRayFlagsSkipAABBsKHRMask = 0x00000200, +} SpvRayFlagsMask; + +typedef enum SpvRayQueryIntersection_ { + SpvRayQueryIntersectionRayQueryCandidateIntersectionKHR = 0, + SpvRayQueryIntersectionRayQueryCommittedIntersectionKHR = 1, + SpvRayQueryIntersectionMax = 0x7fffffff, +} SpvRayQueryIntersection; + +typedef enum SpvRayQueryCommittedIntersectionType_ { + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR = 0, + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR = 1, + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR = 2, + SpvRayQueryCommittedIntersectionTypeMax = 0x7fffffff, +} SpvRayQueryCommittedIntersectionType; + +typedef enum SpvRayQueryCandidateIntersectionType_ { + SpvRayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR = 0, + 
SpvRayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR = 1, + SpvRayQueryCandidateIntersectionTypeMax = 0x7fffffff, +} SpvRayQueryCandidateIntersectionType; + +typedef enum SpvFragmentShadingRateShift_ { + SpvFragmentShadingRateVertical2PixelsShift = 0, + SpvFragmentShadingRateVertical4PixelsShift = 1, + SpvFragmentShadingRateHorizontal2PixelsShift = 2, + SpvFragmentShadingRateHorizontal4PixelsShift = 3, + SpvFragmentShadingRateMax = 0x7fffffff, +} SpvFragmentShadingRateShift; + +typedef enum SpvFragmentShadingRateMask_ { + SpvFragmentShadingRateMaskNone = 0, + SpvFragmentShadingRateVertical2PixelsMask = 0x00000001, + SpvFragmentShadingRateVertical4PixelsMask = 0x00000002, + SpvFragmentShadingRateHorizontal2PixelsMask = 0x00000004, + SpvFragmentShadingRateHorizontal4PixelsMask = 0x00000008, +} SpvFragmentShadingRateMask; + +typedef enum SpvFPDenormMode_ { + SpvFPDenormModePreserve = 0, + SpvFPDenormModeFlushToZero = 1, + SpvFPDenormModeMax = 0x7fffffff, +} SpvFPDenormMode; + +typedef enum SpvFPOperationMode_ { + SpvFPOperationModeIEEE = 0, + SpvFPOperationModeALT = 1, + SpvFPOperationModeMax = 0x7fffffff, +} SpvFPOperationMode; + +typedef enum SpvQuantizationModes_ { + SpvQuantizationModesTRN = 0, + SpvQuantizationModesTRN_ZERO = 1, + SpvQuantizationModesRND = 2, + SpvQuantizationModesRND_ZERO = 3, + SpvQuantizationModesRND_INF = 4, + SpvQuantizationModesRND_MIN_INF = 5, + SpvQuantizationModesRND_CONV = 6, + SpvQuantizationModesRND_CONV_ODD = 7, + SpvQuantizationModesMax = 0x7fffffff, +} SpvQuantizationModes; + +typedef enum SpvOverflowModes_ { + SpvOverflowModesWRAP = 0, + SpvOverflowModesSAT = 1, + SpvOverflowModesSAT_ZERO = 2, + SpvOverflowModesSAT_SYM = 3, + SpvOverflowModesMax = 0x7fffffff, +} SpvOverflowModes; + +typedef enum SpvPackedVectorFormat_ { + SpvPackedVectorFormatPackedVectorFormat4x8Bit = 0, + SpvPackedVectorFormatPackedVectorFormat4x8BitKHR = 0, + SpvPackedVectorFormatMax = 0x7fffffff, +} SpvPackedVectorFormat; + typedef enum 
SpvOp_ { SpvOpNop = 0, SpvOpUndef = 1, @@ -1164,12 +1560,42 @@ typedef enum SpvOp_ { SpvOpGroupNonUniformLogicalXor = 364, SpvOpGroupNonUniformQuadBroadcast = 365, SpvOpGroupNonUniformQuadSwap = 366, + SpvOpCopyLogical = 400, + SpvOpPtrEqual = 401, + SpvOpPtrNotEqual = 402, + SpvOpPtrDiff = 403, + SpvOpTerminateInvocation = 4416, SpvOpSubgroupBallotKHR = 4421, SpvOpSubgroupFirstInvocationKHR = 4422, SpvOpSubgroupAllKHR = 4428, SpvOpSubgroupAnyKHR = 4429, SpvOpSubgroupAllEqualKHR = 4430, + SpvOpGroupNonUniformRotateKHR = 4431, SpvOpSubgroupReadInvocationKHR = 4432, + SpvOpTraceRayKHR = 4445, + SpvOpExecuteCallableKHR = 4446, + SpvOpConvertUToAccelerationStructureKHR = 4447, + SpvOpIgnoreIntersectionKHR = 4448, + SpvOpTerminateRayKHR = 4449, + SpvOpSDot = 4450, + SpvOpSDotKHR = 4450, + SpvOpUDot = 4451, + SpvOpUDotKHR = 4451, + SpvOpSUDot = 4452, + SpvOpSUDotKHR = 4452, + SpvOpSDotAccSat = 4453, + SpvOpSDotAccSatKHR = 4453, + SpvOpUDotAccSat = 4454, + SpvOpUDotAccSatKHR = 4454, + SpvOpSUDotAccSat = 4455, + SpvOpSUDotAccSatKHR = 4455, + SpvOpTypeRayQueryKHR = 4472, + SpvOpRayQueryInitializeKHR = 4473, + SpvOpRayQueryTerminateKHR = 4474, + SpvOpRayQueryGenerateIntersectionKHR = 4475, + SpvOpRayQueryConfirmIntersectionKHR = 4476, + SpvOpRayQueryProceedKHR = 4477, + SpvOpRayQueryGetIntersectionTypeKHR = 4479, SpvOpGroupIAddNonUniformAMD = 5000, SpvOpGroupFAddNonUniformAMD = 5001, SpvOpGroupFMinNonUniformAMD = 5002, @@ -1180,13 +1606,20 @@ typedef enum SpvOp_ { SpvOpGroupSMaxNonUniformAMD = 5007, SpvOpFragmentMaskFetchAMD = 5011, SpvOpFragmentFetchAMD = 5012, + SpvOpReadClockKHR = 5056, SpvOpImageSampleFootprintNV = 5283, + SpvOpEmitMeshTasksEXT = 5294, + SpvOpSetMeshOutputsEXT = 5295, SpvOpGroupNonUniformPartitionNV = 5296, SpvOpWritePackedPrimitiveIndices4x8NV = 5299, + SpvOpReportIntersectionKHR = 5334, SpvOpReportIntersectionNV = 5334, SpvOpIgnoreIntersectionNV = 5335, SpvOpTerminateRayNV = 5336, SpvOpTraceNV = 5337, + SpvOpTraceMotionNV = 5338, + 
SpvOpTraceRayMotionNV = 5339, + SpvOpTypeAccelerationStructureKHR = 5341, SpvOpTypeAccelerationStructureNV = 5341, SpvOpExecuteCallableNV = 5344, SpvOpTypeCooperativeMatrixNV = 5358, @@ -1194,6 +1627,18 @@ typedef enum SpvOp_ { SpvOpCooperativeMatrixStoreNV = 5360, SpvOpCooperativeMatrixMulAddNV = 5361, SpvOpCooperativeMatrixLengthNV = 5362, + SpvOpBeginInvocationInterlockEXT = 5364, + SpvOpEndInvocationInterlockEXT = 5365, + SpvOpDemoteToHelperInvocation = 5380, + SpvOpDemoteToHelperInvocationEXT = 5380, + SpvOpIsHelperInvocationEXT = 5381, + SpvOpConvertUToImageNV = 5391, + SpvOpConvertUToSamplerNV = 5392, + SpvOpConvertImageToUNV = 5393, + SpvOpConvertSamplerToUNV = 5394, + SpvOpConvertUToSampledImageNV = 5395, + SpvOpConvertSampledImageToUNV = 5396, + SpvOpSamplerImageAddressingModeNV = 5397, SpvOpSubgroupShuffleINTEL = 5571, SpvOpSubgroupShuffleDownINTEL = 5572, SpvOpSubgroupShuffleUpINTEL = 5573, @@ -1204,10 +1649,920 @@ typedef enum SpvOp_ { SpvOpSubgroupImageBlockWriteINTEL = 5578, SpvOpSubgroupImageMediaBlockReadINTEL = 5580, SpvOpSubgroupImageMediaBlockWriteINTEL = 5581, + SpvOpUCountLeadingZerosINTEL = 5585, + SpvOpUCountTrailingZerosINTEL = 5586, + SpvOpAbsISubINTEL = 5587, + SpvOpAbsUSubINTEL = 5588, + SpvOpIAddSatINTEL = 5589, + SpvOpUAddSatINTEL = 5590, + SpvOpIAverageINTEL = 5591, + SpvOpUAverageINTEL = 5592, + SpvOpIAverageRoundedINTEL = 5593, + SpvOpUAverageRoundedINTEL = 5594, + SpvOpISubSatINTEL = 5595, + SpvOpUSubSatINTEL = 5596, + SpvOpIMul32x16INTEL = 5597, + SpvOpUMul32x16INTEL = 5598, + SpvOpConstantFunctionPointerINTEL = 5600, + SpvOpFunctionPointerCallINTEL = 5601, + SpvOpAsmTargetINTEL = 5609, + SpvOpAsmINTEL = 5610, + SpvOpAsmCallINTEL = 5611, + SpvOpAtomicFMinEXT = 5614, + SpvOpAtomicFMaxEXT = 5615, + SpvOpAssumeTrueKHR = 5630, + SpvOpExpectKHR = 5631, + SpvOpDecorateString = 5632, SpvOpDecorateStringGOOGLE = 5632, + SpvOpMemberDecorateString = 5633, SpvOpMemberDecorateStringGOOGLE = 5633, + SpvOpVmeImageINTEL = 5699, + 
SpvOpTypeVmeImageINTEL = 5700, + SpvOpTypeAvcImePayloadINTEL = 5701, + SpvOpTypeAvcRefPayloadINTEL = 5702, + SpvOpTypeAvcSicPayloadINTEL = 5703, + SpvOpTypeAvcMcePayloadINTEL = 5704, + SpvOpTypeAvcMceResultINTEL = 5705, + SpvOpTypeAvcImeResultINTEL = 5706, + SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + SpvOpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + SpvOpTypeAvcImeDualReferenceStreaminINTEL = 5710, + SpvOpTypeAvcRefResultINTEL = 5711, + SpvOpTypeAvcSicResultINTEL = 5712, + SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + SpvOpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + SpvOpSubgroupAvcMceConvertToImeResultINTEL = 5733, + SpvOpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + 
SpvOpSubgroupAvcMceConvertToRefResultINTEL = 5735, + SpvOpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + SpvOpSubgroupAvcMceConvertToSicResultINTEL = 5737, + SpvOpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + SpvOpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + SpvOpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + SpvOpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + SpvOpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + SpvOpSubgroupAvcImeInitializeINTEL = 5747, + SpvOpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + SpvOpSubgroupAvcImeSetDualReferenceINTEL = 5749, + SpvOpSubgroupAvcImeRefWindowSizeINTEL = 5750, + SpvOpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + SpvOpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + SpvOpSubgroupAvcImeSetWeightedSadINTEL = 5756, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + SpvOpSubgroupAvcImeConvertToMceResultINTEL = 5765, + SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + 
SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + SpvOpSubgroupAvcImeGetBorderReachedINTEL = 5776, + SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + SpvOpSubgroupAvcFmeInitializeINTEL = 5781, + SpvOpSubgroupAvcBmeInitializeINTEL = 5782, + SpvOpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + SpvOpSubgroupAvcRefConvertToMceResultINTEL = 5790, + SpvOpSubgroupAvcSicInitializeINTEL = 5791, + SpvOpSubgroupAvcSicConfigureSkcINTEL = 5792, + SpvOpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + SpvOpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + 
SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + SpvOpSubgroupAvcSicEvaluateIpeINTEL = 5803, + SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + SpvOpSubgroupAvcSicConvertToMceResultINTEL = 5808, + SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + SpvOpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + SpvOpSubgroupAvcSicGetInterRawSadsINTEL = 5816, + SpvOpVariableLengthArrayINTEL = 5818, + SpvOpSaveMemoryINTEL = 5819, + SpvOpRestoreMemoryINTEL = 5820, + SpvOpArbitraryFloatSinCosPiINTEL = 5840, + SpvOpArbitraryFloatCastINTEL = 5841, + SpvOpArbitraryFloatCastFromIntINTEL = 5842, + SpvOpArbitraryFloatCastToIntINTEL = 5843, + SpvOpArbitraryFloatAddINTEL = 5846, + SpvOpArbitraryFloatSubINTEL = 5847, + SpvOpArbitraryFloatMulINTEL = 5848, + SpvOpArbitraryFloatDivINTEL = 5849, + SpvOpArbitraryFloatGTINTEL = 5850, + SpvOpArbitraryFloatGEINTEL = 5851, + SpvOpArbitraryFloatLTINTEL = 5852, + SpvOpArbitraryFloatLEINTEL = 5853, + SpvOpArbitraryFloatEQINTEL = 5854, + SpvOpArbitraryFloatRecipINTEL = 5855, + SpvOpArbitraryFloatRSqrtINTEL = 5856, + SpvOpArbitraryFloatCbrtINTEL = 5857, + SpvOpArbitraryFloatHypotINTEL = 5858, + SpvOpArbitraryFloatSqrtINTEL = 5859, + SpvOpArbitraryFloatLogINTEL = 5860, + SpvOpArbitraryFloatLog2INTEL = 5861, + SpvOpArbitraryFloatLog10INTEL = 5862, + SpvOpArbitraryFloatLog1pINTEL = 5863, + SpvOpArbitraryFloatExpINTEL = 5864, + 
SpvOpArbitraryFloatExp2INTEL = 5865, + SpvOpArbitraryFloatExp10INTEL = 5866, + SpvOpArbitraryFloatExpm1INTEL = 5867, + SpvOpArbitraryFloatSinINTEL = 5868, + SpvOpArbitraryFloatCosINTEL = 5869, + SpvOpArbitraryFloatSinCosINTEL = 5870, + SpvOpArbitraryFloatSinPiINTEL = 5871, + SpvOpArbitraryFloatCosPiINTEL = 5872, + SpvOpArbitraryFloatASinINTEL = 5873, + SpvOpArbitraryFloatASinPiINTEL = 5874, + SpvOpArbitraryFloatACosINTEL = 5875, + SpvOpArbitraryFloatACosPiINTEL = 5876, + SpvOpArbitraryFloatATanINTEL = 5877, + SpvOpArbitraryFloatATanPiINTEL = 5878, + SpvOpArbitraryFloatATan2INTEL = 5879, + SpvOpArbitraryFloatPowINTEL = 5880, + SpvOpArbitraryFloatPowRINTEL = 5881, + SpvOpArbitraryFloatPowNINTEL = 5882, + SpvOpLoopControlINTEL = 5887, + SpvOpAliasDomainDeclINTEL = 5911, + SpvOpAliasScopeDeclINTEL = 5912, + SpvOpAliasScopeListDeclINTEL = 5913, + SpvOpFixedSqrtINTEL = 5923, + SpvOpFixedRecipINTEL = 5924, + SpvOpFixedRsqrtINTEL = 5925, + SpvOpFixedSinINTEL = 5926, + SpvOpFixedCosINTEL = 5927, + SpvOpFixedSinCosINTEL = 5928, + SpvOpFixedSinPiINTEL = 5929, + SpvOpFixedCosPiINTEL = 5930, + SpvOpFixedSinCosPiINTEL = 5931, + SpvOpFixedLogINTEL = 5932, + SpvOpFixedExpINTEL = 5933, + SpvOpPtrCastToCrossWorkgroupINTEL = 5934, + SpvOpCrossWorkgroupCastToPtrINTEL = 5938, + SpvOpReadPipeBlockingINTEL = 5946, + SpvOpWritePipeBlockingINTEL = 5947, + SpvOpFPGARegINTEL = 5949, + SpvOpRayQueryGetRayTMinKHR = 6016, + SpvOpRayQueryGetRayFlagsKHR = 6017, + SpvOpRayQueryGetIntersectionTKHR = 6018, + SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, + SpvOpRayQueryGetIntersectionInstanceIdKHR = 6020, + SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, + SpvOpRayQueryGetIntersectionGeometryIndexKHR = 6022, + SpvOpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, + SpvOpRayQueryGetIntersectionBarycentricsKHR = 6024, + SpvOpRayQueryGetIntersectionFrontFaceKHR = 6025, + SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, + 
SpvOpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, + SpvOpRayQueryGetIntersectionObjectRayOriginKHR = 6028, + SpvOpRayQueryGetWorldRayDirectionKHR = 6029, + SpvOpRayQueryGetWorldRayOriginKHR = 6030, + SpvOpRayQueryGetIntersectionObjectToWorldKHR = 6031, + SpvOpRayQueryGetIntersectionWorldToObjectKHR = 6032, + SpvOpAtomicFAddEXT = 6035, + SpvOpTypeBufferSurfaceINTEL = 6086, + SpvOpTypeStructContinuedINTEL = 6090, + SpvOpConstantCompositeContinuedINTEL = 6091, + SpvOpSpecConstantCompositeContinuedINTEL = 6092, + SpvOpControlBarrierArriveINTEL = 6142, + SpvOpControlBarrierWaitINTEL = 6143, + SpvOpGroupIMulKHR = 6401, + SpvOpGroupFMulKHR = 6402, + SpvOpGroupBitwiseAndKHR = 6403, + SpvOpGroupBitwiseOrKHR = 6404, + SpvOpGroupBitwiseXorKHR = 6405, + SpvOpGroupLogicalAndKHR = 6406, + SpvOpGroupLogicalOrKHR = 6407, + SpvOpGroupLogicalXorKHR = 6408, SpvOpMax = 0x7fffffff, } SpvOp; +#ifdef SPV_ENABLE_UTILITY_CODE +#ifndef __cplusplus +#include +#endif +inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; + case SpvOpNop: *hasResult = false; *hasResultType = false; break; + case SpvOpUndef: *hasResult = true; *hasResultType = true; break; + case SpvOpSourceContinued: *hasResult = false; *hasResultType = false; break; + case SpvOpSource: *hasResult = false; *hasResultType = false; break; + case SpvOpSourceExtension: *hasResult = false; *hasResultType = false; break; + case SpvOpName: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberName: *hasResult = false; *hasResultType = false; break; + case SpvOpString: *hasResult = true; *hasResultType = false; break; + case SpvOpLine: *hasResult = false; *hasResultType = false; break; + case SpvOpExtension: *hasResult = false; *hasResultType = false; break; + case SpvOpExtInstImport: *hasResult = true; *hasResultType = false; break; + case SpvOpExtInst: *hasResult = true; 
*hasResultType = true; break; + case SpvOpMemoryModel: *hasResult = false; *hasResultType = false; break; + case SpvOpEntryPoint: *hasResult = false; *hasResultType = false; break; + case SpvOpExecutionMode: *hasResult = false; *hasResultType = false; break; + case SpvOpCapability: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeVoid: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeBool: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeInt: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeFloat: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeVector: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeImage: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeSampler: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeArray: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeStruct: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case SpvOpTypePointer: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeFunction: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeEvent: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeQueue: *hasResult = true; *hasResultType = false; break; + case SpvOpTypePipe: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case SpvOpConstantTrue: *hasResult = true; *hasResultType = true; break; + case 
SpvOpConstantFalse: *hasResult = true; *hasResultType = true; break; + case SpvOpConstant: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantComposite: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantSampler: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantNull: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantFalse: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstant: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case SpvOpFunction: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case SpvOpFunctionCall: *hasResult = true; *hasResultType = true; break; + case SpvOpVariable: *hasResult = true; *hasResultType = true; break; + case SpvOpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case SpvOpLoad: *hasResult = true; *hasResultType = true; break; + case SpvOpStore: *hasResult = false; *hasResultType = false; break; + case SpvOpCopyMemory: *hasResult = false; *hasResultType = false; break; + case SpvOpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case SpvOpAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpArrayLength: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case SpvOpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case 
SpvOpDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case SpvOpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorShuffle: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case SpvOpCopyObject: *hasResult = true; *hasResultType = true; break; + case SpvOpTranspose: *hasResult = true; *hasResultType = true; break; + case SpvOpSampledImage: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageFetch: *hasResult = true; *hasResultType = true; break; + case SpvOpImageGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageDrefGather: *hasResult = true; 
*hasResultType = true; break; + case SpvOpImageRead: *hasResult = true; *hasResultType = true; break; + case SpvOpImageWrite: *hasResult = false; *hasResultType = false; break; + case SpvOpImage: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySize: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryLevels: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertFToU: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertFToS: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSToF: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToF: *hasResult = true; *hasResultType = true; break; + case SpvOpUConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpSConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpFConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case SpvOpSatConvertSToU: *hasResult = true; *hasResultType = true; break; + case SpvOpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericCastToPtr: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case SpvOpBitcast: *hasResult = true; *hasResultType = true; break; 
+ case SpvOpSNegate: *hasResult = true; *hasResultType = true; break; + case SpvOpFNegate: *hasResult = true; *hasResultType = true; break; + case SpvOpIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpISub: *hasResult = true; *hasResultType = true; break; + case SpvOpFSub: *hasResult = true; *hasResultType = true; break; + case SpvOpIMul: *hasResult = true; *hasResultType = true; break; + case SpvOpFMul: *hasResult = true; *hasResultType = true; break; + case SpvOpUDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpSDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpFDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpUMod: *hasResult = true; *hasResultType = true; break; + case SpvOpSRem: *hasResult = true; *hasResultType = true; break; + case SpvOpSMod: *hasResult = true; *hasResultType = true; break; + case SpvOpFRem: *hasResult = true; *hasResultType = true; break; + case SpvOpFMod: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorTimesScalar: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case SpvOpOuterProduct: *hasResult = true; *hasResultType = true; break; + case SpvOpDot: *hasResult = true; *hasResultType = true; break; + case SpvOpIAddCarry: *hasResult = true; *hasResultType = true; break; + case SpvOpISubBorrow: *hasResult = true; *hasResultType = true; break; + case SpvOpUMulExtended: *hasResult = true; *hasResultType = true; break; + case SpvOpSMulExtended: *hasResult = true; *hasResultType = true; break; + case SpvOpAny: *hasResult = true; *hasResultType = true; break; + case SpvOpAll: 
*hasResult = true; *hasResultType = true; break; + case SpvOpIsNan: *hasResult = true; *hasResultType = true; break; + case SpvOpIsInf: *hasResult = true; *hasResultType = true; break; + case SpvOpIsFinite: *hasResult = true; *hasResultType = true; break; + case SpvOpIsNormal: *hasResult = true; *hasResultType = true; break; + case SpvOpSignBitSet: *hasResult = true; *hasResultType = true; break; + case SpvOpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case SpvOpOrdered: *hasResult = true; *hasResultType = true; break; + case SpvOpUnordered: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalOr: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalNot: *hasResult = true; *hasResultType = true; break; + case SpvOpSelect: *hasResult = true; *hasResultType = true; break; + case SpvOpIEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpINotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpULessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpSLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdNotEqual: *hasResult = true; 
*hasResultType = true; break; + case SpvOpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftLeftLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseXor: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpNot: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpBitReverse: *hasResult = true; *hasResultType = true; break; + case SpvOpBitCount: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdx: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdy: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidth: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdxFine: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdyFine: *hasResult = true; *hasResultType = true; break; + 
case SpvOpFwidthFine: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpEmitVertex: *hasResult = false; *hasResultType = false; break; + case SpvOpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case SpvOpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + case SpvOpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicStore: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicISub: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicOr: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicXor: *hasResult = true; *hasResultType = true; break; + case SpvOpPhi: 
*hasResult = true; *hasResultType = true; break; + case SpvOpLoopMerge: *hasResult = false; *hasResultType = false; break; + case SpvOpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case SpvOpLabel: *hasResult = true; *hasResultType = false; break; + case SpvOpBranch: *hasResult = false; *hasResultType = false; break; + case SpvOpBranchConditional: *hasResult = false; *hasResultType = false; break; + case SpvOpSwitch: *hasResult = false; *hasResultType = false; break; + case SpvOpKill: *hasResult = false; *hasResultType = false; break; + case SpvOpReturn: *hasResult = false; *hasResultType = false; break; + case SpvOpReturnValue: *hasResult = false; *hasResultType = false; break; + case SpvOpUnreachable: *hasResult = false; *hasResultType = false; break; + case SpvOpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case SpvOpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupWaitEvents: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupAll: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupAny: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpReadPipe: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePipe: *hasResult 
= true; *hasResultType = true; break; + case SpvOpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case SpvOpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case SpvOpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case SpvOpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case SpvOpEnqueueMarker: *hasResult = true; *hasResultType = true; break; + case SpvOpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case SpvOpRetainEvent: *hasResult = false; *hasResultType = false; break; + case SpvOpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case SpvOpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case SpvOpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case SpvOpSetUserEventStatus: *hasResult = false; 
*hasResultType = false; break; + case SpvOpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case SpvOpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case SpvOpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case SpvOpNoLine: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case SpvOpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case SpvOpSizeOf: *hasResult = true; *hasResultType = true; break; + case SpvOpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case SpvOpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case SpvOpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + 
case SpvOpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case SpvOpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case SpvOpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case SpvOpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case SpvOpDecorateId: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case 
SpvOpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case SpvOpCopyLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrDiff: *hasResult = true; *hasResultType = true; break; + case SpvOpTerminateInvocation: *hasResult = false; *hasResultType = false; break; + case 
SpvOpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformRotateKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpTraceRayKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpIgnoreIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpTerminateRayKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpSDot: *hasResult = true; *hasResultType = true; break; + case SpvOpUDot: *hasResult = true; *hasResultType = true; break; + case SpvOpSUDot: *hasResult = true; *hasResultType = true; break; + case SpvOpSDotAccSat: *hasResult = true; *hasResultType = true; break; + case SpvOpUDotAccSat: *hasResult = true; *hasResultType = true; break; + case SpvOpSUDotAccSat: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeRayQueryKHR: *hasResult = true; *hasResultType = false; break; + case SpvOpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionTypeKHR: *hasResult = 
true; *hasResultType = true; break; + case SpvOpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case SpvOpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case SpvOpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case SpvOpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceMotionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case SpvOpExecuteCallableNV: *hasResult = 
false; *hasResultType = false; break; + case SpvOpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + case SpvOpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case SpvOpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case SpvOpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case SpvOpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case SpvOpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break; + case SpvOpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToImageNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertImageToUNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSamplerToUNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break; + case SpvOpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupImageBlockReadINTEL: *hasResult = true; 
*hasResultType = true; break; + case SpvOpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAsmINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAsmCallINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpAssumeTrueKHR: 
*hasResult = false; *hasResultType = false; break; + case SpvOpExpectKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpDecorateString: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case SpvOpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + 
case SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + 
case SpvOpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = 
true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; 
*hasResultType = true; break; + case SpvOpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; 
break; + case SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpVariableLengthArrayINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSaveMemoryINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpRestoreMemoryINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpArbitraryFloatSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCastINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCastFromIntINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCastToIntINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatAddINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatMulINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatDivINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatGTINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatGEINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLTINTEL: *hasResult 
= true; *hasResultType = true; break; + case SpvOpArbitraryFloatLEINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatEQINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatRecipINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatRSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCbrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatHypotINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLogINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLog2INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLog10INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLog1pINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExp2INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExp10INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExpm1INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSinINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatASinINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatASinPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatACosINTEL: *hasResult = true; 
*hasResultType = true; break; + case SpvOpArbitraryFloatACosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatATanINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatATanPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatATan2INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatPowINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpAliasDomainDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpAliasScopeDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpAliasScopeListDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedLogINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedExpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrCastToCrossWorkgroupINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpCrossWorkgroupCastToPtrINTEL: *hasResult = true; *hasResultType = true; 
break; + case SpvOpReadPipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFPGARegINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFAddEXT: *hasResult = 
true; *hasResultType = true; break; + case SpvOpTypeBufferSurfaceINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrierWaitINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupIMulKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMulKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBitwiseAndKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBitwiseOrKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBitwiseXorKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupLogicalAndKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + #endif diff --git a/spirv.hpp b/spirv.hpp index adc13de3f34..e25264af28f 100644 --- a/spirv.hpp +++ b/spirv.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2014-2019 The Khronos Group Inc. +// Copyright (c) 2014-2020 The Khronos Group Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and/or associated documentation files (the "Materials"), @@ -26,7 +26,7 @@ // the Binary Section of the SPIR-V specification. 
// Enumeration tokens for SPIR-V, in various styles: -// C, C++, C++11, JSON, Lua, Python, C#, D +// C, C++, C++11, JSON, Lua, Python, C#, D, Beef // // - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL // - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL @@ -36,6 +36,8 @@ // - C# will use enum classes in the Specification class located in the "Spv" namespace, // e.g.: Spv.Specification.SourceLanguage.GLSL // - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +// - Beef will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL // // Some tokens act like mask values, which can be OR'd together, // while others are mutually exclusive. The mask-like ones have @@ -49,12 +51,12 @@ namespace spv { typedef unsigned int Id; -#define SPV_VERSION 0x10300 -#define SPV_REVISION 6 +#define SPV_VERSION 0x10600 +#define SPV_REVISION 1 static const unsigned int MagicNumber = 0x07230203; -static const unsigned int Version = 0x00010300; -static const unsigned int Revision = 6; +static const unsigned int Version = 0x00010600; +static const unsigned int Revision = 1; static const unsigned int OpCodeMask = 0xffff; static const unsigned int WordCountShift = 16; @@ -65,6 +67,8 @@ enum SourceLanguage { SourceLanguageOpenCL_C = 3, SourceLanguageOpenCL_CPP = 4, SourceLanguageHLSL = 5, + SourceLanguageCPP_for_OpenCL = 6, + SourceLanguageSYCL = 7, SourceLanguageMax = 0x7fffffff, }; @@ -78,12 +82,20 @@ enum ExecutionModel { ExecutionModelKernel = 6, ExecutionModelTaskNV = 5267, ExecutionModelMeshNV = 5268, + ExecutionModelRayGenerationKHR = 5313, ExecutionModelRayGenerationNV = 5313, + ExecutionModelIntersectionKHR = 5314, ExecutionModelIntersectionNV = 5314, + ExecutionModelAnyHitKHR = 5315, ExecutionModelAnyHitNV = 5315, + ExecutionModelClosestHitKHR = 5316, ExecutionModelClosestHitNV = 5316, + ExecutionModelMissKHR = 5317, ExecutionModelMissNV = 5317, 
+ ExecutionModelCallableKHR = 5318, ExecutionModelCallableNV = 5318, + ExecutionModelTaskEXT = 5364, + ExecutionModelMeshEXT = 5365, ExecutionModelMax = 0x7fffffff, }; @@ -91,6 +103,7 @@ enum AddressingModel { AddressingModelLogical = 0, AddressingModelPhysical32 = 1, AddressingModelPhysical64 = 2, + AddressingModelPhysicalStorageBuffer64 = 5348, AddressingModelPhysicalStorageBuffer64EXT = 5348, AddressingModelMax = 0x7fffffff, }; @@ -99,6 +112,7 @@ enum MemoryModel { MemoryModelSimple = 0, MemoryModelGLSL450 = 1, MemoryModelOpenCL = 2, + MemoryModelVulkan = 3, MemoryModelVulkanKHR = 3, MemoryModelMax = 0x7fffffff, }; @@ -142,18 +156,46 @@ enum ExecutionMode { ExecutionModeSubgroupsPerWorkgroupId = 37, ExecutionModeLocalSizeId = 38, ExecutionModeLocalSizeHintId = 39, + ExecutionModeSubgroupUniformControlFlowKHR = 4421, ExecutionModePostDepthCoverage = 4446, ExecutionModeDenormPreserve = 4459, ExecutionModeDenormFlushToZero = 4460, ExecutionModeSignedZeroInfNanPreserve = 4461, ExecutionModeRoundingModeRTE = 4462, ExecutionModeRoundingModeRTZ = 4463, + ExecutionModeEarlyAndLateFragmentTestsAMD = 5017, ExecutionModeStencilRefReplacingEXT = 5027, + ExecutionModeStencilRefUnchangedFrontAMD = 5079, + ExecutionModeStencilRefGreaterFrontAMD = 5080, + ExecutionModeStencilRefLessFrontAMD = 5081, + ExecutionModeStencilRefUnchangedBackAMD = 5082, + ExecutionModeStencilRefGreaterBackAMD = 5083, + ExecutionModeStencilRefLessBackAMD = 5084, + ExecutionModeOutputLinesEXT = 5269, ExecutionModeOutputLinesNV = 5269, + ExecutionModeOutputPrimitivesEXT = 5270, ExecutionModeOutputPrimitivesNV = 5270, ExecutionModeDerivativeGroupQuadsNV = 5289, ExecutionModeDerivativeGroupLinearNV = 5290, + ExecutionModeOutputTrianglesEXT = 5298, ExecutionModeOutputTrianglesNV = 5298, + ExecutionModePixelInterlockOrderedEXT = 5366, + ExecutionModePixelInterlockUnorderedEXT = 5367, + ExecutionModeSampleInterlockOrderedEXT = 5368, + ExecutionModeSampleInterlockUnorderedEXT = 5369, + 
ExecutionModeShadingRateInterlockOrderedEXT = 5370, + ExecutionModeShadingRateInterlockUnorderedEXT = 5371, + ExecutionModeSharedLocalMemorySizeINTEL = 5618, + ExecutionModeRoundingModeRTPINTEL = 5620, + ExecutionModeRoundingModeRTNINTEL = 5621, + ExecutionModeFloatingPointModeALTINTEL = 5622, + ExecutionModeFloatingPointModeIEEEINTEL = 5623, + ExecutionModeMaxWorkgroupSizeINTEL = 5893, + ExecutionModeMaxWorkDimINTEL = 5894, + ExecutionModeNoGlobalOffsetINTEL = 5895, + ExecutionModeNumSIMDWorkitemsINTEL = 5896, + ExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + ExecutionModeNamedBarrierCountINTEL = 6417, ExecutionModeMax = 0x7fffffff, }; @@ -171,13 +213,24 @@ enum StorageClass { StorageClassAtomicCounter = 10, StorageClassImage = 11, StorageClassStorageBuffer = 12, + StorageClassCallableDataKHR = 5328, StorageClassCallableDataNV = 5328, + StorageClassIncomingCallableDataKHR = 5329, StorageClassIncomingCallableDataNV = 5329, + StorageClassRayPayloadKHR = 5338, StorageClassRayPayloadNV = 5338, + StorageClassHitAttributeKHR = 5339, StorageClassHitAttributeNV = 5339, + StorageClassIncomingRayPayloadKHR = 5342, StorageClassIncomingRayPayloadNV = 5342, + StorageClassShaderRecordBufferKHR = 5343, StorageClassShaderRecordBufferNV = 5343, + StorageClassPhysicalStorageBuffer = 5349, StorageClassPhysicalStorageBufferEXT = 5349, + StorageClassTaskPayloadWorkgroupEXT = 5402, + StorageClassCodeSectionINTEL = 5605, + StorageClassDeviceOnlyINTEL = 5936, + StorageClassHostOnlyINTEL = 5937, StorageClassMax = 0x7fffffff, }; @@ -248,6 +301,8 @@ enum ImageFormat { ImageFormatRg8ui = 37, ImageFormatR16ui = 38, ImageFormatR8ui = 39, + ImageFormatR64ui = 40, + ImageFormatR64i = 41, ImageFormatMax = 0x7fffffff, }; @@ -305,10 +360,18 @@ enum ImageOperandsShift { ImageOperandsConstOffsetsShift = 5, ImageOperandsSampleShift = 6, ImageOperandsMinLodShift = 7, + ImageOperandsMakeTexelAvailableShift = 8, ImageOperandsMakeTexelAvailableKHRShift = 8, + ImageOperandsMakeTexelVisibleShift = 9, 
ImageOperandsMakeTexelVisibleKHRShift = 9, + ImageOperandsNonPrivateTexelShift = 10, ImageOperandsNonPrivateTexelKHRShift = 10, + ImageOperandsVolatileTexelShift = 11, ImageOperandsVolatileTexelKHRShift = 11, + ImageOperandsSignExtendShift = 12, + ImageOperandsZeroExtendShift = 13, + ImageOperandsNontemporalShift = 14, + ImageOperandsOffsetsShift = 16, ImageOperandsMax = 0x7fffffff, }; @@ -322,10 +385,18 @@ enum ImageOperandsMask { ImageOperandsConstOffsetsMask = 0x00000020, ImageOperandsSampleMask = 0x00000040, ImageOperandsMinLodMask = 0x00000080, + ImageOperandsMakeTexelAvailableMask = 0x00000100, ImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + ImageOperandsMakeTexelVisibleMask = 0x00000200, ImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + ImageOperandsNonPrivateTexelMask = 0x00000400, ImageOperandsNonPrivateTexelKHRMask = 0x00000400, + ImageOperandsVolatileTexelMask = 0x00000800, ImageOperandsVolatileTexelKHRMask = 0x00000800, + ImageOperandsSignExtendMask = 0x00001000, + ImageOperandsZeroExtendMask = 0x00002000, + ImageOperandsNontemporalMask = 0x00004000, + ImageOperandsOffsetsMask = 0x00010000, }; enum FPFastMathModeShift { @@ -334,6 +405,8 @@ enum FPFastMathModeShift { FPFastMathModeNSZShift = 2, FPFastMathModeAllowRecipShift = 3, FPFastMathModeFastShift = 4, + FPFastMathModeAllowContractFastINTELShift = 16, + FPFastMathModeAllowReassocINTELShift = 17, FPFastMathModeMax = 0x7fffffff, }; @@ -344,6 +417,8 @@ enum FPFastMathModeMask { FPFastMathModeNSZMask = 0x00000004, FPFastMathModeAllowRecipMask = 0x00000008, FPFastMathModeFastMask = 0x00000010, + FPFastMathModeAllowContractFastINTELMask = 0x00010000, + FPFastMathModeAllowReassocINTELMask = 0x00020000, }; enum FPRoundingMode { @@ -357,6 +432,7 @@ enum FPRoundingMode { enum LinkageType { LinkageTypeExport = 0, LinkageTypeImport = 1, + LinkageTypeLinkOnceODR = 2, LinkageTypeMax = 0x7fffffff, }; @@ -406,6 +482,7 @@ enum Decoration { DecorationNonWritable = 24, DecorationNonReadable = 25, 
DecorationUniform = 26, + DecorationUniformId = 27, DecorationSaturatedConversion = 28, DecorationStream = 29, DecorationLocation = 30, @@ -433,15 +510,64 @@ enum Decoration { DecorationPassthroughNV = 5250, DecorationViewportRelativeNV = 5252, DecorationSecondaryViewportRelativeNV = 5256, + DecorationPerPrimitiveEXT = 5271, DecorationPerPrimitiveNV = 5271, DecorationPerViewNV = 5272, DecorationPerTaskNV = 5273, + DecorationPerVertexKHR = 5285, DecorationPerVertexNV = 5285, + DecorationNonUniform = 5300, DecorationNonUniformEXT = 5300, + DecorationRestrictPointer = 5355, DecorationRestrictPointerEXT = 5355, + DecorationAliasedPointer = 5356, DecorationAliasedPointerEXT = 5356, + DecorationBindlessSamplerNV = 5398, + DecorationBindlessImageNV = 5399, + DecorationBoundSamplerNV = 5400, + DecorationBoundImageNV = 5401, + DecorationSIMTCallINTEL = 5599, + DecorationReferencedIndirectlyINTEL = 5602, + DecorationClobberINTEL = 5607, + DecorationSideEffectsINTEL = 5608, + DecorationVectorComputeVariableINTEL = 5624, + DecorationFuncParamIOKindINTEL = 5625, + DecorationVectorComputeFunctionINTEL = 5626, + DecorationStackCallINTEL = 5627, + DecorationGlobalVariableOffsetINTEL = 5628, + DecorationCounterBuffer = 5634, DecorationHlslCounterBufferGOOGLE = 5634, DecorationHlslSemanticGOOGLE = 5635, + DecorationUserSemantic = 5635, + DecorationUserTypeGOOGLE = 5636, + DecorationFunctionRoundingModeINTEL = 5822, + DecorationFunctionDenormModeINTEL = 5823, + DecorationRegisterINTEL = 5825, + DecorationMemoryINTEL = 5826, + DecorationNumbanksINTEL = 5827, + DecorationBankwidthINTEL = 5828, + DecorationMaxPrivateCopiesINTEL = 5829, + DecorationSinglepumpINTEL = 5830, + DecorationDoublepumpINTEL = 5831, + DecorationMaxReplicatesINTEL = 5832, + DecorationSimpleDualPortINTEL = 5833, + DecorationMergeINTEL = 5834, + DecorationBankBitsINTEL = 5835, + DecorationForcePow2DepthINTEL = 5836, + DecorationBurstCoalesceINTEL = 5899, + DecorationCacheSizeINTEL = 5900, + 
DecorationDontStaticallyCoalesceINTEL = 5901, + DecorationPrefetchINTEL = 5902, + DecorationStallEnableINTEL = 5905, + DecorationFuseLoopsInFunctionINTEL = 5907, + DecorationAliasScopeINTEL = 5914, + DecorationNoAliasINTEL = 5915, + DecorationBufferLocationINTEL = 5921, + DecorationIOPipeStorageINTEL = 5944, + DecorationFunctionFloatingPointModeINTEL = 6080, + DecorationSingleElementVectorINTEL = 6085, + DecorationVectorComputeCallableFunctionINTEL = 6087, + DecorationMediaBlockIOINTEL = 6140, DecorationMax = 0x7fffffff, }; @@ -500,8 +626,10 @@ enum BuiltIn { BuiltInBaseVertex = 4424, BuiltInBaseInstance = 4425, BuiltInDrawIndex = 4426, + BuiltInPrimitiveShadingRateKHR = 4432, BuiltInDeviceIndex = 4438, BuiltInViewIndex = 4440, + BuiltInShadingRateKHR = 4444, BuiltInBaryCoordNoPerspAMD = 4992, BuiltInBaryCoordNoPerspCentroidAMD = 4993, BuiltInBaryCoordNoPerspSampleAMD = 4994, @@ -524,26 +652,52 @@ enum BuiltIn { BuiltInLayerPerViewNV = 5279, BuiltInMeshViewCountNV = 5280, BuiltInMeshViewIndicesNV = 5281, + BuiltInBaryCoordKHR = 5286, BuiltInBaryCoordNV = 5286, + BuiltInBaryCoordNoPerspKHR = 5287, BuiltInBaryCoordNoPerspNV = 5287, BuiltInFragSizeEXT = 5292, BuiltInFragmentSizeNV = 5292, BuiltInFragInvocationCountEXT = 5293, BuiltInInvocationsPerPixelNV = 5293, + BuiltInPrimitivePointIndicesEXT = 5294, + BuiltInPrimitiveLineIndicesEXT = 5295, + BuiltInPrimitiveTriangleIndicesEXT = 5296, + BuiltInCullPrimitiveEXT = 5299, + BuiltInLaunchIdKHR = 5319, BuiltInLaunchIdNV = 5319, + BuiltInLaunchSizeKHR = 5320, BuiltInLaunchSizeNV = 5320, + BuiltInWorldRayOriginKHR = 5321, BuiltInWorldRayOriginNV = 5321, + BuiltInWorldRayDirectionKHR = 5322, BuiltInWorldRayDirectionNV = 5322, + BuiltInObjectRayOriginKHR = 5323, BuiltInObjectRayOriginNV = 5323, + BuiltInObjectRayDirectionKHR = 5324, BuiltInObjectRayDirectionNV = 5324, + BuiltInRayTminKHR = 5325, BuiltInRayTminNV = 5325, + BuiltInRayTmaxKHR = 5326, BuiltInRayTmaxNV = 5326, + BuiltInInstanceCustomIndexKHR = 5327, 
BuiltInInstanceCustomIndexNV = 5327, + BuiltInObjectToWorldKHR = 5330, BuiltInObjectToWorldNV = 5330, + BuiltInWorldToObjectKHR = 5331, BuiltInWorldToObjectNV = 5331, BuiltInHitTNV = 5332, + BuiltInHitKindKHR = 5333, BuiltInHitKindNV = 5333, + BuiltInCurrentRayTimeNV = 5334, + BuiltInIncomingRayFlagsKHR = 5351, BuiltInIncomingRayFlagsNV = 5351, + BuiltInRayGeometryIndexKHR = 5352, + BuiltInWarpsPerSMNV = 5374, + BuiltInSMCountNV = 5375, + BuiltInWarpIDNV = 5376, + BuiltInSMIDNV = 5377, + BuiltInCullMaskKHR = 6021, BuiltInMax = 0x7fffffff, }; @@ -564,6 +718,19 @@ enum LoopControlShift { LoopControlDontUnrollShift = 1, LoopControlDependencyInfiniteShift = 2, LoopControlDependencyLengthShift = 3, + LoopControlMinIterationsShift = 4, + LoopControlMaxIterationsShift = 5, + LoopControlIterationMultipleShift = 6, + LoopControlPeelCountShift = 7, + LoopControlPartialCountShift = 8, + LoopControlInitiationIntervalINTELShift = 16, + LoopControlMaxConcurrencyINTELShift = 17, + LoopControlDependencyArrayINTELShift = 18, + LoopControlPipelineEnableINTELShift = 19, + LoopControlLoopCoalesceINTELShift = 20, + LoopControlMaxInterleavingINTELShift = 21, + LoopControlSpeculatedIterationsINTELShift = 22, + LoopControlNoFusionINTELShift = 23, LoopControlMax = 0x7fffffff, }; @@ -573,6 +740,19 @@ enum LoopControlMask { LoopControlDontUnrollMask = 0x00000002, LoopControlDependencyInfiniteMask = 0x00000004, LoopControlDependencyLengthMask = 0x00000008, + LoopControlMinIterationsMask = 0x00000010, + LoopControlMaxIterationsMask = 0x00000020, + LoopControlIterationMultipleMask = 0x00000040, + LoopControlPeelCountMask = 0x00000080, + LoopControlPartialCountMask = 0x00000100, + LoopControlInitiationIntervalINTELMask = 0x00010000, + LoopControlMaxConcurrencyINTELMask = 0x00020000, + LoopControlDependencyArrayINTELMask = 0x00040000, + LoopControlPipelineEnableINTELMask = 0x00080000, + LoopControlLoopCoalesceINTELMask = 0x00100000, + LoopControlMaxInterleavingINTELMask = 0x00200000, + 
LoopControlSpeculatedIterationsINTELMask = 0x00400000, + LoopControlNoFusionINTELMask = 0x00800000, }; enum FunctionControlShift { @@ -580,6 +760,7 @@ enum FunctionControlShift { FunctionControlDontInlineShift = 1, FunctionControlPureShift = 2, FunctionControlConstShift = 3, + FunctionControlOptNoneINTELShift = 16, FunctionControlMax = 0x7fffffff, }; @@ -589,6 +770,7 @@ enum FunctionControlMask { FunctionControlDontInlineMask = 0x00000002, FunctionControlPureMask = 0x00000004, FunctionControlConstMask = 0x00000008, + FunctionControlOptNoneINTELMask = 0x00010000, }; enum MemorySemanticsShift { @@ -602,9 +784,13 @@ enum MemorySemanticsShift { MemorySemanticsCrossWorkgroupMemoryShift = 9, MemorySemanticsAtomicCounterMemoryShift = 10, MemorySemanticsImageMemoryShift = 11, + MemorySemanticsOutputMemoryShift = 12, MemorySemanticsOutputMemoryKHRShift = 12, + MemorySemanticsMakeAvailableShift = 13, MemorySemanticsMakeAvailableKHRShift = 13, + MemorySemanticsMakeVisibleShift = 14, MemorySemanticsMakeVisibleKHRShift = 14, + MemorySemanticsVolatileShift = 15, MemorySemanticsMax = 0x7fffffff, }; @@ -620,18 +806,27 @@ enum MemorySemanticsMask { MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, MemorySemanticsAtomicCounterMemoryMask = 0x00000400, MemorySemanticsImageMemoryMask = 0x00000800, + MemorySemanticsOutputMemoryMask = 0x00001000, MemorySemanticsOutputMemoryKHRMask = 0x00001000, + MemorySemanticsMakeAvailableMask = 0x00002000, MemorySemanticsMakeAvailableKHRMask = 0x00002000, + MemorySemanticsMakeVisibleMask = 0x00004000, MemorySemanticsMakeVisibleKHRMask = 0x00004000, + MemorySemanticsVolatileMask = 0x00008000, }; enum MemoryAccessShift { MemoryAccessVolatileShift = 0, MemoryAccessAlignedShift = 1, MemoryAccessNontemporalShift = 2, + MemoryAccessMakePointerAvailableShift = 3, MemoryAccessMakePointerAvailableKHRShift = 3, + MemoryAccessMakePointerVisibleShift = 4, MemoryAccessMakePointerVisibleKHRShift = 4, + MemoryAccessNonPrivatePointerShift = 5, 
MemoryAccessNonPrivatePointerKHRShift = 5, + MemoryAccessAliasScopeINTELMaskShift = 16, + MemoryAccessNoAliasINTELMaskShift = 17, MemoryAccessMax = 0x7fffffff, }; @@ -640,9 +835,14 @@ enum MemoryAccessMask { MemoryAccessVolatileMask = 0x00000001, MemoryAccessAlignedMask = 0x00000002, MemoryAccessNontemporalMask = 0x00000004, + MemoryAccessMakePointerAvailableMask = 0x00000008, MemoryAccessMakePointerAvailableKHRMask = 0x00000008, + MemoryAccessMakePointerVisibleMask = 0x00000010, MemoryAccessMakePointerVisibleKHRMask = 0x00000010, + MemoryAccessNonPrivatePointerMask = 0x00000020, MemoryAccessNonPrivatePointerKHRMask = 0x00000020, + MemoryAccessAliasScopeINTELMaskMask = 0x00010000, + MemoryAccessNoAliasINTELMaskMask = 0x00020000, }; enum Scope { @@ -651,7 +851,9 @@ enum Scope { ScopeWorkgroup = 2, ScopeSubgroup = 3, ScopeInvocation = 4, + ScopeQueueFamily = 5, ScopeQueueFamilyKHR = 5, + ScopeShaderCallKHR = 6, ScopeMax = 0x7fffffff, }; @@ -751,8 +953,15 @@ enum Capability { CapabilityGroupNonUniformShuffleRelative = 66, CapabilityGroupNonUniformClustered = 67, CapabilityGroupNonUniformQuad = 68, + CapabilityShaderLayer = 69, + CapabilityShaderViewportIndex = 70, + CapabilityUniformDecoration = 71, + CapabilityFragmentShadingRateKHR = 4422, CapabilitySubgroupBallotKHR = 4423, CapabilityDrawParameters = 4427, + CapabilityWorkgroupMemoryExplicitLayoutKHR = 4428, + CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR = 4429, + CapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR = 4430, CapabilitySubgroupVoteKHR = 4431, CapabilityStorageBuffer16BitAccess = 4433, CapabilityStorageUniformBufferBlock16 = 4433, @@ -774,11 +983,17 @@ enum Capability { CapabilitySignedZeroInfNanPreserve = 4466, CapabilityRoundingModeRTE = 4467, CapabilityRoundingModeRTZ = 4468, + CapabilityRayQueryProvisionalKHR = 4471, + CapabilityRayQueryKHR = 4472, + CapabilityRayTraversalPrimitiveCullingKHR = 4478, + CapabilityRayTracingKHR = 4479, CapabilityFloat16ImageAMD = 5008, 
CapabilityImageGatherBiasLodAMD = 5009, CapabilityFragmentMaskAMD = 5010, CapabilityStencilExportEXT = 5013, CapabilityImageReadWriteLodAMD = 5015, + CapabilityInt64ImageEXT = 5016, + CapabilityShaderClockKHR = 5055, CapabilitySampleMaskOverrideCoverageNV = 5249, CapabilityGeometryShaderPassthroughNV = 5251, CapabilityShaderViewportIndexLayerEXT = 5254, @@ -789,35 +1004,217 @@ enum Capability { CapabilityFragmentFullyCoveredEXT = 5265, CapabilityMeshShadingNV = 5266, CapabilityImageFootprintNV = 5282, + CapabilityMeshShadingEXT = 5283, + CapabilityFragmentBarycentricKHR = 5284, CapabilityFragmentBarycentricNV = 5284, CapabilityComputeDerivativeGroupQuadsNV = 5288, CapabilityFragmentDensityEXT = 5291, CapabilityShadingRateNV = 5291, CapabilityGroupNonUniformPartitionedNV = 5297, + CapabilityShaderNonUniform = 5301, CapabilityShaderNonUniformEXT = 5301, + CapabilityRuntimeDescriptorArray = 5302, CapabilityRuntimeDescriptorArrayEXT = 5302, + CapabilityInputAttachmentArrayDynamicIndexing = 5303, CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + CapabilityUniformTexelBufferArrayDynamicIndexing = 5304, CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + CapabilityStorageTexelBufferArrayDynamicIndexing = 5305, CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + CapabilityUniformBufferArrayNonUniformIndexing = 5306, CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + CapabilitySampledImageArrayNonUniformIndexing = 5307, CapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + CapabilityStorageBufferArrayNonUniformIndexing = 5308, CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + CapabilityStorageImageArrayNonUniformIndexing = 5309, CapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + CapabilityInputAttachmentArrayNonUniformIndexing = 5310, CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + CapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 
5311, + CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, CapabilityRayTracingNV = 5340, + CapabilityRayTracingMotionBlurNV = 5341, + CapabilityVulkanMemoryModel = 5345, CapabilityVulkanMemoryModelKHR = 5345, + CapabilityVulkanMemoryModelDeviceScope = 5346, CapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + CapabilityPhysicalStorageBufferAddresses = 5347, CapabilityPhysicalStorageBufferAddressesEXT = 5347, CapabilityComputeDerivativeGroupLinearNV = 5350, + CapabilityRayTracingProvisionalKHR = 5353, + CapabilityCooperativeMatrixNV = 5357, + CapabilityFragmentShaderSampleInterlockEXT = 5363, + CapabilityFragmentShaderShadingRateInterlockEXT = 5372, + CapabilityShaderSMBuiltinsNV = 5373, + CapabilityFragmentShaderPixelInterlockEXT = 5378, + CapabilityDemoteToHelperInvocation = 5379, + CapabilityDemoteToHelperInvocationEXT = 5379, + CapabilityBindlessTextureNV = 5390, CapabilitySubgroupShuffleINTEL = 5568, CapabilitySubgroupBufferBlockIOINTEL = 5569, CapabilitySubgroupImageBlockIOINTEL = 5570, CapabilitySubgroupImageMediaBlockIOINTEL = 5579, + CapabilityRoundToInfinityINTEL = 5582, + CapabilityFloatingPointModeINTEL = 5583, + CapabilityIntegerFunctions2INTEL = 5584, + CapabilityFunctionPointersINTEL = 5603, + CapabilityIndirectReferencesINTEL = 5604, + CapabilityAsmINTEL = 5606, + CapabilityAtomicFloat32MinMaxEXT = 5612, + CapabilityAtomicFloat64MinMaxEXT = 5613, + CapabilityAtomicFloat16MinMaxEXT = 5616, + CapabilityVectorComputeINTEL = 5617, + CapabilityVectorAnyINTEL = 5619, + CapabilityExpectAssumeKHR = 5629, + CapabilitySubgroupAvcMotionEstimationINTEL = 5696, + CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, + CapabilityVariableLengthArrayINTEL = 5817, + CapabilityFunctionFloatControlINTEL = 5821, + CapabilityFPGAMemoryAttributesINTEL = 5824, + CapabilityFPFastMathModeINTEL = 5837, + CapabilityArbitraryPrecisionIntegersINTEL 
= 5844, + CapabilityArbitraryPrecisionFloatingPointINTEL = 5845, + CapabilityUnstructuredLoopControlsINTEL = 5886, + CapabilityFPGALoopControlsINTEL = 5888, + CapabilityKernelAttributesINTEL = 5892, + CapabilityFPGAKernelAttributesINTEL = 5897, + CapabilityFPGAMemoryAccessesINTEL = 5898, + CapabilityFPGAClusterAttributesINTEL = 5904, + CapabilityLoopFuseINTEL = 5906, + CapabilityMemoryAccessAliasingINTEL = 5910, + CapabilityFPGABufferLocationINTEL = 5920, + CapabilityArbitraryPrecisionFixedPointINTEL = 5922, + CapabilityUSMStorageClassesINTEL = 5935, + CapabilityIOPipesINTEL = 5943, + CapabilityBlockingPipesINTEL = 5945, + CapabilityFPGARegINTEL = 5948, + CapabilityDotProductInputAll = 6016, + CapabilityDotProductInputAllKHR = 6016, + CapabilityDotProductInput4x8Bit = 6017, + CapabilityDotProductInput4x8BitKHR = 6017, + CapabilityDotProductInput4x8BitPacked = 6018, + CapabilityDotProductInput4x8BitPackedKHR = 6018, + CapabilityDotProduct = 6019, + CapabilityDotProductKHR = 6019, + CapabilityRayCullMaskKHR = 6020, + CapabilityBitInstructions = 6025, + CapabilityGroupNonUniformRotateKHR = 6026, + CapabilityAtomicFloat32AddEXT = 6033, + CapabilityAtomicFloat64AddEXT = 6034, + CapabilityLongConstantCompositeINTEL = 6089, + CapabilityOptNoneINTEL = 6094, + CapabilityAtomicFloat16AddEXT = 6095, + CapabilityDebugInfoModuleINTEL = 6114, + CapabilitySplitBarrierINTEL = 6141, + CapabilityGroupUniformArithmeticKHR = 6400, CapabilityMax = 0x7fffffff, }; +enum RayFlagsShift { + RayFlagsOpaqueKHRShift = 0, + RayFlagsNoOpaqueKHRShift = 1, + RayFlagsTerminateOnFirstHitKHRShift = 2, + RayFlagsSkipClosestHitShaderKHRShift = 3, + RayFlagsCullBackFacingTrianglesKHRShift = 4, + RayFlagsCullFrontFacingTrianglesKHRShift = 5, + RayFlagsCullOpaqueKHRShift = 6, + RayFlagsCullNoOpaqueKHRShift = 7, + RayFlagsSkipTrianglesKHRShift = 8, + RayFlagsSkipAABBsKHRShift = 9, + RayFlagsMax = 0x7fffffff, +}; + +enum RayFlagsMask { + RayFlagsMaskNone = 0, + RayFlagsOpaqueKHRMask = 0x00000001, + 
RayFlagsNoOpaqueKHRMask = 0x00000002, + RayFlagsTerminateOnFirstHitKHRMask = 0x00000004, + RayFlagsSkipClosestHitShaderKHRMask = 0x00000008, + RayFlagsCullBackFacingTrianglesKHRMask = 0x00000010, + RayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020, + RayFlagsCullOpaqueKHRMask = 0x00000040, + RayFlagsCullNoOpaqueKHRMask = 0x00000080, + RayFlagsSkipTrianglesKHRMask = 0x00000100, + RayFlagsSkipAABBsKHRMask = 0x00000200, +}; + +enum RayQueryIntersection { + RayQueryIntersectionRayQueryCandidateIntersectionKHR = 0, + RayQueryIntersectionRayQueryCommittedIntersectionKHR = 1, + RayQueryIntersectionMax = 0x7fffffff, +}; + +enum RayQueryCommittedIntersectionType { + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR = 0, + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR = 1, + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR = 2, + RayQueryCommittedIntersectionTypeMax = 0x7fffffff, +}; + +enum RayQueryCandidateIntersectionType { + RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR = 0, + RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR = 1, + RayQueryCandidateIntersectionTypeMax = 0x7fffffff, +}; + +enum FragmentShadingRateShift { + FragmentShadingRateVertical2PixelsShift = 0, + FragmentShadingRateVertical4PixelsShift = 1, + FragmentShadingRateHorizontal2PixelsShift = 2, + FragmentShadingRateHorizontal4PixelsShift = 3, + FragmentShadingRateMax = 0x7fffffff, +}; + +enum FragmentShadingRateMask { + FragmentShadingRateMaskNone = 0, + FragmentShadingRateVertical2PixelsMask = 0x00000001, + FragmentShadingRateVertical4PixelsMask = 0x00000002, + FragmentShadingRateHorizontal2PixelsMask = 0x00000004, + FragmentShadingRateHorizontal4PixelsMask = 0x00000008, +}; + +enum FPDenormMode { + FPDenormModePreserve = 0, + FPDenormModeFlushToZero = 1, + FPDenormModeMax = 0x7fffffff, +}; + +enum FPOperationMode { + FPOperationModeIEEE = 0, + FPOperationModeALT = 1, + 
FPOperationModeMax = 0x7fffffff, +}; + +enum QuantizationModes { + QuantizationModesTRN = 0, + QuantizationModesTRN_ZERO = 1, + QuantizationModesRND = 2, + QuantizationModesRND_ZERO = 3, + QuantizationModesRND_INF = 4, + QuantizationModesRND_MIN_INF = 5, + QuantizationModesRND_CONV = 6, + QuantizationModesRND_CONV_ODD = 7, + QuantizationModesMax = 0x7fffffff, +}; + +enum OverflowModes { + OverflowModesWRAP = 0, + OverflowModesSAT = 1, + OverflowModesSAT_ZERO = 2, + OverflowModesSAT_SYM = 3, + OverflowModesMax = 0x7fffffff, +}; + +enum PackedVectorFormat { + PackedVectorFormatPackedVectorFormat4x8Bit = 0, + PackedVectorFormatPackedVectorFormat4x8BitKHR = 0, + PackedVectorFormatMax = 0x7fffffff, +}; + enum Op { OpNop = 0, OpUndef = 1, @@ -1159,12 +1556,42 @@ enum Op { OpGroupNonUniformLogicalXor = 364, OpGroupNonUniformQuadBroadcast = 365, OpGroupNonUniformQuadSwap = 366, + OpCopyLogical = 400, + OpPtrEqual = 401, + OpPtrNotEqual = 402, + OpPtrDiff = 403, + OpTerminateInvocation = 4416, OpSubgroupBallotKHR = 4421, OpSubgroupFirstInvocationKHR = 4422, OpSubgroupAllKHR = 4428, OpSubgroupAnyKHR = 4429, OpSubgroupAllEqualKHR = 4430, + OpGroupNonUniformRotateKHR = 4431, OpSubgroupReadInvocationKHR = 4432, + OpTraceRayKHR = 4445, + OpExecuteCallableKHR = 4446, + OpConvertUToAccelerationStructureKHR = 4447, + OpIgnoreIntersectionKHR = 4448, + OpTerminateRayKHR = 4449, + OpSDot = 4450, + OpSDotKHR = 4450, + OpUDot = 4451, + OpUDotKHR = 4451, + OpSUDot = 4452, + OpSUDotKHR = 4452, + OpSDotAccSat = 4453, + OpSDotAccSatKHR = 4453, + OpUDotAccSat = 4454, + OpUDotAccSatKHR = 4454, + OpSUDotAccSat = 4455, + OpSUDotAccSatKHR = 4455, + OpTypeRayQueryKHR = 4472, + OpRayQueryInitializeKHR = 4473, + OpRayQueryTerminateKHR = 4474, + OpRayQueryGenerateIntersectionKHR = 4475, + OpRayQueryConfirmIntersectionKHR = 4476, + OpRayQueryProceedKHR = 4477, + OpRayQueryGetIntersectionTypeKHR = 4479, OpGroupIAddNonUniformAMD = 5000, OpGroupFAddNonUniformAMD = 5001, OpGroupFMinNonUniformAMD = 5002, 
@@ -1175,15 +1602,39 @@ enum Op { OpGroupSMaxNonUniformAMD = 5007, OpFragmentMaskFetchAMD = 5011, OpFragmentFetchAMD = 5012, + OpReadClockKHR = 5056, OpImageSampleFootprintNV = 5283, + OpEmitMeshTasksEXT = 5294, + OpSetMeshOutputsEXT = 5295, OpGroupNonUniformPartitionNV = 5296, OpWritePackedPrimitiveIndices4x8NV = 5299, + OpReportIntersectionKHR = 5334, OpReportIntersectionNV = 5334, OpIgnoreIntersectionNV = 5335, OpTerminateRayNV = 5336, OpTraceNV = 5337, + OpTraceMotionNV = 5338, + OpTraceRayMotionNV = 5339, + OpTypeAccelerationStructureKHR = 5341, OpTypeAccelerationStructureNV = 5341, OpExecuteCallableNV = 5344, + OpTypeCooperativeMatrixNV = 5358, + OpCooperativeMatrixLoadNV = 5359, + OpCooperativeMatrixStoreNV = 5360, + OpCooperativeMatrixMulAddNV = 5361, + OpCooperativeMatrixLengthNV = 5362, + OpBeginInvocationInterlockEXT = 5364, + OpEndInvocationInterlockEXT = 5365, + OpDemoteToHelperInvocation = 5380, + OpDemoteToHelperInvocationEXT = 5380, + OpIsHelperInvocationEXT = 5381, + OpConvertUToImageNV = 5391, + OpConvertUToSamplerNV = 5392, + OpConvertImageToUNV = 5393, + OpConvertSamplerToUNV = 5394, + OpConvertUToSampledImageNV = 5395, + OpConvertSampledImageToUNV = 5396, + OpSamplerImageAddressingModeNV = 5397, OpSubgroupShuffleINTEL = 5571, OpSubgroupShuffleDownINTEL = 5572, OpSubgroupShuffleUpINTEL = 5573, @@ -1194,11 +1645,921 @@ enum Op { OpSubgroupImageBlockWriteINTEL = 5578, OpSubgroupImageMediaBlockReadINTEL = 5580, OpSubgroupImageMediaBlockWriteINTEL = 5581, + OpUCountLeadingZerosINTEL = 5585, + OpUCountTrailingZerosINTEL = 5586, + OpAbsISubINTEL = 5587, + OpAbsUSubINTEL = 5588, + OpIAddSatINTEL = 5589, + OpUAddSatINTEL = 5590, + OpIAverageINTEL = 5591, + OpUAverageINTEL = 5592, + OpIAverageRoundedINTEL = 5593, + OpUAverageRoundedINTEL = 5594, + OpISubSatINTEL = 5595, + OpUSubSatINTEL = 5596, + OpIMul32x16INTEL = 5597, + OpUMul32x16INTEL = 5598, + OpConstantFunctionPointerINTEL = 5600, + OpFunctionPointerCallINTEL = 5601, + OpAsmTargetINTEL = 5609, + 
OpAsmINTEL = 5610, + OpAsmCallINTEL = 5611, + OpAtomicFMinEXT = 5614, + OpAtomicFMaxEXT = 5615, + OpAssumeTrueKHR = 5630, + OpExpectKHR = 5631, + OpDecorateString = 5632, OpDecorateStringGOOGLE = 5632, + OpMemberDecorateString = 5633, OpMemberDecorateStringGOOGLE = 5633, + OpVmeImageINTEL = 5699, + OpTypeVmeImageINTEL = 5700, + OpTypeAvcImePayloadINTEL = 5701, + OpTypeAvcRefPayloadINTEL = 5702, + OpTypeAvcSicPayloadINTEL = 5703, + OpTypeAvcMcePayloadINTEL = 5704, + OpTypeAvcMceResultINTEL = 5705, + OpTypeAvcImeResultINTEL = 5706, + OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + OpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + OpTypeAvcImeDualReferenceStreaminINTEL = 5710, + OpTypeAvcRefResultINTEL = 5711, + OpTypeAvcSicResultINTEL = 5712, + OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + 
OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + OpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + OpSubgroupAvcMceConvertToImeResultINTEL = 5733, + OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + OpSubgroupAvcMceConvertToRefResultINTEL = 5735, + OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + OpSubgroupAvcMceConvertToSicResultINTEL = 5737, + OpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + OpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + OpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + OpSubgroupAvcImeInitializeINTEL = 5747, + OpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + OpSubgroupAvcImeSetDualReferenceINTEL = 5749, + OpSubgroupAvcImeRefWindowSizeINTEL = 5750, + OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + OpSubgroupAvcImeSetWeightedSadINTEL = 5756, + OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + OpSubgroupAvcImeConvertToMceResultINTEL = 5765, + OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + 
OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + OpSubgroupAvcImeGetBorderReachedINTEL = 5776, + OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + OpSubgroupAvcFmeInitializeINTEL = 5781, + OpSubgroupAvcBmeInitializeINTEL = 5782, + OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + OpSubgroupAvcRefConvertToMceResultINTEL = 5790, + OpSubgroupAvcSicInitializeINTEL = 5791, + OpSubgroupAvcSicConfigureSkcINTEL = 5792, + OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + 
OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + OpSubgroupAvcSicEvaluateIpeINTEL = 5803, + OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + OpSubgroupAvcSicConvertToMceResultINTEL = 5808, + OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + OpSubgroupAvcSicGetInterRawSadsINTEL = 5816, + OpVariableLengthArrayINTEL = 5818, + OpSaveMemoryINTEL = 5819, + OpRestoreMemoryINTEL = 5820, + OpArbitraryFloatSinCosPiINTEL = 5840, + OpArbitraryFloatCastINTEL = 5841, + OpArbitraryFloatCastFromIntINTEL = 5842, + OpArbitraryFloatCastToIntINTEL = 5843, + OpArbitraryFloatAddINTEL = 5846, + OpArbitraryFloatSubINTEL = 5847, + OpArbitraryFloatMulINTEL = 5848, + OpArbitraryFloatDivINTEL = 5849, + OpArbitraryFloatGTINTEL = 5850, + OpArbitraryFloatGEINTEL = 5851, + OpArbitraryFloatLTINTEL = 5852, + OpArbitraryFloatLEINTEL = 5853, + OpArbitraryFloatEQINTEL = 5854, + OpArbitraryFloatRecipINTEL = 5855, + OpArbitraryFloatRSqrtINTEL = 5856, + OpArbitraryFloatCbrtINTEL = 5857, + OpArbitraryFloatHypotINTEL = 5858, + OpArbitraryFloatSqrtINTEL = 5859, + OpArbitraryFloatLogINTEL = 5860, + OpArbitraryFloatLog2INTEL = 5861, + OpArbitraryFloatLog10INTEL = 5862, + OpArbitraryFloatLog1pINTEL = 5863, + OpArbitraryFloatExpINTEL = 5864, + OpArbitraryFloatExp2INTEL = 5865, + OpArbitraryFloatExp10INTEL = 5866, + OpArbitraryFloatExpm1INTEL = 5867, + OpArbitraryFloatSinINTEL 
= 5868, + OpArbitraryFloatCosINTEL = 5869, + OpArbitraryFloatSinCosINTEL = 5870, + OpArbitraryFloatSinPiINTEL = 5871, + OpArbitraryFloatCosPiINTEL = 5872, + OpArbitraryFloatASinINTEL = 5873, + OpArbitraryFloatASinPiINTEL = 5874, + OpArbitraryFloatACosINTEL = 5875, + OpArbitraryFloatACosPiINTEL = 5876, + OpArbitraryFloatATanINTEL = 5877, + OpArbitraryFloatATanPiINTEL = 5878, + OpArbitraryFloatATan2INTEL = 5879, + OpArbitraryFloatPowINTEL = 5880, + OpArbitraryFloatPowRINTEL = 5881, + OpArbitraryFloatPowNINTEL = 5882, + OpLoopControlINTEL = 5887, + OpAliasDomainDeclINTEL = 5911, + OpAliasScopeDeclINTEL = 5912, + OpAliasScopeListDeclINTEL = 5913, + OpFixedSqrtINTEL = 5923, + OpFixedRecipINTEL = 5924, + OpFixedRsqrtINTEL = 5925, + OpFixedSinINTEL = 5926, + OpFixedCosINTEL = 5927, + OpFixedSinCosINTEL = 5928, + OpFixedSinPiINTEL = 5929, + OpFixedCosPiINTEL = 5930, + OpFixedSinCosPiINTEL = 5931, + OpFixedLogINTEL = 5932, + OpFixedExpINTEL = 5933, + OpPtrCastToCrossWorkgroupINTEL = 5934, + OpCrossWorkgroupCastToPtrINTEL = 5938, + OpReadPipeBlockingINTEL = 5946, + OpWritePipeBlockingINTEL = 5947, + OpFPGARegINTEL = 5949, + OpRayQueryGetRayTMinKHR = 6016, + OpRayQueryGetRayFlagsKHR = 6017, + OpRayQueryGetIntersectionTKHR = 6018, + OpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, + OpRayQueryGetIntersectionInstanceIdKHR = 6020, + OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, + OpRayQueryGetIntersectionGeometryIndexKHR = 6022, + OpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, + OpRayQueryGetIntersectionBarycentricsKHR = 6024, + OpRayQueryGetIntersectionFrontFaceKHR = 6025, + OpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, + OpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, + OpRayQueryGetIntersectionObjectRayOriginKHR = 6028, + OpRayQueryGetWorldRayDirectionKHR = 6029, + OpRayQueryGetWorldRayOriginKHR = 6030, + OpRayQueryGetIntersectionObjectToWorldKHR = 6031, + OpRayQueryGetIntersectionWorldToObjectKHR = 6032, + 
OpAtomicFAddEXT = 6035, + OpTypeBufferSurfaceINTEL = 6086, + OpTypeStructContinuedINTEL = 6090, + OpConstantCompositeContinuedINTEL = 6091, + OpSpecConstantCompositeContinuedINTEL = 6092, + OpControlBarrierArriveINTEL = 6142, + OpControlBarrierWaitINTEL = 6143, + OpGroupIMulKHR = 6401, + OpGroupFMulKHR = 6402, + OpGroupBitwiseAndKHR = 6403, + OpGroupBitwiseOrKHR = 6404, + OpGroupBitwiseXorKHR = 6405, + OpGroupLogicalAndKHR = 6406, + OpGroupLogicalOrKHR = 6407, + OpGroupLogicalXorKHR = 6408, OpMax = 0x7fffffff, }; +#ifdef SPV_ENABLE_UTILITY_CODE +#ifndef __cplusplus +#include +#endif +inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; + case OpNop: *hasResult = false; *hasResultType = false; break; + case OpUndef: *hasResult = true; *hasResultType = true; break; + case OpSourceContinued: *hasResult = false; *hasResultType = false; break; + case OpSource: *hasResult = false; *hasResultType = false; break; + case OpSourceExtension: *hasResult = false; *hasResultType = false; break; + case OpName: *hasResult = false; *hasResultType = false; break; + case OpMemberName: *hasResult = false; *hasResultType = false; break; + case OpString: *hasResult = true; *hasResultType = false; break; + case OpLine: *hasResult = false; *hasResultType = false; break; + case OpExtension: *hasResult = false; *hasResultType = false; break; + case OpExtInstImport: *hasResult = true; *hasResultType = false; break; + case OpExtInst: *hasResult = true; *hasResultType = true; break; + case OpMemoryModel: *hasResult = false; *hasResultType = false; break; + case OpEntryPoint: *hasResult = false; *hasResultType = false; break; + case OpExecutionMode: *hasResult = false; *hasResultType = false; break; + case OpCapability: *hasResult = false; *hasResultType = false; break; + case OpTypeVoid: *hasResult = true; *hasResultType = false; break; + case OpTypeBool: *hasResult = 
true; *hasResultType = false; break; + case OpTypeInt: *hasResult = true; *hasResultType = false; break; + case OpTypeFloat: *hasResult = true; *hasResultType = false; break; + case OpTypeVector: *hasResult = true; *hasResultType = false; break; + case OpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case OpTypeImage: *hasResult = true; *hasResultType = false; break; + case OpTypeSampler: *hasResult = true; *hasResultType = false; break; + case OpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case OpTypeArray: *hasResult = true; *hasResultType = false; break; + case OpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case OpTypeStruct: *hasResult = true; *hasResultType = false; break; + case OpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case OpTypePointer: *hasResult = true; *hasResultType = false; break; + case OpTypeFunction: *hasResult = true; *hasResultType = false; break; + case OpTypeEvent: *hasResult = true; *hasResultType = false; break; + case OpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case OpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case OpTypeQueue: *hasResult = true; *hasResultType = false; break; + case OpTypePipe: *hasResult = true; *hasResultType = false; break; + case OpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case OpConstantTrue: *hasResult = true; *hasResultType = true; break; + case OpConstantFalse: *hasResult = true; *hasResultType = true; break; + case OpConstant: *hasResult = true; *hasResultType = true; break; + case OpConstantComposite: *hasResult = true; *hasResultType = true; break; + case OpConstantSampler: *hasResult = true; *hasResultType = true; break; + case OpConstantNull: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantFalse: *hasResult = true; *hasResultType = true; break; 
+ case OpSpecConstant: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case OpFunction: *hasResult = true; *hasResultType = true; break; + case OpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case OpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case OpFunctionCall: *hasResult = true; *hasResultType = true; break; + case OpVariable: *hasResult = true; *hasResultType = true; break; + case OpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case OpLoad: *hasResult = true; *hasResultType = true; break; + case OpStore: *hasResult = false; *hasResultType = false; break; + case OpCopyMemory: *hasResult = false; *hasResultType = false; break; + case OpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case OpAccessChain: *hasResult = true; *hasResultType = true; break; + case OpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case OpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case OpArrayLength: *hasResult = true; *hasResultType = true; break; + case OpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case OpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case OpDecorate: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorate: *hasResult = false; *hasResultType = false; break; + case OpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case OpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case OpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case OpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case OpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break; + case OpVectorShuffle: *hasResult = true; *hasResultType = 
true; break; + case OpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case OpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case OpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case OpCopyObject: *hasResult = true; *hasResultType = true; break; + case OpTranspose: *hasResult = true; *hasResultType = true; break; + case OpSampledImage: *hasResult = true; *hasResultType = true; break; + case OpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageFetch: *hasResult = true; *hasResultType = true; break; + case OpImageGather: *hasResult = true; *hasResultType = true; break; + case OpImageDrefGather: *hasResult = true; *hasResultType = true; break; + case OpImageRead: *hasResult = true; *hasResultType = true; break; + case OpImageWrite: *hasResult = false; *hasResultType = false; break; + case OpImage: *hasResult = true; *hasResultType = true; break; + case OpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case OpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySize: *hasResult = true; *hasResultType = true; break; + case OpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case OpImageQueryLevels: *hasResult = true; *hasResultType 
= true; break; + case OpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case OpConvertFToU: *hasResult = true; *hasResultType = true; break; + case OpConvertFToS: *hasResult = true; *hasResultType = true; break; + case OpConvertSToF: *hasResult = true; *hasResultType = true; break; + case OpConvertUToF: *hasResult = true; *hasResultType = true; break; + case OpUConvert: *hasResult = true; *hasResultType = true; break; + case OpSConvert: *hasResult = true; *hasResultType = true; break; + case OpFConvert: *hasResult = true; *hasResultType = true; break; + case OpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case OpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case OpSatConvertSToU: *hasResult = true; *hasResultType = true; break; + case OpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case OpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case OpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case OpGenericCastToPtr: *hasResult = true; *hasResultType = true; break; + case OpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case OpBitcast: *hasResult = true; *hasResultType = true; break; + case OpSNegate: *hasResult = true; *hasResultType = true; break; + case OpFNegate: *hasResult = true; *hasResultType = true; break; + case OpIAdd: *hasResult = true; *hasResultType = true; break; + case OpFAdd: *hasResult = true; *hasResultType = true; break; + case OpISub: *hasResult = true; *hasResultType = true; break; + case OpFSub: *hasResult = true; *hasResultType = true; break; + case OpIMul: *hasResult = true; *hasResultType = true; break; + case OpFMul: *hasResult = true; *hasResultType = true; break; + case OpUDiv: *hasResult = true; *hasResultType = true; break; + case OpSDiv: *hasResult = true; *hasResultType = true; break; + case OpFDiv: *hasResult = true; *hasResultType = true; break; + case OpUMod: *hasResult = true; 
*hasResultType = true; break; + case OpSRem: *hasResult = true; *hasResultType = true; break; + case OpSMod: *hasResult = true; *hasResultType = true; break; + case OpFRem: *hasResult = true; *hasResultType = true; break; + case OpFMod: *hasResult = true; *hasResultType = true; break; + case OpVectorTimesScalar: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case OpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case OpOuterProduct: *hasResult = true; *hasResultType = true; break; + case OpDot: *hasResult = true; *hasResultType = true; break; + case OpIAddCarry: *hasResult = true; *hasResultType = true; break; + case OpISubBorrow: *hasResult = true; *hasResultType = true; break; + case OpUMulExtended: *hasResult = true; *hasResultType = true; break; + case OpSMulExtended: *hasResult = true; *hasResultType = true; break; + case OpAny: *hasResult = true; *hasResultType = true; break; + case OpAll: *hasResult = true; *hasResultType = true; break; + case OpIsNan: *hasResult = true; *hasResultType = true; break; + case OpIsInf: *hasResult = true; *hasResultType = true; break; + case OpIsFinite: *hasResult = true; *hasResultType = true; break; + case OpIsNormal: *hasResult = true; *hasResultType = true; break; + case OpSignBitSet: *hasResult = true; *hasResultType = true; break; + case OpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case OpOrdered: *hasResult = true; *hasResultType = true; break; + case OpUnordered: *hasResult = true; *hasResultType = true; break; + case OpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case OpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case OpLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpLogicalAnd: 
*hasResult = true; *hasResultType = true; break; + case OpLogicalNot: *hasResult = true; *hasResultType = true; break; + case OpSelect: *hasResult = true; *hasResultType = true; break; + case OpIEqual: *hasResult = true; *hasResultType = true; break; + case OpINotEqual: *hasResult = true; *hasResultType = true; break; + case OpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpULessThan: *hasResult = true; *hasResultType = true; break; + case OpSLessThan: *hasResult = true; *hasResultType = true; break; + case OpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdNotEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case OpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case OpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case OpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case OpShiftLeftLogical: 
*hasResult = true; *hasResultType = true; break; + case OpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case OpNot: *hasResult = true; *hasResultType = true; break; + case OpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case OpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case OpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case OpBitReverse: *hasResult = true; *hasResultType = true; break; + case OpBitCount: *hasResult = true; *hasResultType = true; break; + case OpDPdx: *hasResult = true; *hasResultType = true; break; + case OpDPdy: *hasResult = true; *hasResultType = true; break; + case OpFwidth: *hasResult = true; *hasResultType = true; break; + case OpDPdxFine: *hasResult = true; *hasResultType = true; break; + case OpDPdyFine: *hasResult = true; *hasResultType = true; break; + case OpFwidthFine: *hasResult = true; *hasResultType = true; break; + case OpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case OpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case OpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case OpEmitVertex: *hasResult = false; *hasResultType = false; break; + case OpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case OpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + case OpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break; + case OpControlBarrier: *hasResult = false; *hasResultType = false; break; + case OpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case OpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case OpAtomicStore: *hasResult = false; *hasResultType = false; break; + case OpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case 
OpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case OpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break; + case OpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + case OpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case OpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case OpAtomicISub: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case OpAtomicOr: *hasResult = true; *hasResultType = true; break; + case OpAtomicXor: *hasResult = true; *hasResultType = true; break; + case OpPhi: *hasResult = true; *hasResultType = true; break; + case OpLoopMerge: *hasResult = false; *hasResultType = false; break; + case OpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case OpLabel: *hasResult = true; *hasResultType = false; break; + case OpBranch: *hasResult = false; *hasResultType = false; break; + case OpBranchConditional: *hasResult = false; *hasResultType = false; break; + case OpSwitch: *hasResult = false; *hasResultType = false; break; + case OpKill: *hasResult = false; *hasResultType = false; break; + case OpReturn: *hasResult = false; *hasResultType = false; break; + case OpReturnValue: *hasResult = false; *hasResultType = false; break; + case OpUnreachable: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case OpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case OpGroupWaitEvents: *hasResult = false; *hasResultType = 
false; break; + case OpGroupAll: *hasResult = true; *hasResultType = true; break; + case OpGroupAny: *hasResult = true; *hasResultType = true; break; + case OpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupSMax: *hasResult = true; *hasResultType = true; break; + case OpReadPipe: *hasResult = true; *hasResultType = true; break; + case OpWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case OpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case OpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case OpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpEnqueueMarker: *hasResult = true; 
*hasResultType = true; break; + case OpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case OpRetainEvent: *hasResult = false; *hasResultType = false; break; + case OpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case OpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case OpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case OpSetUserEventStatus: *hasResult = false; *hasResultType = false; break; + case OpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case OpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case OpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case OpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case OpImageSparseDrefGather: 
*hasResult = true; *hasResultType = true; break; + case OpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case OpNoLine: *hasResult = false; *hasResultType = false; break; + case OpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case OpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case OpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case OpSizeOf: *hasResult = true; *hasResultType = true; break; + case OpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case OpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case OpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case OpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case OpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case OpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case OpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case OpDecorateId: *hasResult = false; *hasResultType = false; break; + case OpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformInverseBallot: 
*hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalXor: 
*hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case OpCopyLogical: *hasResult = true; *hasResultType = true; break; + case OpPtrEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrDiff: *hasResult = true; *hasResultType = true; break; + case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break; + case OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformRotateKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpTraceRayKHR: *hasResult = false; *hasResultType = false; break; + case OpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; + case OpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break; + case OpIgnoreIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpTerminateRayKHR: *hasResult = false; *hasResultType = false; break; + case OpSDot: *hasResult = true; *hasResultType = true; break; + case OpUDot: *hasResult = true; *hasResultType = true; break; + case OpSUDot: *hasResult = true; *hasResultType = true; break; + case OpSDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpUDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpSUDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpTypeRayQueryKHR: *hasResult = true; 
*hasResultType = false; break; + case OpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case OpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break; + case OpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break; + case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = 
false; break; + case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case OpTraceNV: *hasResult = false; *hasResultType = false; break; + case OpTraceMotionNV: *hasResult = false; *hasResultType = false; break; + case OpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; + case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case OpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break; + case OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case OpConvertUToImageNV: *hasResult = true; *hasResultType = true; break; + case OpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break; + case OpConvertImageToUNV: *hasResult = true; *hasResultType = true; break; + case OpConvertSamplerToUNV: *hasResult = true; *hasResultType = true; break; + case OpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break; + case OpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break; + case OpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break; + case OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break; + case OpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpAsmINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmCallINTEL: *hasResult = true; *hasResultType = true; break; + case OpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break; + case OpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break; + case OpAssumeTrueKHR: *hasResult = false; *hasResultType = false; break; + case OpExpectKHR: *hasResult = true; *hasResultType = true; break; + case OpDecorateString: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case OpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; 
*hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; 
*hasResultType = true; break; + case OpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case OpVariableLengthArrayINTEL: *hasResult = true; *hasResultType = true; break; + case OpSaveMemoryINTEL: *hasResult = true; *hasResultType = true; break; + case OpRestoreMemoryINTEL: *hasResult = false; *hasResultType = false; break; + case OpArbitraryFloatSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastFromIntINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastToIntINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatAddINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSubINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatMulINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatDivINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatGTINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatGEINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLTINTEL: *hasResult = true; *hasResultType = true; break; + 
case OpArbitraryFloatLEINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatEQINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatRecipINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatRSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCbrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatHypotINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLogINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog10INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog1pINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExpINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExp2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExp10INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExpm1INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatASinINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatASinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatACosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatACosPiINTEL: *hasResult = true; *hasResultType = true; break; + 
case OpArbitraryFloatATanINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATanPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATan2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; + case OpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case OpAliasDomainDeclINTEL: *hasResult = true; *hasResultType = false; break; + case OpAliasScopeDeclINTEL: *hasResult = true; *hasResultType = false; break; + case OpAliasScopeListDeclINTEL: *hasResult = true; *hasResultType = false; break; + case OpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedLogINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedExpINTEL: *hasResult = true; *hasResultType = true; break; + case OpPtrCastToCrossWorkgroupINTEL: *hasResult = true; *hasResultType = true; break; + case OpCrossWorkgroupCastToPtrINTEL: *hasResult = true; *hasResultType = true; break; + case OpReadPipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case OpWritePipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpFPGARegINTEL: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; + case OpAtomicFAddEXT: *hasResult = true; *hasResultType = true; break; + case OpTypeBufferSurfaceINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case 
OpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break; + case OpControlBarrierWaitINTEL: *hasResult = false; *hasResultType = false; break; + case OpGroupIMulKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupFMulKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupBitwiseAndKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupBitwiseOrKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupBitwiseXorKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupLogicalAndKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + // Overload operator| for mask bit combining inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } @@ -1209,6 +2570,8 @@ inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } +inline RayFlagsMask operator|(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) | unsigned(b)); } +inline FragmentShadingRateMask operator|(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) | unsigned(b)); } } 
// end namespace spv diff --git a/spirv_cfg.cpp b/spirv_cfg.cpp index ed31f236792..93299479815 100644 --- a/spirv_cfg.cpp +++ b/spirv_cfg.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2016-2019 Arm Limited + * Copyright 2016-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_cfg.hpp" #include "spirv_cross.hpp" #include @@ -61,7 +68,7 @@ void CFG::build_immediate_dominators() if (immediate_dominators[block]) { assert(immediate_dominators[edge]); - immediate_dominators[block] = find_common_dominator(block, edge); + immediate_dominators[block] = find_common_dominator(immediate_dominators[block], edge); } else immediate_dominators[block] = edge; @@ -97,8 +104,22 @@ bool CFG::post_order_visit(uint32_t block_id) // Block back-edges from recursively revisiting ourselves. visit_order[block_id].get() = 0; - // First visit our branch targets. auto &block = compiler.get(block_id); + + // If this is a loop header, add an implied branch to the merge target. + // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners. + // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block. + // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator. + // We could use has_visited_forward_edge, but this break code-gen where the merge block is unreachable in the CFG. + + // Make a point out of visiting merge target first. 
This is to make sure that post visit order outside the loop + // is lower than inside the loop, which is going to be key for some traversal algorithms like post-dominance analysis. + // For selection constructs true/false blocks will end up visiting the merge block directly and it works out fine, + // but for loops, only the header might end up actually branching to merge block. + if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block)) + add_branch(block_id, block.merge_block); + + // First visit our branch targets. switch (block.terminator) { case SPIRBlock::Direct: @@ -114,7 +135,9 @@ bool CFG::post_order_visit(uint32_t block_id) break; case SPIRBlock::MultiSelect: - for (auto &target : block.cases) + { + const auto &cases = compiler.get_case_list(block); + for (const auto &target : cases) { if (post_order_visit(target.block)) add_branch(block_id, target.block); @@ -122,19 +145,11 @@ bool CFG::post_order_visit(uint32_t block_id) if (block.default_block && post_order_visit(block.default_block)) add_branch(block_id, block.default_block); break; - + } default: break; } - // If this is a loop header, add an implied branch to the merge target. - // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners. - // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block. - // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator. - // We could use has_visited_forward_edge, but this break code-gen where the merge block is unreachable in the CFG. - if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block)) - add_branch(block_id, block.merge_block); - // If this is a selection merge, add an implied branch to the merge target. // This is needed to avoid cases where an inner branch dominates the outer branch. 
// This can happen if one of the branches exit early, e.g.: @@ -148,14 +163,35 @@ bool CFG::post_order_visit(uint32_t block_id) // Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement // will be hoisted out to outside the selection merge. // If size > 1, the variable will be automatically hoisted, so we should not mess with it. + // The exception here is switch blocks, where we can have multiple edges to merge block, + // all coming from same scope, so be more conservative in this case. // Adding fake branches unconditionally breaks parameter preservation analysis, // which looks at how variables are accessed through the CFG. auto pred_itr = preceding_edges.find(block.next_block); if (pred_itr != end(preceding_edges)) { auto &pred = pred_itr->second; - if (pred.size() == 1 && *pred.begin() != block_id) - add_branch(block_id, block.next_block); + auto succ_itr = succeeding_edges.find(block_id); + size_t num_succeeding_edges = 0; + if (succ_itr != end(succeeding_edges)) + num_succeeding_edges = succ_itr->second.size(); + + if (block.terminator == SPIRBlock::MultiSelect && num_succeeding_edges == 1) + { + // Multiple branches can come from the same scope due to "break;", so we need to assume that all branches + // come from same case scope in worst case, even if there are multiple preceding edges. + // If we have more than one succeeding edge from the block header, it should be impossible + // to have a dominator be inside the block. + // Only case this can go wrong is if we have 2 or more edges from block header and + // 2 or more edges to merge block, and still have dominator be inside a case label. 
+ if (!pred.empty()) + add_branch(block_id, block.next_block); + } + else + { + if (pred.size() == 1 && *pred.begin() != block_id) + add_branch(block_id, block.next_block); + } } else { @@ -210,13 +246,13 @@ uint32_t CFG::find_loop_dominator(uint32_t block_id) const for (auto &pred : itr->second) { auto &pred_block = compiler.get(pred); - if (pred_block.merge == SPIRBlock::MergeLoop && pred_block.merge_block == block_id) + if (pred_block.merge == SPIRBlock::MergeLoop && pred_block.merge_block == ID(block_id)) { pred_block_id = pred; ignore_loop_header = true; break; } - else if (pred_block.merge == SPIRBlock::MergeSelection && pred_block.next_block == block_id) + else if (pred_block.merge == SPIRBlock::MergeSelection && pred_block.next_block == ID(block_id)) { pred_block_id = pred; break; @@ -241,6 +277,82 @@ uint32_t CFG::find_loop_dominator(uint32_t block_id) const return block_id; } +bool CFG::node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const +{ + // Walk backwards, starting from "to" block. + // Only follow pred edges if they have a 1:1 relationship, or a merge relationship. + // If we cannot find a path to "from", we must assume that to is inside control flow in some way. 
+ + auto &from_block = compiler.get(from); + BlockID ignore_block_id = 0; + if (from_block.merge == SPIRBlock::MergeLoop) + ignore_block_id = from_block.merge_block; + + while (to != from) + { + auto pred_itr = preceding_edges.find(to); + if (pred_itr == end(preceding_edges)) + return false; + + DominatorBuilder builder(*this); + for (auto &edge : pred_itr->second) + builder.add_block(edge); + + uint32_t dominator = builder.get_dominator(); + if (dominator == 0) + return false; + + auto &dom = compiler.get(dominator); + + bool true_path_ignore = false; + bool false_path_ignore = false; + + bool merges_to_nothing = dom.merge == SPIRBlock::MergeNone || + (dom.merge == SPIRBlock::MergeSelection && dom.next_block && + compiler.get(dom.next_block).terminator == SPIRBlock::Unreachable) || + (dom.merge == SPIRBlock::MergeLoop && dom.merge_block && + compiler.get(dom.merge_block).terminator == SPIRBlock::Unreachable); + + if (dom.self == from || merges_to_nothing) + { + // We can only ignore inner branchy paths if there is no merge, + // i.e. no code is generated afterwards. E.g. this allows us to elide continue: + // for (;;) { if (cond) { continue; } else { break; } }. + // Codegen here in SPIR-V will be something like either no merge if one path directly breaks, or + // we merge to Unreachable. + if (ignore_block_id && dom.terminator == SPIRBlock::Select) + { + auto &true_block = compiler.get(dom.true_block); + auto &false_block = compiler.get(dom.false_block); + auto &ignore_block = compiler.get(ignore_block_id); + true_path_ignore = compiler.execution_is_branchless(true_block, ignore_block); + false_path_ignore = compiler.execution_is_branchless(false_block, ignore_block); + } + } + + // Cases where we allow traversal. This serves as a proxy for post-dominance in a loop body. + // TODO: Might want to do full post-dominance analysis, but it's a lot of churn for something like this ... + // - We're the merge block of a selection construct. Jump to header. 
+ // - We're the merge block of a loop. Jump to header. + // - Direct branch. Trivial. + // - Allow cases inside a branch if the header cannot merge execution before loop exit. + if ((dom.merge == SPIRBlock::MergeSelection && dom.next_block == to) || + (dom.merge == SPIRBlock::MergeLoop && dom.merge_block == to) || + (dom.terminator == SPIRBlock::Direct && dom.next_block == to) || + (dom.terminator == SPIRBlock::Select && dom.true_block == to && false_path_ignore) || + (dom.terminator == SPIRBlock::Select && dom.false_block == to && true_path_ignore)) + { + // Allow walking selection constructs if the other branch reaches out of a loop construct. + // It cannot be in-scope anymore. + to = dominator; + } + else + return false; + } + + return true; +} + DominatorBuilder::DominatorBuilder(const CFG &cfg_) : cfg(cfg_) { @@ -296,7 +408,9 @@ void DominatorBuilder::lift_continue_block_dominator() break; case SPIRBlock::MultiSelect: - for (auto &target : block.cases) + { + auto &cases = cfg.get_compiler().get_case_list(block); + for (auto &target : cases) { if (cfg.get_visit_order(target.block) > post_order) back_edge_dominator = true; @@ -304,6 +418,7 @@ void DominatorBuilder::lift_continue_block_dominator() if (block.default_block && cfg.get_visit_order(block.default_block) > post_order) back_edge_dominator = true; break; + } default: break; diff --git a/spirv_cfg.hpp b/spirv_cfg.hpp index 7d07d484107..1d85fe0a97b 100644 --- a/spirv_cfg.hpp +++ b/spirv_cfg.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2016-2019 Arm Limited + * Copyright 2016-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #ifndef SPIRV_CROSS_CFG_HPP #define SPIRV_CROSS_CFG_HPP @@ -52,6 +59,11 @@ class CFG return 0; } + bool is_reachable(uint32_t block) const + { + return visit_order.count(block) != 0; + } + uint32_t get_visit_order(uint32_t block) const { auto itr = visit_order.find(block); @@ -88,13 +100,17 @@ class CFG return; seen_blocks.insert(block); - op(block); - for (auto b : get_succeeding_edges(block)) - walk_from(seen_blocks, b, op); + if (op(block)) + { + for (auto b : get_succeeding_edges(block)) + walk_from(seen_blocks, b, op); + } } uint32_t find_loop_dominator(uint32_t block) const; + bool node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const; + private: struct VisitOrder { diff --git a/spirv_common.hpp b/spirv_common.hpp index d7a5f32ecab..ba420e1dbe0 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,12 +15,23 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_COMMON_HPP #define SPIRV_CROSS_COMMON_HPP +#ifndef SPV_ENABLE_UTILITY_CODE +#define SPV_ENABLE_UTILITY_CODE +#endif #include "spirv.hpp" + #include "spirv_cross_containers.hpp" #include "spirv_cross_error_handling.hpp" +#include // A bit crude, but allows projects which embed SPIRV-Cross statically to // effectively hide all the symbols from other projects. @@ -203,14 +215,39 @@ inline std::string convert_to_string(const T &t) return std::to_string(t); } +static inline std::string convert_to_string(int32_t value) +{ + // INT_MIN is ... special on some backends. 
If we use a decimal literal, and negate it, we + // could accidentally promote the literal to long first, then negate. + // To workaround it, emit int(0x80000000) instead. + if (value == std::numeric_limits::min()) + return "int(0x80000000)"; + else + return std::to_string(value); +} + +static inline std::string convert_to_string(int64_t value, const std::string &int64_type, bool long_long_literal_suffix) +{ + // INT64_MIN is ... special on some backends. + // If we use a decimal literal, and negate it, we might overflow the representable numbers. + // To workaround it, emit int(0x80000000) instead. + if (value == std::numeric_limits::min()) + return join(int64_type, "(0x8000000000000000u", (long_long_literal_suffix ? "ll" : "l"), ")"); + else + return std::to_string(value) + (long_long_literal_suffix ? "ll" : "l"); +} + // Allow implementations to set a convenient standard precision #ifndef SPIRV_CROSS_FLT_FMT #define SPIRV_CROSS_FLT_FMT "%.32g" #endif -#ifdef _MSC_VER -// sprintf warning. -// We cannot rely on snprintf existing because, ..., MSVC. +// Disable sprintf and strcat warnings. +// We cannot rely on snprintf and family existing because, ..., MSVC. 
+#if defined(__clang__) || defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#elif defined(_MSC_VER) #pragma warning(push) #pragma warning(disable : 4996) #endif @@ -258,7 +295,32 @@ inline std::string convert_to_string(double t, char locale_radix_point) return buf; } -#ifdef _MSC_VER +template +struct ValueSaver +{ + explicit ValueSaver(T ¤t_) + : current(current_) + , saved(current_) + { + } + + void release() + { + current = saved; + } + + ~ValueSaver() + { + release(); + } + + T ¤t; + T saved; +}; + +#if defined(__clang__) || defined(__GNUC__) +#pragma GCC diagnostic pop +#elif defined(_MSC_VER) #pragma warning(pop) #endif @@ -266,24 +328,22 @@ struct Instruction { uint16_t op = 0; uint16_t count = 0; + // If offset is 0 (not a valid offset into the instruction stream), + // we have an instruction stream which is embedded in the object. uint32_t offset = 0; uint32_t length = 0; + + inline bool is_embedded() const + { + return offset == 0; + } }; -// Helper for Variant interface. -struct IVariant +struct EmbeddedInstruction : Instruction { - virtual ~IVariant() = default; - virtual IVariant *clone(ObjectPoolBase *pool) = 0; - uint32_t self = 0; + SmallVector ops; }; -#define SPIRV_CROSS_DECLARE_CLONE(T) \ - IVariant *clone(ObjectPoolBase *pool) override \ - { \ - return static_cast *>(pool)->allocate(*this); \ - } - enum Types { TypeNone, @@ -303,6 +363,99 @@ enum Types TypeCount }; +template +class TypedID; + +template <> +class TypedID +{ +public: + TypedID() = default; + TypedID(uint32_t id_) + : id(id_) + { + } + + template + TypedID(const TypedID &other) + { + *this = other; + } + + template + TypedID &operator=(const TypedID &other) + { + id = uint32_t(other); + return *this; + } + + // Implicit conversion to u32 is desired here. + // As long as we block implicit conversion between TypedID and TypedID we're good. 
+ operator uint32_t() const + { + return id; + } + + template + operator TypedID() const + { + return TypedID(*this); + } + +private: + uint32_t id = 0; +}; + +template +class TypedID +{ +public: + TypedID() = default; + TypedID(uint32_t id_) + : id(id_) + { + } + + explicit TypedID(const TypedID &other) + : id(uint32_t(other)) + { + } + + operator uint32_t() const + { + return id; + } + +private: + uint32_t id = 0; +}; + +using VariableID = TypedID; +using TypeID = TypedID; +using ConstantID = TypedID; +using FunctionID = TypedID; +using BlockID = TypedID; +using ID = TypedID; + +// Helper for Variant interface. +struct IVariant +{ + virtual ~IVariant() = default; + virtual IVariant *clone(ObjectPoolBase *pool) = 0; + ID self = 0; + +protected: + IVariant() = default; + IVariant(const IVariant&) = default; + IVariant &operator=(const IVariant&) = default; +}; + +#define SPIRV_CROSS_DECLARE_CLONE(T) \ + IVariant *clone(ObjectPoolBase *pool) override \ + { \ + return static_cast *>(pool)->allocate(*this); \ + } + struct SPIRUndef : IVariant { enum @@ -310,11 +463,11 @@ struct SPIRUndef : IVariant type = TypeUndef }; - explicit SPIRUndef(uint32_t basetype_) + explicit SPIRUndef(TypeID basetype_) : basetype(basetype_) { } - uint32_t basetype; + TypeID basetype; SPIRV_CROSS_DECLARE_CLONE(SPIRUndef) }; @@ -344,15 +497,15 @@ struct SPIRCombinedImageSampler : IVariant { type = TypeCombinedImageSampler }; - SPIRCombinedImageSampler(uint32_t type_, uint32_t image_, uint32_t sampler_) + SPIRCombinedImageSampler(TypeID type_, VariableID image_, VariableID sampler_) : combined_type(type_) , image(image_) , sampler(sampler_) { } - uint32_t combined_type; - uint32_t image; - uint32_t sampler; + TypeID combined_type; + VariableID image; + VariableID sampler; SPIRV_CROSS_DECLARE_CLONE(SPIRCombinedImageSampler) }; @@ -364,16 +517,18 @@ struct SPIRConstantOp : IVariant type = TypeConstantOp }; - SPIRConstantOp(uint32_t result_type, spv::Op op, const uint32_t *args, uint32_t length) 
+ SPIRConstantOp(TypeID result_type, spv::Op op, const uint32_t *args, uint32_t length) : opcode(op) - , arguments(args, args + length) , basetype(result_type) { + arguments.reserve(length); + for (uint32_t i = 0; i < length; i++) + arguments.push_back(args[i]); } spv::Op opcode; SmallVector arguments; - uint32_t basetype; + TypeID basetype; SPIRV_CROSS_DECLARE_CLONE(SPIRConstantOp) }; @@ -406,10 +561,12 @@ struct SPIRType : IVariant Image, SampledImage, Sampler, - AccelerationStructureNV, + AccelerationStructure, + RayQuery, // Keep internal types at the end. ControlPointArray, + Interpolant, Char }; @@ -433,14 +590,19 @@ struct SPIRType : IVariant // Keep track of how many pointer layers we have. uint32_t pointer_depth = 0; bool pointer = false; + bool forward_pointer = false; spv::StorageClass storage = spv::StorageClassGeneric; - SmallVector member_types; + SmallVector member_types; + + // If member order has been rewritten to handle certain scenarios with Offset, + // allow codegen to rewrite the index. + SmallVector member_type_index_redirection; struct ImageType { - uint32_t type; + TypeID type; spv::Dim dim; bool depth; bool arrayed; @@ -453,11 +615,11 @@ struct SPIRType : IVariant // Structs can be declared multiple times if they are used as part of interface blocks. // We want to detect this so that we only emit the struct definition once. // Since we cannot rely on OpName to be equal, we need to figure out aliases. - uint32_t type_alias = 0; + TypeID type_alias = 0; // Denotes the type which this type is based on. // Allows the backend to traverse how a complex type is built up during access chains. - uint32_t parent_type = 0; + TypeID parent_type = 0; // Used in backends to avoid emitting members with conflicting names. 
std::unordered_set member_name_cache; @@ -480,7 +642,10 @@ struct SPIRExtension : IVariant SPV_AMD_shader_ballot, SPV_AMD_shader_explicit_vertex_parameter, SPV_AMD_shader_trinary_minmax, - SPV_AMD_gcn_shader + SPV_AMD_gcn_shader, + NonSemanticDebugPrintf, + NonSemanticShaderDebugInfo, + NonSemanticGeneric }; explicit SPIRExtension(Extension ext_) @@ -496,7 +661,7 @@ struct SPIRExtension : IVariant // so in order to avoid conflicts, we can't stick them in the ids array. struct SPIREntryPoint { - SPIREntryPoint(uint32_t self_, spv::ExecutionModel execution_model, const std::string &entry_name) + SPIREntryPoint(FunctionID self_, spv::ExecutionModel execution_model, const std::string &entry_name) : self(self_) , name(entry_name) , orig_name(entry_name) @@ -505,20 +670,23 @@ struct SPIREntryPoint } SPIREntryPoint() = default; - uint32_t self = 0; + FunctionID self = 0; std::string name; std::string orig_name; - SmallVector interface_variables; + SmallVector interface_variables; Bitset flags; - struct + struct WorkgroupSize { uint32_t x = 0, y = 0, z = 0; + uint32_t id_x = 0, id_y = 0, id_z = 0; uint32_t constant = 0; // Workgroup size can be expressed as a constant/spec-constant instead. } workgroup_size; uint32_t invocations = 0; uint32_t output_vertices = 0; + uint32_t output_primitives = 0; spv::ExecutionModel model = spv::ExecutionModelMax; + bool geometry_passthrough = false; }; struct SPIRExpression : IVariant @@ -529,8 +697,8 @@ struct SPIRExpression : IVariant }; // Only created by the backend target to avoid creating tons of temporaries. - SPIRExpression(std::string expr, uint32_t expression_type_, bool immutable_) - : expression(move(expr)) + SPIRExpression(std::string expr, TypeID expression_type_, bool immutable_) + : expression(std::move(expr)) , expression_type(expression_type_) , immutable(immutable_) { @@ -539,14 +707,14 @@ struct SPIRExpression : IVariant // If non-zero, prepend expression with to_expression(base_expression). 
// Used in amortizing multiple calls to to_expression() // where in certain cases that would quickly force a temporary when not needed. - uint32_t base_expression = 0; + ID base_expression = 0; std::string expression; - uint32_t expression_type = 0; + TypeID expression_type = 0; // If this expression is a forwarded load, // allow us to reference the original variable. - uint32_t loaded_from = 0; + ID loaded_from = 0; // If this expression will never change, we can avoid lots of temporaries // in high level source. @@ -562,11 +730,14 @@ struct SPIRExpression : IVariant bool access_chain = false; // A list of expressions which this expression depends on. - SmallVector expression_dependencies; + SmallVector expression_dependencies; // By reading this expression, we implicitly read these expressions as well. // Used by access chain Store and Load since we read multiple expressions in this case. - SmallVector implied_read_expressions; + SmallVector implied_read_expressions; + + // The expression was emitted at a certain scope. Lets us track when an expression read means multiple reads. + uint32_t emitted_loop_level = 0; SPIRV_CROSS_DECLARE_CLONE(SPIRExpression) }; @@ -578,12 +749,12 @@ struct SPIRFunctionPrototype : IVariant type = TypeFunctionPrototype }; - explicit SPIRFunctionPrototype(uint32_t return_type_) + explicit SPIRFunctionPrototype(TypeID return_type_) : return_type(return_type_) { } - uint32_t return_type; + TypeID return_type; SmallVector parameter_types; SPIRV_CROSS_DECLARE_CLONE(SPIRFunctionPrototype) @@ -606,7 +777,10 @@ struct SPIRBlock : IVariant Return, // Block ends with return. 
Unreachable, // Noop - Kill // Discard + Kill, // Discard + IgnoreIntersection, // Ray Tracing + TerminateRay, // Ray Tracing + EmitMeshTasks // Mesh shaders }; enum Merge @@ -650,7 +824,7 @@ struct SPIRBlock : IVariant ComplexLoop }; - enum + enum : uint32_t { NoDominator = 0xffffffffu }; @@ -658,23 +832,30 @@ struct SPIRBlock : IVariant Terminator terminator = Unknown; Merge merge = MergeNone; Hints hint = HintNone; - uint32_t next_block = 0; - uint32_t merge_block = 0; - uint32_t continue_block = 0; + BlockID next_block = 0; + BlockID merge_block = 0; + BlockID continue_block = 0; - uint32_t return_value = 0; // If 0, return nothing (void). - uint32_t condition = 0; - uint32_t true_block = 0; - uint32_t false_block = 0; - uint32_t default_block = 0; + ID return_value = 0; // If 0, return nothing (void). + ID condition = 0; + BlockID true_block = 0; + BlockID false_block = 0; + BlockID default_block = 0; + + // If terminator is EmitMeshTasksEXT. + struct + { + ID groups[3]; + ID payload; + } mesh = {}; SmallVector ops; struct Phi { - uint32_t local_variable; // flush local variable ... - uint32_t parent; // If we're in from_block and want to branch into this block ... - uint32_t function_variable; // to this function-global "phi" variable first. + ID local_variable; // flush local variable ... + BlockID parent; // If we're in from_block and want to branch into this block ... + VariableID function_variable; // to this function-global "phi" variable first. }; // Before entering this block flush out local variables to magical "phi" variables. @@ -682,18 +863,19 @@ struct SPIRBlock : IVariant // Declare these temporaries before beginning the block. // Used for handling complex continue blocks which have side effects. - SmallVector> declare_temporary; + SmallVector> declare_temporary; // Declare these temporaries, but only conditionally if this block turns out to be // a complex loop header. 
- SmallVector> potential_declare_temporary; + SmallVector> potential_declare_temporary; struct Case { - uint32_t value; - uint32_t block; + uint64_t value; + BlockID block; }; - SmallVector cases; + SmallVector cases_32bit; + SmallVector cases_64bit; // If we have tried to optimize code for this block but failed, // keep track of this. @@ -707,25 +889,25 @@ struct SPIRBlock : IVariant // If marked, we have explicitly handled Phi from this block, so skip any flushes related to that on a branch. // Used to handle an edge case with switch and case-label fallthrough where fall-through writes to Phi. - uint32_t ignore_phi_from_block = 0; + BlockID ignore_phi_from_block = 0; // The dominating block which this block might be within. // Used in continue; blocks to determine if we really need to write continue. - uint32_t loop_dominator = 0; + BlockID loop_dominator = 0; // All access to these variables are dominated by this block, // so before branching anywhere we need to make sure that we declare these variables. - SmallVector dominated_variables; + SmallVector dominated_variables; // These are variables which should be declared in a for loop header, if we // fail to use a classic for-loop, // we remove these variables, and fall back to regular variables outside the loop. - SmallVector loop_variables; + SmallVector loop_variables; // Some expressions are control-flow dependent, i.e. any instruction which relies on derivatives or // sub-group-like operations. // Make sure that we only use these expressions in the original block. 
- SmallVector invalidate_expressions; + SmallVector invalidate_expressions; SPIRV_CROSS_DECLARE_CLONE(SPIRBlock) }; @@ -737,7 +919,7 @@ struct SPIRFunction : IVariant type = TypeFunction }; - SPIRFunction(uint32_t return_type_, uint32_t function_type_) + SPIRFunction(TypeID return_type_, TypeID function_type_) : return_type(return_type_) , function_type(function_type_) { @@ -745,8 +927,8 @@ struct SPIRFunction : IVariant struct Parameter { - uint32_t type; - uint32_t id; + TypeID type; + ID id; uint32_t read_count; uint32_t write_count; @@ -768,25 +950,25 @@ struct SPIRFunction : IVariant // or a global ID. struct CombinedImageSamplerParameter { - uint32_t id; - uint32_t image_id; - uint32_t sampler_id; + VariableID id; + VariableID image_id; + VariableID sampler_id; bool global_image; bool global_sampler; bool depth; }; - uint32_t return_type; - uint32_t function_type; + TypeID return_type; + TypeID function_type; SmallVector arguments; // Can be used by backends to add magic arguments. // Currently used by combined image/sampler implementation. SmallVector shadow_arguments; - SmallVector local_variables; - uint32_t entry_block = 0; - SmallVector blocks; + SmallVector local_variables; + BlockID entry_block = 0; + SmallVector blocks; SmallVector combined_parameters; struct EntryLine @@ -796,12 +978,12 @@ struct SPIRFunction : IVariant }; EntryLine entry_line; - void add_local_variable(uint32_t id) + void add_local_variable(VariableID id) { local_variables.push_back(id); } - void add_parameter(uint32_t parameter_type, uint32_t id, bool alias_global_variable = false) + void add_parameter(TypeID parameter_type, ID id, bool alias_global_variable = false) { // Arguments are read-only until proven otherwise. 
arguments.push_back({ parameter_type, id, 0u, 0u, alias_global_variable }); @@ -822,7 +1004,7 @@ struct SPIRFunction : IVariant // On function entry, make sure to copy a constant array into thread addr space to work around // the case where we are passing a constant array by value to a function on backends which do not // consider arrays value types. - SmallVector constant_arrays_needed_on_stack; + SmallVector constant_arrays_needed_on_stack; bool active = false; bool flush_undeclared = true; @@ -838,7 +1020,7 @@ struct SPIRAccessChain : IVariant type = TypeAccessChain }; - SPIRAccessChain(uint32_t basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_, + SPIRAccessChain(TypeID basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_, int32_t static_index_) : basetype(basetype_) , storage(storage_) @@ -853,20 +1035,21 @@ struct SPIRAccessChain : IVariant // which has no usable buffer type ala GLSL SSBOs. // StructuredBuffer is too limited, so our only option is to deal with ByteAddressBuffer which works with raw addresses. - uint32_t basetype; + TypeID basetype; spv::StorageClass storage; std::string base; std::string dynamic_index; int32_t static_index; - uint32_t loaded_from = 0; + VariableID loaded_from = 0; uint32_t matrix_stride = 0; + uint32_t array_stride = 0; bool row_major_matrix = false; bool immutable = false; // By reading this expression, we implicitly read these expressions as well. // Used by access chain Store and Load since we read multiple expressions in this case. 
- SmallVector implied_read_expressions; + SmallVector implied_read_expressions; SPIRV_CROSS_DECLARE_CLONE(SPIRAccessChain) }; @@ -879,7 +1062,7 @@ struct SPIRVariable : IVariant }; SPIRVariable() = default; - SPIRVariable(uint32_t basetype_, spv::StorageClass storage_, uint32_t initializer_ = 0, uint32_t basevariable_ = 0) + SPIRVariable(TypeID basetype_, spv::StorageClass storage_, ID initializer_ = 0, VariableID basevariable_ = 0) : basetype(basetype_) , storage(storage_) , initializer(initializer_) @@ -887,11 +1070,11 @@ struct SPIRVariable : IVariant { } - uint32_t basetype = 0; + TypeID basetype = 0; spv::StorageClass storage = spv::StorageClassGeneric; uint32_t decoration = 0; - uint32_t initializer = 0; - uint32_t basevariable = 0; + ID initializer = 0; + VariableID basevariable = 0; SmallVector dereference_chain; bool compat_builtin = false; @@ -901,11 +1084,10 @@ struct SPIRVariable : IVariant // When we read the variable as an expression, just forward // shadowed_id as the expression. bool statically_assigned = false; - uint32_t static_expression = 0; + ID static_expression = 0; // Temporaries which can remain forwarded as long as this variable is not modified. - SmallVector dependees; - bool forwardable = true; + SmallVector dependees; bool deferred_declaration = false; bool phi_variable = false; @@ -917,7 +1099,7 @@ struct SPIRVariable : IVariant uint32_t remapped_components = 0; // The block which dominates all access to this variable. - uint32_t dominator = 0; + BlockID dominator = 0; // If true, this variable is a loop variable, when accessing the variable // outside a loop, // we should statically forward it. @@ -937,7 +1119,8 @@ struct SPIRConstant : IVariant type = TypeConstant }; - union Constant { + union Constant + { uint32_t u32; int32_t i32; float f32; @@ -951,15 +1134,12 @@ struct SPIRConstant : IVariant { Constant r[4]; // If != 0, this element is a specialization constant, and we should keep track of it as such. 
- uint32_t id[4]; + ID id[4]; uint32_t vecsize = 1; - // Workaround for MSVC 2013, initializing an array breaks. ConstantVector() { memset(r, 0, sizeof(r)); - for (unsigned i = 0; i < 4; i++) - id[i] = 0; } }; @@ -967,15 +1147,8 @@ struct SPIRConstant : IVariant { ConstantVector c[4]; // If != 0, this column is a specialization constant, and we should keep track of it as such. - uint32_t id[4]; + ID id[4]; uint32_t columns = 1; - - // Workaround for MSVC 2013, initializing an array breaks. - ConstantMatrix() - { - for (unsigned i = 0; i < 4; i++) - id[i] = 0; - } }; static inline float f16_to_f32(uint16_t u16_value) @@ -985,7 +1158,8 @@ struct SPIRConstant : IVariant int e = (u16_value >> 10) & 0x1f; int m = (u16_value >> 0) & 0x3ff; - union { + union + { float f32; uint32_t u32; } u; @@ -1139,16 +1313,18 @@ struct SPIRConstant : IVariant SPIRConstant() = default; - SPIRConstant(uint32_t constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized) + SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized) : constant_type(constant_type_) , specialization(specialized) { - subconstants.insert(std::end(subconstants), elements, elements + num_elements); + subconstants.reserve(num_elements); + for (uint32_t i = 0; i < num_elements; i++) + subconstants.push_back(elements[i]); specialization = specialized; } // Construct scalar (32-bit). - SPIRConstant(uint32_t constant_type_, uint32_t v0, bool specialized) + SPIRConstant(TypeID constant_type_, uint32_t v0, bool specialized) : constant_type(constant_type_) , specialization(specialized) { @@ -1158,7 +1334,7 @@ struct SPIRConstant : IVariant } // Construct scalar (64-bit). 
- SPIRConstant(uint32_t constant_type_, uint64_t v0, bool specialized) + SPIRConstant(TypeID constant_type_, uint64_t v0, bool specialized) : constant_type(constant_type_) , specialization(specialized) { @@ -1168,7 +1344,7 @@ struct SPIRConstant : IVariant } // Construct vectors and matrices. - SPIRConstant(uint32_t constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements, + SPIRConstant(TypeID constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements, bool specialized) : constant_type(constant_type_) , specialization(specialized) @@ -1200,7 +1376,7 @@ struct SPIRConstant : IVariant } } - uint32_t constant_type = 0; + TypeID constant_type = 0; ConstantMatrix m; // If this constant is a specialization constant (i.e. created with OpSpecConstant*). @@ -1212,7 +1388,7 @@ struct SPIRConstant : IVariant bool is_used_as_lut = false; // For composites which are constant arrays, etc. - SmallVector subconstants; + SmallVector subconstants; // Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant, // and uses them to initialize the constant. This allows the user @@ -1240,7 +1416,7 @@ class Variant ~Variant() { if (holder) - group->pools[type]->free_opaque(holder); + group->pools[type]->deallocate_opaque(holder); } // Marking custom move constructor as noexcept is important. 
@@ -1259,7 +1435,7 @@ class Variant if (this != &other) { if (holder) - group->pools[type]->free_opaque(holder); + group->pools[type]->deallocate_opaque(holder); holder = other.holder; group = other.group; type = other.type; @@ -1283,7 +1459,7 @@ class Variant if (this != &other) { if (holder) - group->pools[type]->free_opaque(holder); + group->pools[type]->deallocate_opaque(holder); if (other.holder) holder = other.holder->clone(group->pools[other.type].get()); @@ -1299,13 +1475,13 @@ class Variant void set(IVariant *val, Types new_type) { if (holder) - group->pools[type]->free_opaque(holder); + group->pools[type]->deallocate_opaque(holder); holder = nullptr; if (!allow_type_rewrite && type != TypeNone && type != new_type) { if (val) - group->pools[new_type]->free_opaque(val); + group->pools[new_type]->deallocate_opaque(val); SPIRV_CROSS_THROW("Overwriting a variant with new type."); } @@ -1347,9 +1523,9 @@ class Variant return type; } - uint32_t get_id() const + ID get_id() const { - return holder ? holder->self : 0; + return holder ? holder->self : ID(0); } bool empty() const @@ -1360,7 +1536,7 @@ class Variant void reset() { if (holder) - group->pools[type]->free_opaque(holder); + group->pools[type]->deallocate_opaque(holder); holder = nullptr; type = TypeNone; } @@ -1398,10 +1574,86 @@ T &variant_set(Variant &var, P &&... args) struct AccessChainMeta { - uint32_t storage_packed_type = 0; + uint32_t storage_physical_type = 0; bool need_transpose = false; bool storage_is_packed = false; bool storage_is_invariant = false; + bool flattened_struct = false; + bool relaxed_precision = false; +}; + +enum ExtendedDecorations +{ + // Marks if a buffer block is re-packed, i.e. member declaration might be subject to PhysicalTypeID remapping and padding. + SPIRVCrossDecorationBufferBlockRepacked = 0, + + // A type in a buffer block might be declared with a different physical type than the logical type. + // If this is not set, PhysicalTypeID == the SPIR-V type as declared. 
+ SPIRVCrossDecorationPhysicalTypeID, + + // Marks if the physical type is to be declared with tight packing rules, i.e. packed_floatN on MSL and friends. + // If this is set, PhysicalTypeID might also be set. It can be set to same as logical type if all we're doing + // is converting float3 to packed_float3 for example. + // If this is marked on a struct, it means the struct itself must use only Packed types for all its members. + SPIRVCrossDecorationPhysicalTypePacked, + + // The padding in bytes before declaring this struct member. + // If used on a struct type, marks the target size of a struct. + SPIRVCrossDecorationPaddingTarget, + + SPIRVCrossDecorationInterfaceMemberIndex, + SPIRVCrossDecorationInterfaceOrigID, + SPIRVCrossDecorationResourceIndexPrimary, + // Used for decorations like resource indices for samplers when part of combined image samplers. + // A variable might need to hold two resource indices in this case. + SPIRVCrossDecorationResourceIndexSecondary, + // Used for resource indices for multiplanar images when part of combined image samplers. + SPIRVCrossDecorationResourceIndexTertiary, + SPIRVCrossDecorationResourceIndexQuaternary, + + // Marks a buffer block for using explicit offsets (GLSL/HLSL). + SPIRVCrossDecorationExplicitOffset, + + // Apply to a variable in the Input storage class; marks it as holding the base group passed to vkCmdDispatchBase(), + // or the base vertex and instance indices passed to vkCmdDrawIndexed(). + // In MSL, this is used to adjust the WorkgroupId and GlobalInvocationId variables in compute shaders, + // and to hold the BaseVertex and BaseInstance variables in vertex shaders. + SPIRVCrossDecorationBuiltInDispatchBase, + + // Apply to a variable that is a function parameter; marks it as being a "dynamic" + // combined image-sampler. In MSL, this is used when a function parameter might hold + // either a regular combined image-sampler or one that has an attached sampler + // Y'CbCr conversion. 
+ SPIRVCrossDecorationDynamicImageSampler, + + // Apply to a variable in the Input storage class; marks it as holding the size of the stage + // input grid. + // In MSL, this is used to hold the vertex and instance counts in a tessellation pipeline + // vertex shader. + SPIRVCrossDecorationBuiltInStageInputSize, + + // Apply to any access chain of a tessellation I/O variable; stores the type of the sub-object + // that was chained to, as recorded in the input variable itself. This is used in case the pointer + // is itself used as the base of an access chain, to calculate the original type of the sub-object + // chained to, in case a swizzle needs to be applied. This should not happen normally with valid + // SPIR-V, but the MSL backend can change the type of input variables, necessitating the + // addition of swizzles to keep the generated code compiling. + SPIRVCrossDecorationTessIOOriginalInputTypeID, + + // Apply to any access chain of an interface variable used with pull-model interpolation, where the variable is a + // vector but the resulting pointer is a scalar; stores the component index that is to be accessed by the chain. + // This is used when emitting calls to interpolation functions on the chain in MSL: in this case, the component + // must be applied to the result, since pull-model interpolants in MSL cannot be swizzled directly, but the + // results of interpolation can. + SPIRVCrossDecorationInterpolantComponentExpr, + + // Apply to any struct type that is used in the Workgroup storage class. + // This causes matrices in MSL prior to Metal 3.0 to be emitted using a special + // class that is convertible to the standard matrix type, to work around the + // lack of constructors in the 'threadgroup' address space. 
+ SPIRVCrossDecorationWorkgroupStruct, + + SPIRVCrossDecorationCount }; struct Meta @@ -1418,6 +1670,9 @@ struct Meta uint32_t set = 0; uint32_t binding = 0; uint32_t offset = 0; + uint32_t xfb_buffer = 0; + uint32_t xfb_stride = 0; + uint32_t stream = 0; uint32_t array_stride = 0; uint32_t matrix_stride = 0; uint32_t input_attachment = 0; @@ -1426,14 +1681,17 @@ struct Meta spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax; bool builtin = false; - struct + struct Extended { - uint32_t packed_type = 0; - bool packed = false; - uint32_t ib_member_index = ~(0u); - uint32_t ib_orig_id = 0; - uint32_t resource_index_primary = ~(0u); - uint32_t resource_index_secondary = ~(0u); + Extended() + { + // MSVC 2013 workaround to init like this. + for (auto &v : values) + v = 0; + } + + Bitset flags; + uint32_t values[SPIRVCrossDecorationCount]; } extended; }; @@ -1539,6 +1797,125 @@ static inline bool opcode_is_sign_invariant(spv::Op opcode) return false; } } + +static inline bool opcode_can_promote_integer_implicitly(spv::Op opcode) +{ + switch (opcode) + { + case spv::OpSNegate: + case spv::OpNot: + case spv::OpBitwiseAnd: + case spv::OpBitwiseOr: + case spv::OpBitwiseXor: + case spv::OpShiftLeftLogical: + case spv::OpShiftRightLogical: + case spv::OpShiftRightArithmetic: + case spv::OpIAdd: + case spv::OpISub: + case spv::OpIMul: + case spv::OpSDiv: + case spv::OpUDiv: + case spv::OpSRem: + case spv::OpUMod: + case spv::OpSMod: + return true; + + default: + return false; + } +} + +struct SetBindingPair +{ + uint32_t desc_set; + uint32_t binding; + + inline bool operator==(const SetBindingPair &other) const + { + return desc_set == other.desc_set && binding == other.binding; + } + + inline bool operator<(const SetBindingPair &other) const + { + return desc_set < other.desc_set || (desc_set == other.desc_set && binding < other.binding); + } +}; + +struct LocationComponentPair +{ + uint32_t location; + uint32_t component; + + inline bool operator==(const 
LocationComponentPair &other) const + { + return location == other.location && component == other.component; + } + + inline bool operator<(const LocationComponentPair &other) const + { + return location < other.location || (location == other.location && component < other.component); + } +}; + +struct StageSetBinding +{ + spv::ExecutionModel model; + uint32_t desc_set; + uint32_t binding; + + inline bool operator==(const StageSetBinding &other) const + { + return model == other.model && desc_set == other.desc_set && binding == other.binding; + } +}; + +struct InternalHasher +{ + inline size_t operator()(const SetBindingPair &value) const + { + // Quality of hash doesn't really matter here. + auto hash_set = std::hash()(value.desc_set); + auto hash_binding = std::hash()(value.binding); + return (hash_set * 0x10001b31) ^ hash_binding; + } + + inline size_t operator()(const LocationComponentPair &value) const + { + // Quality of hash doesn't really matter here. + auto hash_set = std::hash()(value.location); + auto hash_binding = std::hash()(value.component); + return (hash_set * 0x10001b31) ^ hash_binding; + } + + inline size_t operator()(const StageSetBinding &value) const + { + // Quality of hash doesn't really matter here. + auto hash_model = std::hash()(value.model); + auto hash_set = std::hash()(value.desc_set); + auto tmp_hash = (hash_model * 0x10001b31) ^ hash_set; + return (tmp_hash * 0x10001b31) ^ value.binding; + } +}; + +// Special constant used in a {MSL,HLSL}ResourceBinding desc_set +// element to indicate the bindings for the push constants. +static const uint32_t ResourceBindingPushConstantDescriptorSet = ~(0u); + +// Special constant used in a {MSL,HLSL}ResourceBinding binding +// element to indicate the bindings for the push constants. 
+static const uint32_t ResourceBindingPushConstantBinding = 0; } // namespace SPIRV_CROSS_NAMESPACE +namespace std +{ +template +struct hash> +{ + size_t operator()(const SPIRV_CROSS_NAMESPACE::TypedID &value) const + { + return std::hash()(value); + } +}; +} // namespace std + #endif diff --git a/spirv_cpp.cpp b/spirv_cpp.cpp index 25966b32ad7..dd0a84c8312 100644 --- a/spirv_cpp.cpp +++ b/spirv_cpp.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_cpp.hpp" using namespace spv; @@ -267,8 +274,6 @@ void CompilerCPP::emit_resources() if (emitted) statement(""); - declare_undefined_values(); - statement("inline void init(spirv_cross_shader& s)"); begin_scope(); statement(resource_type, "::init(s);"); @@ -306,6 +311,8 @@ void CompilerCPP::emit_resources() string CompilerCPP::compile() { + ir.fixup_reserved_names(); + // Do not deal with ES-isms like precision, older extensions and such. options.es = false; options.version = 450; @@ -329,11 +336,8 @@ string CompilerCPP::compile() uint32_t pass_count = 0; do { - if (pass_count >= 3) - SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); - resource_registrations.clear(); - reset(); + reset(pass_count); // Move constructor for this type is broken on GCC 4.9 ... 
buffer.reset(); diff --git a/spirv_cpp.hpp b/spirv_cpp.hpp index 4c20aa37b8d..c76629cdcbe 100644 --- a/spirv_cpp.hpp +++ b/spirv_cpp.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_CPP_HPP #define SPIRV_CROSS_CPP_HPP diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 9fdfd1f00c0..edc98f81e1d 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #include "spirv_cross.hpp" #include "GLSL.std.450.h" #include "spirv_cfg.hpp" @@ -29,16 +36,16 @@ using namespace SPIRV_CROSS_NAMESPACE; Compiler::Compiler(vector ir_) { - Parser parser(move(ir_)); + Parser parser(std::move(ir_)); parser.parse(); - set_ir(move(parser.get_parsed_ir())); + set_ir(std::move(parser.get_parsed_ir())); } Compiler::Compiler(const uint32_t *ir_, size_t word_count) { Parser parser(ir_, word_count); parser.parse(); - set_ir(move(parser.get_parsed_ir())); + set_ir(std::move(parser.get_parsed_ir())); } Compiler::Compiler(const ParsedIR &ir_) @@ -48,12 +55,12 @@ Compiler::Compiler(const ParsedIR &ir_) Compiler::Compiler(ParsedIR &&ir_) { - set_ir(move(ir_)); + set_ir(std::move(ir_)); } void Compiler::set_ir(ParsedIR &&ir_) { - ir = move(ir_); + ir = std::move(ir_); parse_fixup(); } @@ -88,6 +95,13 @@ bool Compiler::variable_storage_is_aliased(const SPIRVariable &v) bool Compiler::block_is_pure(const SPIRBlock &block) { + // This is a global side effect of the function. + if (block.terminator == SPIRBlock::Kill || + block.terminator == SPIRBlock::TerminateRay || + block.terminator == SPIRBlock::IgnoreIntersection || + block.terminator == SPIRBlock::EmitMeshTasks) + return false; + for (auto &i : block.ops) { auto ops = stream(i); @@ -141,21 +155,62 @@ bool Compiler::block_is_pure(const SPIRBlock &block) case OpEmitVertex: return false; + // Mesh shader functions modify global state. + // (EmitMeshTasks is a terminator). + case OpSetMeshOutputsEXT: + return false; + // Barriers disallow any reordering, so we should treat blocks with barrier as writing. case OpControlBarrier: case OpMemoryBarrier: return false; // Ray tracing builtins are impure. 
- case OpReportIntersectionNV: + case OpReportIntersectionKHR: case OpIgnoreIntersectionNV: case OpTerminateRayNV: case OpTraceNV: + case OpTraceRayKHR: case OpExecuteCallableNV: + case OpExecuteCallableKHR: + case OpRayQueryInitializeKHR: + case OpRayQueryTerminateKHR: + case OpRayQueryGenerateIntersectionKHR: + case OpRayQueryConfirmIntersectionKHR: + case OpRayQueryProceedKHR: + // There are various getters in ray query, but they are considered pure. return false; // OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure. + case OpDemoteToHelperInvocationEXT: + // This is a global side effect of the function. + return false; + + case OpExtInst: + { + uint32_t extension_set = ops[2]; + if (get(extension_set).ext == SPIRExtension::GLSL) + { + auto op_450 = static_cast(ops[3]); + switch (op_450) + { + case GLSLstd450Modf: + case GLSLstd450Frexp: + { + auto &type = expression_type(ops[5]); + if (type.storage != StorageClassFunction) + return false; + break; + } + + default: + break; + } + } + break; + } + default: break; } @@ -177,7 +232,7 @@ string Compiler::to_name(uint32_t id, bool allow_alias) const { // If the alias master has been specially packed, we will have emitted a clean variant as well, // so skip the name aliasing here. - if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked)) + if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) return to_name(type.type_alias); } } @@ -300,6 +355,8 @@ void Compiler::register_write(uint32_t chain) var = maybe_get(access_chain->loaded_from); } + auto &chain_type = expression_type(chain); + if (var) { bool check_argument_storage_qualifier = true; @@ -342,7 +399,7 @@ void Compiler::register_write(uint32_t chain) force_recompile(); } } - else + else if (chain_type.pointer) { // If we stored through a variable pointer, then we don't know which // variable we stored to. 
So *all* expressions after this point need to @@ -351,6 +408,9 @@ void Compiler::register_write(uint32_t chain) // only certain variables, we can invalidate only those. flush_all_active_variables(); } + + // If chain_type.pointer is false, we're not writing to memory backed variables, but temporaries instead. + // This can happen in copy_logical_type where we unroll complex reads and writes to temporaries. } void Compiler::flush_dependees(SPIRVariable &var) @@ -497,10 +557,16 @@ bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins return false; } - bool hidden = false; - if (check_active_interface_variables && storage_class_is_interface(var.storage)) - hidden = active_interface_variables.find(var.self) == end(active_interface_variables); - return hidden; + // In SPIR-V 1.4 and up we must also use the active variable interface to disable global variables + // which are not part of the entry point. + if (ir.get_spirv_version() >= 0x10400 && var.storage != spv::StorageClassGeneric && + var.storage != spv::StorageClassFunction && !interface_variable_exists_in_entry_point(var.self)) + { + return true; + } + + return check_active_interface_variables && storage_class_is_interface(var.storage) && + active_interface_variables.find(var.self) == end(active_interface_variables); } bool Compiler::is_builtin_type(const SPIRType &type) const @@ -569,7 +635,7 @@ ShaderResources Compiler::get_shader_resources() const return get_shader_resources(nullptr); } -ShaderResources Compiler::get_shader_resources(const unordered_set &active_variables) const +ShaderResources Compiler::get_shader_resources(const unordered_set &active_variables) const { return get_shader_resources(&active_variables); } @@ -659,10 +725,42 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t case OpExtInst: { - if (length < 5) + if (length < 3) return false; - uint32_t extension_set = args[2]; - if (compiler.get(extension_set).ext == 
SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) + auto &extension_set = compiler.get(args[2]); + switch (extension_set.ext) + { + case SPIRExtension::GLSL: + { + auto op = static_cast(args[3]); + + switch (op) + { + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + { + auto *var = compiler.maybe_get(args[4]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[4]); + break; + } + + case GLSLstd450Modf: + case GLSLstd450Fract: + { + auto *var = compiler.maybe_get(args[5]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[5]); + break; + } + + default: + break; + } + break; + } + case SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter: { enum AMDShaderExplicitVertexParameter { @@ -684,6 +782,10 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t default: break; } + break; + } + default: + break; } break; } @@ -726,16 +828,24 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t return true; } -unordered_set Compiler::get_active_interface_variables() const +unordered_set Compiler::get_active_interface_variables() const { // Traverse the call graph and find all interface variables which are in use. - unordered_set variables; + unordered_set variables; InterfaceVariableAccessHandler handler(*this, variables); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); - // Make sure we preserve output variables which are only initialized, but never accessed by any code. 
ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { - if (var.storage == StorageClassOutput && var.initializer != 0) + if (var.storage != StorageClassOutput) + return; + if (!interface_variable_exists_in_entry_point(var.self)) + return; + + // An output variable which is just declared (but uninitialized) might be read by subsequent stages + // so we should force-enable these outputs, + // since compilation will fail if a subsequent stage attempts to read from the variable in question. + // Also, make sure we preserve output variables which are only initialized, but never accessed by any code. + if (var.initializer != ID(0) || get_execution_model() != ExecutionModelFragment) variables.insert(var.self); }); @@ -746,13 +856,13 @@ unordered_set Compiler::get_active_interface_variables() const return variables; } -void Compiler::set_enabled_interface_variables(std::unordered_set active_variables) +void Compiler::set_enabled_interface_variables(std::unordered_set active_variables) { - active_interface_variables = move(active_variables); + active_interface_variables = std::move(active_variables); check_active_interface_variables = true; } -ShaderResources Compiler::get_shader_resources(const unordered_set *active_variables) const +ShaderResources Compiler::get_shader_resources(const unordered_set *active_variables) const { ShaderResources res; @@ -763,19 +873,79 @@ ShaderResources Compiler::get_shader_resources(const unordered_set *ac // It is possible for uniform storage classes to be passed as function parameters, so detect // that. To detect function parameters, check of StorageClass of variable is function scope. 
- if (var.storage == StorageClassFunction || !type.pointer || is_builtin_variable(var)) + if (var.storage == StorageClassFunction || !type.pointer) return; if (active_variables && active_variables->find(var.self) == end(*active_variables)) return; + // In SPIR-V 1.4 and up, every global must be present in the entry point interface list, + // not just IO variables. + bool active_in_entry_point = true; + if (ir.get_spirv_version() < 0x10400) + { + if (var.storage == StorageClassInput || var.storage == StorageClassOutput) + active_in_entry_point = interface_variable_exists_in_entry_point(var.self); + } + else + active_in_entry_point = interface_variable_exists_in_entry_point(var.self); + + if (!active_in_entry_point) + return; + + bool is_builtin = is_builtin_variable(var); + + if (is_builtin) + { + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + return; + + auto &list = var.storage == StorageClassInput ? res.builtin_inputs : res.builtin_outputs; + BuiltInResource resource; + + if (has_decoration(type.self, DecorationBlock)) + { + resource.resource = { var.self, var.basetype, type.self, + get_remapped_declared_block_name(var.self, false) }; + + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + resource.value_type_id = type.member_types[i]; + resource.builtin = BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)); + list.push_back(resource); + } + } + else + { + bool strip_array = + !has_decoration(var.self, DecorationPatch) && ( + get_execution_model() == ExecutionModelTessellationControl || + (get_execution_model() == ExecutionModelTessellationEvaluation && + var.storage == StorageClassInput)); + + resource.resource = { var.self, var.basetype, type.self, get_name(var.self) }; + + if (strip_array && !type.array.empty()) + resource.value_type_id = get_variable_data_type(var).parent_type; + else + resource.value_type_id = get_variable_data_type_id(var); + + assert(resource.value_type_id); + + resource.builtin 
= BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + list.push_back(std::move(resource)); + } + return; + } + // Input - if (var.storage == StorageClassInput && interface_variable_exists_in_entry_point(var.self)) + if (var.storage == StorageClassInput) { if (has_decoration(type.self, DecorationBlock)) { res.stage_inputs.push_back( - { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); + { var.self, var.basetype, type.self, + get_remapped_declared_block_name(var.self, false) }); } else res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); @@ -786,12 +956,12 @@ ShaderResources Compiler::get_shader_resources(const unordered_set *ac res.subpass_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Outputs - else if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self)) + else if (var.storage == StorageClassOutput) { if (has_decoration(type.self, DecorationBlock)) { res.stage_outputs.push_back( - { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); } else res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); @@ -821,6 +991,10 @@ ShaderResources Compiler::get_shader_resources(const unordered_set *ac // in the future. 
res.push_constant_buffers.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } + else if (type.storage == StorageClassShaderRecordBufferKHR) + { + res.shader_record_buffers.push_back({ var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) }); + } // Images else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && type.image.sampled == 2) @@ -849,7 +1023,7 @@ ShaderResources Compiler::get_shader_resources(const unordered_set *ac res.atomic_counters.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Acceleration structures - else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::AccelerationStructureNV) + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::AccelerationStructure) { res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } @@ -886,10 +1060,12 @@ void Compiler::parse_fixup() if (id.get_type() == TypeConstant) { auto &c = id.get(); - if (ir.meta[c.self].decoration.builtin && ir.meta[c.self].decoration.builtin_type == BuiltInWorkgroupSize) + if (has_decoration(c.self, DecorationBuiltIn) && + BuiltIn(get_decoration(c.self, DecorationBuiltIn)) == BuiltInWorkgroupSize) { // In current SPIR-V, there can be just one constant like this. // All entry points will receive the constant value. + // WorkgroupSize take precedence over LocalSizeId. 
for (auto &entry : ir.entry_points) { entry.second.workgroup_size.constant = c.self; @@ -903,8 +1079,11 @@ void Compiler::parse_fixup() { auto &var = id.get(); if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup || + var.storage == StorageClassTaskPayloadWorkgroupEXT || var.storage == StorageClassOutput) + { global_variables.push_back(var.self); + } if (variable_storage_is_aliased(var)) aliased_variables.push_back(var.self); } @@ -969,17 +1148,17 @@ void Compiler::update_name_cache(unordered_set &cache, string &name) update_name_cache(cache, cache, name); } -void Compiler::set_name(uint32_t id, const std::string &name) +void Compiler::set_name(ID id, const std::string &name) { ir.set_name(id, name); } -const SPIRType &Compiler::get_type(uint32_t id) const +const SPIRType &Compiler::get_type(TypeID id) const { return get(id); } -const SPIRType &Compiler::get_type_from_variable(uint32_t id) const +const SPIRType &Compiler::get_type_from_variable(VariableID id) const { return get(get(id).basetype); } @@ -1050,23 +1229,23 @@ bool Compiler::is_sampled_image_type(const SPIRType &type) type.image.dim != DimBuffer; } -void Compiler::set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, +void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument) { ir.set_member_decoration_string(id, index, decoration, argument); } -void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument) +void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument) { ir.set_member_decoration(id, index, decoration, argument); } -void Compiler::set_member_name(uint32_t id, uint32_t index, const std::string &name) +void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name) { ir.set_member_name(id, index, name); } -const std::string &Compiler::get_member_name(uint32_t 
id, uint32_t index) const +const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const { return ir.get_member_name(id, index); } @@ -1082,7 +1261,7 @@ void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const ir.meta[type_id].members[index].qualified_alias = name; } -const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t index) const +const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const { auto *m = ir.find_meta(type_id); if (m && index < m->members.size()) @@ -1091,32 +1270,32 @@ const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t ind return ir.get_empty_string(); } -uint32_t Compiler::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { return ir.get_member_decoration(id, index, decoration); } -const Bitset &Compiler::get_member_decoration_bitset(uint32_t id, uint32_t index) const +const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const { return ir.get_member_decoration_bitset(id, index); } -bool Compiler::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { return ir.has_member_decoration(id, index, decoration); } -void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration) +void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration) { ir.unset_member_decoration(id, index, decoration); } -void Compiler::set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument) +void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument) { ir.set_decoration_string(id, decoration, argument); } -void Compiler::set_decoration(uint32_t id, Decoration 
decoration, uint32_t argument) +void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument) { ir.set_decoration(id, decoration, argument); } @@ -1124,32 +1303,8 @@ void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argum void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value) { auto &dec = ir.meta[id].decoration; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - dec.extended.packed = true; - break; - - case SPIRVCrossDecorationPackedType: - dec.extended.packed_type = value; - break; - - case SPIRVCrossDecorationInterfaceMemberIndex: - dec.extended.ib_member_index = value; - break; - - case SPIRVCrossDecorationInterfaceOrigID: - dec.extended.ib_orig_id = value; - break; - - case SPIRVCrossDecorationResourceIndexPrimary: - dec.extended.resource_index_primary = value; - break; - - case SPIRVCrossDecorationResourceIndexSecondary: - dec.extended.resource_index_secondary = value; - break; - } + dec.extended.flags.set(decoration); + dec.extended.values[decoration] = value; } void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration, @@ -1157,32 +1312,23 @@ void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, Ext { ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); auto &dec = ir.meta[type].members[index]; + dec.extended.flags.set(decoration); + dec.extended.values[decoration] = value; +} +static uint32_t get_default_extended_decoration(ExtendedDecorations decoration) +{ switch (decoration) { - case SPIRVCrossDecorationPacked: - dec.extended.packed = true; - break; - - case SPIRVCrossDecorationPackedType: - dec.extended.packed_type = value; - break; - - case SPIRVCrossDecorationInterfaceMemberIndex: - dec.extended.ib_member_index = value; - break; - - case SPIRVCrossDecorationInterfaceOrigID: - dec.extended.ib_orig_id = value; - break; - case 
SPIRVCrossDecorationResourceIndexPrimary: - dec.extended.resource_index_primary = value; - break; - case SPIRVCrossDecorationResourceIndexSecondary: - dec.extended.resource_index_secondary = value; - break; + case SPIRVCrossDecorationResourceIndexTertiary: + case SPIRVCrossDecorationResourceIndexQuaternary: + case SPIRVCrossDecorationInterfaceMemberIndex: + return ~(0u); + + default: + return 0; } } @@ -1193,28 +1339,11 @@ uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations deco return 0; auto &dec = m->decoration; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - return uint32_t(dec.extended.packed); - - case SPIRVCrossDecorationPackedType: - return dec.extended.packed_type; - - case SPIRVCrossDecorationInterfaceMemberIndex: - return dec.extended.ib_member_index; - - case SPIRVCrossDecorationInterfaceOrigID: - return dec.extended.ib_orig_id; - - case SPIRVCrossDecorationResourceIndexPrimary: - return dec.extended.resource_index_primary; - case SPIRVCrossDecorationResourceIndexSecondary: - return dec.extended.resource_index_secondary; - } + if (!dec.extended.flags.get(decoration)) + return get_default_extended_decoration(decoration); - return 0; + return dec.extended.values[decoration]; } uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const @@ -1227,28 +1356,9 @@ uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, return 0; auto &dec = m->members[index]; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - return uint32_t(dec.extended.packed); - - case SPIRVCrossDecorationPackedType: - return dec.extended.packed_type; - - case SPIRVCrossDecorationInterfaceMemberIndex: - return dec.extended.ib_member_index; - - case SPIRVCrossDecorationInterfaceOrigID: - return dec.extended.ib_orig_id; - - case SPIRVCrossDecorationResourceIndexPrimary: - return dec.extended.resource_index_primary; - - case 
SPIRVCrossDecorationResourceIndexSecondary: - return dec.extended.resource_index_secondary; - } - - return 0; + if (!dec.extended.flags.get(decoration)) + return get_default_extended_decoration(decoration); + return dec.extended.values[decoration]; } bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const @@ -1258,28 +1368,7 @@ bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decorati return false; auto &dec = m->decoration; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - return dec.extended.packed; - - case SPIRVCrossDecorationPackedType: - return dec.extended.packed_type != 0; - - case SPIRVCrossDecorationInterfaceMemberIndex: - return dec.extended.ib_member_index != uint32_t(-1); - - case SPIRVCrossDecorationInterfaceOrigID: - return dec.extended.ib_orig_id != 0; - - case SPIRVCrossDecorationResourceIndexPrimary: - return dec.extended.resource_index_primary != uint32_t(-1); - - case SPIRVCrossDecorationResourceIndexSecondary: - return dec.extended.resource_index_secondary != uint32_t(-1); - } - - return false; + return dec.extended.flags.get(decoration); } bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const @@ -1292,110 +1381,40 @@ bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, Ext return false; auto &dec = m->members[index]; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - return dec.extended.packed; - - case SPIRVCrossDecorationPackedType: - return dec.extended.packed_type != 0; - - case SPIRVCrossDecorationInterfaceMemberIndex: - return dec.extended.ib_member_index != uint32_t(-1); - - case SPIRVCrossDecorationInterfaceOrigID: - return dec.extended.ib_orig_id != 0; - - case SPIRVCrossDecorationResourceIndexPrimary: - return dec.extended.resource_index_primary != uint32_t(-1); - - case SPIRVCrossDecorationResourceIndexSecondary: - return dec.extended.resource_index_secondary != 
uint32_t(-1); - } - - return false; + return dec.extended.flags.get(decoration); } void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration) { auto &dec = ir.meta[id].decoration; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - dec.extended.packed = false; - break; - - case SPIRVCrossDecorationPackedType: - dec.extended.packed_type = 0; - break; - - case SPIRVCrossDecorationInterfaceMemberIndex: - dec.extended.ib_member_index = uint32_t(-1); - break; - - case SPIRVCrossDecorationInterfaceOrigID: - dec.extended.ib_orig_id = 0; - break; - - case SPIRVCrossDecorationResourceIndexPrimary: - dec.extended.resource_index_primary = uint32_t(-1); - break; - - case SPIRVCrossDecorationResourceIndexSecondary: - dec.extended.resource_index_secondary = uint32_t(-1); - break; - } + dec.extended.flags.clear(decoration); + dec.extended.values[decoration] = 0; } void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) { ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); auto &dec = ir.meta[type].members[index]; - - switch (decoration) - { - case SPIRVCrossDecorationPacked: - dec.extended.packed = false; - break; - - case SPIRVCrossDecorationPackedType: - dec.extended.packed_type = 0; - break; - - case SPIRVCrossDecorationInterfaceMemberIndex: - dec.extended.ib_member_index = uint32_t(-1); - break; - - case SPIRVCrossDecorationInterfaceOrigID: - dec.extended.ib_orig_id = 0; - break; - - case SPIRVCrossDecorationResourceIndexPrimary: - dec.extended.resource_index_primary = uint32_t(-1); - break; - - case SPIRVCrossDecorationResourceIndexSecondary: - dec.extended.resource_index_secondary = uint32_t(-1); - break; - } + dec.extended.flags.clear(decoration); + dec.extended.values[decoration] = 0; } -StorageClass Compiler::get_storage_class(uint32_t id) const +StorageClass Compiler::get_storage_class(VariableID id) const { return get(id).storage; } -const 
std::string &Compiler::get_name(uint32_t id) const +const std::string &Compiler::get_name(ID id) const { return ir.get_name(id); } -const std::string Compiler::get_fallback_name(uint32_t id) const +const std::string Compiler::get_fallback_name(ID id) const { return join("_", id); } -const std::string Compiler::get_block_fallback_name(uint32_t id) const +const std::string Compiler::get_block_fallback_name(VariableID id) const { auto &var = get(id); if (get_name(id).empty()) @@ -1404,37 +1423,37 @@ const std::string Compiler::get_block_fallback_name(uint32_t id) const return get_name(id); } -const Bitset &Compiler::get_decoration_bitset(uint32_t id) const +const Bitset &Compiler::get_decoration_bitset(ID id) const { return ir.get_decoration_bitset(id); } -bool Compiler::has_decoration(uint32_t id, Decoration decoration) const +bool Compiler::has_decoration(ID id, Decoration decoration) const { return ir.has_decoration(id, decoration); } -const string &Compiler::get_decoration_string(uint32_t id, Decoration decoration) const +const string &Compiler::get_decoration_string(ID id, Decoration decoration) const { return ir.get_decoration_string(id, decoration); } -const string &Compiler::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const +const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const { return ir.get_member_decoration_string(id, index, decoration); } -uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const +uint32_t Compiler::get_decoration(ID id, Decoration decoration) const { return ir.get_decoration(id, decoration); } -void Compiler::unset_decoration(uint32_t id, Decoration decoration) +void Compiler::unset_decoration(ID id, Decoration decoration) { ir.unset_decoration(id, decoration); } -bool Compiler::get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const +bool 
Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const { auto *m = ir.find_meta(id); if (!m) @@ -1449,6 +1468,58 @@ bool Compiler::get_binary_offset_for_decoration(uint32_t id, spv::Decoration dec return true; } +bool Compiler::block_is_noop(const SPIRBlock &block) const +{ + if (block.terminator != SPIRBlock::Direct) + return false; + + auto &child = get(block.next_block); + + // If this block participates in PHI, the block isn't really noop. + for (auto &phi : block.phi_variables) + if (phi.parent == block.self || phi.parent == child.self) + return false; + + for (auto &phi : child.phi_variables) + if (phi.parent == block.self) + return false; + + // Verify all instructions have no semantic impact. + for (auto &i : block.ops) + { + auto op = static_cast(i.op); + + switch (op) + { + // Non-Semantic instructions. + case OpLine: + case OpNoLine: + break; + + case OpExtInst: + { + auto *ops = stream(i); + auto ext = get(ops[2]).ext; + + bool ext_is_nonsemantic_only = + ext == SPIRExtension::NonSemanticShaderDebugInfo || + ext == SPIRExtension::SPV_debug_info || + ext == SPIRExtension::NonSemanticGeneric; + + if (!ext_is_nonsemantic_only) + return false; + + break; + } + + default: + return false; + } + } + + return true; +} + bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const { // Tried and failed. @@ -1506,7 +1577,7 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method { // Empty loop header that just sets up merge target // and branches to loop body. 
- bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block.ops.empty(); + bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block_is_noop(block); if (!ret) return false; @@ -1532,19 +1603,8 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone && (positive_candidate || negative_candidate); - // If we have OpPhi which depends on branches which came from our own block, - // we need to flush phi variables in else block instead of a trivial break, - // so we cannot assume this is a for loop candidate. if (ret) { - for (auto &phi : block.phi_variables) - if (phi.parent == block.self || phi.parent == child.self) - return false; - - for (auto &phi : child.phi_variables) - if (phi.parent == block.self) - return false; - auto *merge = maybe_get(block.merge_block); if (merge) for (auto &phi : merge->phi_variables) @@ -1558,42 +1618,10 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method return false; } -bool Compiler::block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to) +bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const { - auto *start = &from; - - if (start->self == to.self) - return true; - - // Break cycles. - if (is_continue(start->self)) - return false; - - // If our select block doesn't merge, we must break or continue in these blocks, - // so if continues occur branchless within these blocks, consider them branchless as well. - // This is typically used for loop control. 
- if (start->terminator == SPIRBlock::Select && start->merge == SPIRBlock::MergeNone && - (block_is_outside_flow_control_from_block(get(start->true_block), to) || - block_is_outside_flow_control_from_block(get(start->false_block), to))) - { - return true; - } - else if (start->merge_block && block_is_outside_flow_control_from_block(get(start->merge_block), to)) - { - return true; - } - else if (start->next_block && block_is_outside_flow_control_from_block(get(start->next_block), to)) - { - return true; - } - else - return false; -} - -bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const -{ - if (!execution_is_branchless(from, to)) - return false; + if (!execution_is_branchless(from, to)) + return false; auto *start = &from; for (;;) @@ -1601,15 +1629,10 @@ bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) con if (start->self == to.self) return true; - if (!start->ops.empty()) + if (!block_is_noop(*start)) return false; auto &next = get(start->next_block); - // Flushing phi variables does not count as noop. - for (auto &phi : next.phi_variables) - if (phi.parent == start->self) - return false; - start = &next; } } @@ -1645,7 +1668,7 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc if (block.merge == SPIRBlock::MergeLoop) return SPIRBlock::WhileLoop; - if (block.loop_dominator == SPIRBlock::NoDominator) + if (block.loop_dominator == BlockID(SPIRBlock::NoDominator)) { // Continue block is never reached from CFG. return SPIRBlock::ComplexLoop; @@ -1663,6 +1686,12 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc const auto *true_block = maybe_get(block.true_block); const auto *merge_block = maybe_get(dominator.merge_block); + // If we need to flush Phi in this block, we cannot have a DoWhile loop. 
+ bool flush_phi_to_false = false_block && flush_phi_required(block.self, block.false_block); + bool flush_phi_to_true = true_block && flush_phi_required(block.self, block.true_block); + if (flush_phi_to_false || flush_phi_to_true) + return SPIRBlock::ComplexLoop; + bool positive_do_while = block.true_block == dominator.self && (block.false_block == dominator.merge_block || (false_block && merge_block && execution_is_noop(*false_block, *merge_block))); @@ -1681,9 +1710,48 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc } } +const SmallVector &Compiler::get_case_list(const SPIRBlock &block) const +{ + uint32_t width = 0; + + // First we check if we can get the type directly from the block.condition + // since it can be a SPIRConstant or a SPIRVariable. + if (const auto *constant = maybe_get(block.condition)) + { + const auto &type = get(constant->constant_type); + width = type.width; + } + else if (const auto *var = maybe_get(block.condition)) + { + const auto &type = get(var->basetype); + width = type.width; + } + else if (const auto *undef = maybe_get(block.condition)) + { + const auto &type = get(undef->basetype); + width = type.width; + } + else + { + auto search = ir.load_type_width.find(block.condition); + if (search == ir.load_type_width.end()) + { + SPIRV_CROSS_THROW("Use of undeclared variable on a switch statement."); + } + + width = search->second; + } + + if (width > 32) + return block.cases_64bit; + + return block.cases_32bit; +} + bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const { handler.set_current_block(block); + handler.rearm_current_block(block); // Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks, // but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing @@ -1707,10 +1775,15 @@ bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHand return false; if 
(!handler.end_function_scope(ops, i.length)) return false; + + handler.rearm_current_block(block); } } } + if (!handler.handle_terminator(block)) + return false; + return true; } @@ -1778,10 +1851,22 @@ size_t Compiler::get_declared_struct_size(const SPIRType &type) const if (type.member_types.empty()) SPIRV_CROSS_THROW("Declared struct in block cannot be empty."); - uint32_t last = uint32_t(type.member_types.size() - 1); - size_t offset = type_struct_member_offset(type, last); - size_t size = get_declared_struct_member_size(type, last); - return offset + size; + // Offsets can be declared out of order, so we need to deduce the actual size + // based on last member instead. + uint32_t member_index = 0; + size_t highest_offset = 0; + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + size_t offset = type_struct_member_offset(type, i); + if (offset > highest_offset) + { + highest_offset = offset; + member_index = i; + } + } + + size_t size = get_declared_struct_member_size(type, member_index); + return highest_offset + size; } size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, size_t array_size) const @@ -1797,6 +1882,161 @@ size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, si return size; } +uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const +{ + auto &result_type = get(spec.basetype); + if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int && + result_type.basetype != SPIRType::Boolean) + { + SPIRV_CROSS_THROW( + "Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n"); + } + + if (!is_scalar(result_type)) + SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n"); + + uint32_t value = 0; + + const auto eval_u32 = [&](uint32_t id) -> uint32_t { + auto &type = expression_type(id); + if (type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int && type.basetype != 
SPIRType::Boolean) + { + SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating " + "specialization constants.\n"); + } + + if (!is_scalar(type)) + SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n"); + if (const auto *c = this->maybe_get(id)) + return c->scalar(); + else + return evaluate_spec_constant_u32(this->get(id)); + }; + +#define binary_spec_op(op, binary_op) \ + case Op##op: \ + value = eval_u32(spec.arguments[0]) binary_op eval_u32(spec.arguments[1]); \ + break +#define binary_spec_op_cast(op, binary_op, type) \ + case Op##op: \ + value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); \ + break + + // Support the basic opcodes which are typically used when computing array sizes. + switch (spec.opcode) + { + binary_spec_op(IAdd, +); + binary_spec_op(ISub, -); + binary_spec_op(IMul, *); + binary_spec_op(BitwiseAnd, &); + binary_spec_op(BitwiseOr, |); + binary_spec_op(BitwiseXor, ^); + binary_spec_op(LogicalAnd, &); + binary_spec_op(LogicalOr, |); + binary_spec_op(ShiftLeftLogical, <<); + binary_spec_op(ShiftRightLogical, >>); + binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t); + binary_spec_op(LogicalEqual, ==); + binary_spec_op(LogicalNotEqual, !=); + binary_spec_op(IEqual, ==); + binary_spec_op(INotEqual, !=); + binary_spec_op(ULessThan, <); + binary_spec_op(ULessThanEqual, <=); + binary_spec_op(UGreaterThan, >); + binary_spec_op(UGreaterThanEqual, >=); + binary_spec_op_cast(SLessThan, <, int32_t); + binary_spec_op_cast(SLessThanEqual, <=, int32_t); + binary_spec_op_cast(SGreaterThan, >, int32_t); + binary_spec_op_cast(SGreaterThanEqual, >=, int32_t); +#undef binary_spec_op +#undef binary_spec_op_cast + + case OpLogicalNot: + value = uint32_t(!eval_u32(spec.arguments[0])); + break; + + case OpNot: + value = ~eval_u32(spec.arguments[0]); + break; + + case OpSNegate: + value = uint32_t(-int32_t(eval_u32(spec.arguments[0]))); + break; + + case OpSelect: + 
value = eval_u32(spec.arguments[0]) ? eval_u32(spec.arguments[1]) : eval_u32(spec.arguments[2]); + break; + + case OpUMod: + { + uint32_t a = eval_u32(spec.arguments[0]); + uint32_t b = eval_u32(spec.arguments[1]); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in UMod, b == 0.\n"); + value = a % b; + break; + } + + case OpSRem: + { + auto a = int32_t(eval_u32(spec.arguments[0])); + auto b = int32_t(eval_u32(spec.arguments[1])); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in SRem, b == 0.\n"); + value = a % b; + break; + } + + case OpSMod: + { + auto a = int32_t(eval_u32(spec.arguments[0])); + auto b = int32_t(eval_u32(spec.arguments[1])); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in SMod, b == 0.\n"); + auto v = a % b; + + // Makes sure we match the sign of b, not a. + if ((b < 0 && v > 0) || (b > 0 && v < 0)) + v += b; + value = v; + break; + } + + case OpUDiv: + { + uint32_t a = eval_u32(spec.arguments[0]); + uint32_t b = eval_u32(spec.arguments[1]); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in UDiv, b == 0.\n"); + value = a / b; + break; + } + + case OpSDiv: + { + auto a = int32_t(eval_u32(spec.arguments[0])); + auto b = int32_t(eval_u32(spec.arguments[1])); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in SDiv, b == 0.\n"); + value = a / b; + break; + } + + default: + SPIRV_CROSS_THROW("Unsupported spec constant opcode for evaluation.\n"); + } + + return value; +} + +uint32_t Compiler::evaluate_constant_u32(uint32_t id) const +{ + if (const auto *c = maybe_get(id)) + return c->scalar(); + else + return evaluate_spec_constant_u32(get(id)); +} + size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const { if (struct_type.member_types.empty()) @@ -1820,11 +2060,18 @@ size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, ui break; } + if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + { + // Check if this is a top-level pointer 
type, and not an array of pointers. + if (type.pointer_depth > get(type.parent_type).pointer_depth) + return 8; + } + if (!type.array.empty()) { // For arrays, we can use ArrayStride to get an easy check. bool array_size_literal = type.array_size_literal.back(); - uint32_t array_size = array_size_literal ? type.array.back() : get(type.array.back()).scalar(); + uint32_t array_size = array_size_literal ? type.array.back() : evaluate_constant_u32(type.array.back()); return type_struct_member_array_stride(struct_type, index) * array_size; } else if (type.basetype == SPIRType::Struct) @@ -1903,7 +2150,7 @@ bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint return true; } -SmallVector Compiler::get_active_buffer_ranges(uint32_t id) const +SmallVector Compiler::get_active_buffer_ranges(VariableID id) const { SmallVector ranges; BufferAccessHandler handler(*this, ranges, id); @@ -1965,6 +2212,12 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar execution.workgroup_size.z = arg2; break; + case ExecutionModeLocalSizeId: + execution.workgroup_size.id_x = arg0; + execution.workgroup_size.id_y = arg1; + execution.workgroup_size.id_z = arg2; + break; + case ExecutionModeInvocations: execution.invocations = arg0; break; @@ -1973,6 +2226,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar execution.output_vertices = arg0; break; + case ExecutionModeOutputPrimitivesEXT: + execution.output_primitives = arg0; + break; + default: break; } @@ -1992,28 +2249,52 @@ uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationCo y = { 0, 0 }; z = { 0, 0 }; + // WorkgroupSize builtin takes precedence over LocalSize / LocalSizeId. 
if (execution.workgroup_size.constant != 0) { auto &c = get(execution.workgroup_size.constant); - if (c.m.c[0].id[0] != 0) + if (c.m.c[0].id[0] != ID(0)) { x.id = c.m.c[0].id[0]; x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId); } - if (c.m.c[0].id[1] != 0) + if (c.m.c[0].id[1] != ID(0)) { y.id = c.m.c[0].id[1]; y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId); } - if (c.m.c[0].id[2] != 0) + if (c.m.c[0].id[2] != ID(0)) { z.id = c.m.c[0].id[2]; z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId); } } + else if (execution.flags.get(ExecutionModeLocalSizeId)) + { + auto &cx = get(execution.workgroup_size.id_x); + if (cx.specialization) + { + x.id = execution.workgroup_size.id_x; + x.constant_id = get_decoration(execution.workgroup_size.id_x, DecorationSpecId); + } + + auto &cy = get(execution.workgroup_size.id_y); + if (cy.specialization) + { + y.id = execution.workgroup_size.id_y; + y.constant_id = get_decoration(execution.workgroup_size.id_y, DecorationSpecId); + } + + auto &cz = get(execution.workgroup_size.id_z); + if (cz.specialization) + { + z.id = execution.workgroup_size.id_z; + z.constant_id = get_decoration(execution.workgroup_size.id_z, DecorationSpecId); + } + } return execution.workgroup_size.constant; } @@ -2023,15 +2304,42 @@ uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t auto &execution = get_entry_point(); switch (mode) { + case ExecutionModeLocalSizeId: + if (execution.flags.get(ExecutionModeLocalSizeId)) + { + switch (index) + { + case 0: + return execution.workgroup_size.id_x; + case 1: + return execution.workgroup_size.id_y; + case 2: + return execution.workgroup_size.id_z; + default: + return 0; + } + } + else + return 0; + case ExecutionModeLocalSize: switch (index) { case 0: - return execution.workgroup_size.x; + if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_x != 0) + return get(execution.workgroup_size.id_x).scalar(); + else + 
return execution.workgroup_size.x; case 1: - return execution.workgroup_size.y; + if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_y != 0) + return get(execution.workgroup_size.id_y).scalar(); + else + return execution.workgroup_size.y; case 2: - return execution.workgroup_size.z; + if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_z != 0) + return get(execution.workgroup_size.id_z).scalar(); + else + return execution.workgroup_size.z; default: return 0; } @@ -2042,6 +2350,9 @@ uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t case ExecutionModeOutputVertices: return execution.output_vertices; + case ExecutionModeOutputPrimitivesEXT: + return execution.output_primitives; + default: return 0; } @@ -2058,45 +2369,70 @@ bool Compiler::is_tessellation_shader(ExecutionModel model) return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation; } +bool Compiler::is_vertex_like_shader() const +{ + auto model = get_execution_model(); + return model == ExecutionModelVertex || model == ExecutionModelGeometry || + model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation; +} + bool Compiler::is_tessellation_shader() const { return is_tessellation_shader(get_execution_model()); } -void Compiler::set_remapped_variable_state(uint32_t id, bool remap_enable) +bool Compiler::is_tessellating_triangles() const +{ + return get_execution_mode_bitset().get(ExecutionModeTriangles); +} + +void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable) { get(id).remapped_variable = remap_enable; } -bool Compiler::get_remapped_variable_state(uint32_t id) const +bool Compiler::get_remapped_variable_state(VariableID id) const { return get(id).remapped_variable; } -void Compiler::set_subpass_input_remapped_components(uint32_t id, uint32_t components) +void Compiler::set_subpass_input_remapped_components(VariableID id, 
uint32_t components) { get(id).remapped_components = components; } -uint32_t Compiler::get_subpass_input_remapped_components(uint32_t id) const +uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const { return get(id).remapped_components; } void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source) { - auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source); + auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); if (itr == end(e.implied_read_expressions)) e.implied_read_expressions.push_back(source); } void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source) { - auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source); + auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); if (itr == end(e.implied_read_expressions)) e.implied_read_expressions.push_back(source); } +void Compiler::add_active_interface_variable(uint32_t var_id) +{ + active_interface_variables.insert(var_id); + + // In SPIR-V 1.4 and up we must also track the interface variable in the entry point. 
+ if (ir.get_spirv_version() >= 0x10400) + { + auto &vars = get_entry_point().interface_variables; + if (find(begin(vars), end(vars), VariableID(var_id)) == end(vars)) + vars.push_back(var_id); + } +} + void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression) { // Don't inherit any expression dependencies if the expression in dst @@ -2221,19 +2557,25 @@ SPIREntryPoint &Compiler::get_entry_point() bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const { auto &var = get(id); - if (var.storage != StorageClassInput && var.storage != StorageClassOutput && - var.storage != StorageClassUniformConstant) - SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface."); - - // This is to avoid potential problems with very old glslang versions which did - // not emit input/output interfaces properly. - // We can assume they only had a single entry point, and single entry point - // shaders could easily be assumed to use every interface variable anyways. - if (ir.entry_points.size() <= 1) - return true; + + if (ir.get_spirv_version() < 0x10400) + { + if (var.storage != StorageClassInput && var.storage != StorageClassOutput && + var.storage != StorageClassUniformConstant) + SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface."); + + // This is to avoid potential problems with very old glslang versions which did + // not emit input/output interfaces properly. + // We can assume they only had a single entry point, and single entry point + // shaders could easily be assumed to use every interface variable anyways. + if (ir.entry_points.size() <= 1) + return true; + } + + // In SPIR-V 1.4 and later, all global resource variables must be present. 
auto &execution = get_entry_point(); - return find(begin(execution.interface_variables), end(execution.interface_variables), id) != + return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) != end(execution.interface_variables); } @@ -2245,7 +2587,7 @@ void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunc unordered_map remapping; for (uint32_t i = 0; i < length; i++) remapping[func.arguments[i].id] = remap_parameter(args[i]); - parameter_remapping.push(move(remapping)); + parameter_remapping.push(std::move(remapping)); } void Compiler::CombinedImageSamplerHandler::pop_remap_parameters() @@ -2313,8 +2655,8 @@ bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *a { for (auto ¶m : params) { - uint32_t image_id = param.global_image ? param.image_id : args[param.image_id]; - uint32_t sampler_id = param.global_sampler ? param.sampler_id : args[param.sampler_id]; + VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]); + VariableID sampler_id = param.global_sampler ? 
param.sampler_id : VariableID(args[param.sampler_id]); auto *i = compiler.maybe_get_backing_variable(image_id); auto *s = compiler.maybe_get_backing_variable(sampler_id); @@ -2323,15 +2665,17 @@ bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *a if (s) sampler_id = s->self; - register_combined_image_sampler(caller, image_id, sampler_id, param.depth); + register_combined_image_sampler(caller, 0, image_id, sampler_id, param.depth); } } return true; } -void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, uint32_t image_id, - uint32_t sampler_id, bool depth) +void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, + VariableID combined_module_id, + VariableID image_id, VariableID sampler_id, + bool depth) { // We now have a texture ID and a sampler ID which will either be found as a global // or a parameter in our own function. If both are global, they will not need a parameter, @@ -2391,12 +2735,15 @@ void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIR // Build new variable. compiler.set(combined_id, ptr_type_id, StorageClassFunction, 0); - // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant). - auto &new_flags = compiler.ir.meta[combined_id].decoration.decoration_flags; - auto &old_flags = compiler.ir.meta[sampler_id].decoration.decoration_flags; - new_flags.reset(); - if (old_flags.get(DecorationRelaxedPrecision)) - new_flags.set(DecorationRelaxedPrecision); + // Inherit RelaxedPrecision. + // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration. 
+ bool relaxed_precision = + compiler.has_decoration(sampler_id, DecorationRelaxedPrecision) || + compiler.has_decoration(image_id, DecorationRelaxedPrecision) || + (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision)); + + if (relaxed_precision) + compiler.set_decoration(combined_id, DecorationRelaxedPrecision); param.id = combined_id; @@ -2603,8 +2950,10 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar if (sampler) sampler_id = sampler->self; + uint32_t combined_id = args[1]; + auto &combined_type = compiler.get(args[0]); - register_combined_image_sampler(callee, image_id, sampler_id, combined_type.image.depth); + register_combined_image_sampler(callee, combined_id, image_id, sampler_id, combined_type.image.depth); } } @@ -2612,8 +2961,8 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar // This information is statically known from the current place in the call stack. // Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know // which backing variable the image/sample came from. - uint32_t image_id = remap_parameter(args[2]); - uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]); + VariableID image_id = remap_parameter(args[2]); + VariableID sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]); auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers), [image_id, sampler_id](const CombinedImageSampler &combined) { @@ -2623,6 +2972,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar if (itr == end(compiler.combined_image_samplers)) { uint32_t sampled_type; + uint32_t combined_module_id; if (is_fetch) { // Have to invent the sampled image type. 
@@ -2632,10 +2982,12 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar type.self = sampled_type; type.basetype = SPIRType::SampledImage; type.image.depth = false; + combined_module_id = 0; } else { sampled_type = args[0]; + combined_module_id = args[1]; } auto id = compiler.ir.increase_bound_by(2); @@ -2655,12 +3007,14 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar compiler.set(combined_id, type_id, StorageClassUniformConstant, 0); // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant). - auto &new_flags = compiler.ir.meta[combined_id].decoration.decoration_flags; - // Fetch inherits precision from the image, not sampler (there is no sampler). - auto &old_flags = compiler.ir.meta[is_fetch ? image_id : sampler_id].decoration.decoration_flags; - new_flags.reset(); - if (old_flags.get(DecorationRelaxedPrecision)) - new_flags.set(DecorationRelaxedPrecision); + // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration. + bool relaxed_precision = + (sampler_id && compiler.has_decoration(sampler_id, DecorationRelaxedPrecision)) || + (image_id && compiler.has_decoration(image_id, DecorationRelaxedPrecision)) || + (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision)); + + if (relaxed_precision) + compiler.set_decoration(combined_id, DecorationRelaxedPrecision); // Propagate the array type for the original image as well. 
auto *var = compiler.maybe_get_backing_variable(image_id); @@ -2677,7 +3031,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar return true; } -uint32_t Compiler::build_dummy_sampler_for_combined_images() +VariableID Compiler::build_dummy_sampler_for_combined_images() { DummySamplerForCombinedImageHandler handler(*this); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); @@ -2731,17 +3085,18 @@ SmallVector Compiler::get_specialization_constants() con return spec_consts; } -SPIRConstant &Compiler::get_constant(uint32_t id) +SPIRConstant &Compiler::get_constant(ConstantID id) { return get(id); } -const SPIRConstant &Compiler::get_constant(uint32_t id) const +const SPIRConstant &Compiler::get_constant(ConstantID id) const { return get(id); } -static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set &blocks) +static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set &blocks, + unordered_set &visit_cache) { // This block accesses the variable. if (blocks.find(block) != end(blocks)) @@ -2753,8 +3108,14 @@ static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, con // If any of our successors have a path to the end, there exists a path from block. for (auto &succ : cfg.get_succeeding_edges(block)) - if (exists_unaccessed_path_to_return(cfg, succ, blocks)) - return true; + { + if (visit_cache.count(succ) == 0) + { + if (exists_unaccessed_path_to_return(cfg, succ, blocks, visit_cache)) + return true; + visit_cache.insert(succ); + } + } return false; } @@ -2811,7 +3172,8 @@ void Compiler::analyze_parameter_preservation( // void foo(int &var) { if (cond) var = 10; } // Using read/write counts, we will think it's just an out variable, but it really needs to be inout, // because if we don't write anything whatever we put into the function must return back to the caller. 
- if (exists_unaccessed_path_to_return(cfg, entry.entry_block, itr->second)) + unordered_set visit_cache; + if (exists_unaccessed_path_to_return(cfg, entry.entry_block, itr->second, visit_cache)) arg.read_count++; } } @@ -2866,12 +3228,15 @@ void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBl break; case SPIRBlock::MultiSelect: + { notify_variable_access(block.condition, block.self); - for (auto &target : block.cases) + auto &cases = compiler.get_case_list(block); + for (auto &target : cases) test_phi(target.block); if (block.default_block) test_phi(block.default_block); break; + } default: break; @@ -2883,6 +3248,12 @@ void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_ if (id == 0) return; + // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. + auto itr = rvalue_forward_children.find(id); + if (itr != end(rvalue_forward_children)) + for (auto child_id : itr->second) + notify_variable_access(child_id, block); + if (id_is_phi_variable(id)) accessed_variables_to_block[id].insert(block); else if (id_is_potential_temporary(id)) @@ -2906,12 +3277,46 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression); } +bool Compiler::AnalyzeVariableScopeAccessHandler::handle_terminator(const SPIRBlock &block) +{ + switch (block.terminator) + { + case SPIRBlock::Return: + if (block.return_value) + notify_variable_access(block.return_value, block.self); + break; + + case SPIRBlock::Select: + case SPIRBlock::MultiSelect: + notify_variable_access(block.condition, block.self); + break; + + default: + break; + } + + return true; +} + bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) { // Keep track of the types of temporaries, so we can hoist them out as necessary. 
uint32_t result_type, result_id; if (compiler.instruction_to_result_type(result_type, result_id, op, args, length)) + { + // For some opcodes, we will need to override the result id. + // If we need to hoist the temporary, the temporary type is the input, not the result. + // FIXME: This will likely break with OpCopyObject + hoisting, but we'll have to + // solve it if we ever get there ... + if (op == OpConvertUToAccelerationStructureKHR) + { + auto itr = result_id_to_type.find(args[2]); + if (itr != result_id_to_type.end()) + result_type = itr->second; + } + result_id_to_type[result_id] = result_type; + } switch (op) { @@ -2920,7 +3325,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 2) return false; - uint32_t ptr = args[0]; + ID ptr = args[0]; auto *var = compiler.maybe_get_backing_variable(ptr); // If we store through an access chain, we have a partial write. @@ -2947,14 +3352,21 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 3) return false; + // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. uint32_t ptr = args[2]; auto *var = compiler.maybe_get(ptr); if (var) + { accessed_variables_to_block[var->self].insert(current_block->self); + rvalue_forward_children[args[1]].insert(var->self); + } // args[2] might be another access chain we have to track use of. for (uint32_t i = 2; i < length; i++) + { notify_variable_access(args[i], current_block->self); + rvalue_forward_children[args[1]].insert(args[i]); + } // Also keep track of the access chain pointer itself. // In exceptionally rare cases, we can end up with a case where @@ -2965,7 +3377,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 // The result of an access chain is a fixed expression and is not really considered a temporary. 
auto &e = compiler.set(args[1], "", args[0], true); auto *backing_variable = compiler.maybe_get_backing_variable(ptr); - e.loaded_from = backing_variable ? backing_variable->self : 0; + e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0); // Other backends might use SPIRAccessChain for this later. compiler.ir.ids[args[1]].set_allow_type_rewrite(); @@ -2978,8 +3390,8 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 2) return false; - uint32_t lhs = args[0]; - uint32_t rhs = args[1]; + ID lhs = args[0]; + ID rhs = args[1]; auto *var = compiler.maybe_get_backing_variable(lhs); // If we store through an access chain, we have a partial write. @@ -3035,6 +3447,12 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 // Might be an access chain we have to track use of. notify_variable_access(args[2], current_block->self); + + // If we're loading an opaque type we cannot lower it to a temporary, + // we must defer access of args[2] until it's used. + auto &type = compiler.get(args[0]); + if (compiler.type_is_opaque_value(type)) + rvalue_forward_children[args[1]].insert(args[2]); break; } @@ -3043,6 +3461,10 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 3) return false; + // Return value may be a temporary. + if (compiler.get_type(args[0]).basetype != SPIRType::Void) + notify_variable_access(args[1], current_block->self); + length -= 3; args += 3; @@ -3063,9 +3485,29 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 // Might try to copy a Phi variable here. notify_variable_access(args[i], current_block->self); } + break; + } - // Return value may be a temporary. - notify_variable_access(args[1], current_block->self); + case OpSelect: + { + // In case of variable pointers, we might access a variable here. + // We cannot prove anything about these accesses however. 
+ for (uint32_t i = 1; i < length; i++) + { + if (i >= 3) + { + auto *var = compiler.maybe_get_backing_variable(args[i]); + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + // Assume we can get partial writes to this variable. + partial_write_variables_to_block[var->self].insert(current_block->self); + } + } + + // Might try to copy a Phi variable here. + notify_variable_access(args[i], current_block->self); + } break; } @@ -3074,10 +3516,41 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 for (uint32_t i = 4; i < length; i++) notify_variable_access(args[i], current_block->self); notify_variable_access(args[1], current_block->self); + + uint32_t extension_set = args[2]; + if (compiler.get(extension_set).ext == SPIRExtension::GLSL) + { + auto op_450 = static_cast(args[3]); + switch (op_450) + { + case GLSLstd450Modf: + case GLSLstd450Frexp: + { + uint32_t ptr = args[5]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + if (var->self == ptr) + complete_write_variables_to_block[var->self].insert(current_block->self); + else + partial_write_variables_to_block[var->self].insert(current_block->self); + } + break; + } + + default: + break; + } + } break; } case OpArrayLength: + // Only result is a temporary. + notify_variable_access(args[1], current_block->self); + break; + case OpLine: case OpNoLine: // Uses literals, but cannot be a phi variable or temporary, so ignore. @@ -3344,12 +3817,14 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA for (auto &var : handler.accessed_variables_to_block) { // Only deal with variables which are considered local variables in this function. 
- if (find(begin(entry.local_variables), end(entry.local_variables), var.first) == end(entry.local_variables)) + if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) == + end(entry.local_variables)) continue; DominatorBuilder builder(cfg); auto &blocks = var.second; auto &type = expression_type(var.first); + BlockID potential_continue_block = 0; // Figure out which block is dominating all accesses of those variables. for (auto &block : blocks) @@ -3371,21 +3846,48 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA { // The variable is used in multiple continue blocks, this is not a loop // candidate, signal that by setting block to -1u. - auto &potential = potential_loop_variables[var.first]; - - if (potential == 0) - potential = block; + if (potential_continue_block == 0) + potential_continue_block = block; else - potential = ~(0u); + potential_continue_block = ~(0u); } } + builder.add_block(block); } builder.lift_continue_block_dominator(); // Add it to a per-block list of variables. - uint32_t dominating_block = builder.get_dominator(); + BlockID dominating_block = builder.get_dominator(); + + if (dominating_block && potential_continue_block != 0 && potential_continue_block != ~0u) + { + auto &inner_block = get(dominating_block); + + BlockID merge_candidate = 0; + + // Analyze the dominator. If it lives in a different loop scope than the candidate continue + // block, reject the loop variable candidate. + if (inner_block.merge == SPIRBlock::MergeLoop) + merge_candidate = inner_block.merge_block; + else if (inner_block.loop_dominator != SPIRBlock::NoDominator) + merge_candidate = get(inner_block.loop_dominator).merge_block; + + if (merge_candidate != 0 && cfg.is_reachable(merge_candidate)) + { + // If the merge block has a higher post-visit order, we know that continue candidate + // cannot reach the merge block, and we have two separate scopes. 
+ if (!cfg.is_reachable(potential_continue_block) || + cfg.get_visit_order(merge_candidate) > cfg.get_visit_order(potential_continue_block)) + { + potential_continue_block = 0; + } + } + } + + if (potential_continue_block != 0 && potential_continue_block != ~0u) + potential_loop_variables[var.first] = potential_continue_block; // For variables whose dominating block is inside a loop, there is a risk that these variables // actually need to be preserved across loop iterations. We can express this by adding @@ -3403,7 +3905,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA if (preserve) { // Find the outermost loop scope. - while (block->loop_dominator != SPIRBlock::NoDominator) + while (block->loop_dominator != BlockID(SPIRBlock::NoDominator)) block = &get(block->loop_dominator); if (block->self != dominating_block) @@ -3443,6 +3945,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA DominatorBuilder builder(cfg); bool force_temporary = false; + bool used_in_header_hoisted_continue_block = false; // Figure out which block is dominating all accesses of those temporaries. auto &blocks = var.second; @@ -3450,25 +3953,27 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA { builder.add_block(block); - // If a temporary is used in more than one block, we might have to lift continue block - // access up to loop header like we did for variables. if (blocks.size() != 1 && is_continue(block)) { + // The risk here is that inner loop can dominate the continue block. + // Any temporary we access in the continue block must be declared before the loop. + // This is moot for complex loops however. auto &loop_header_block = get(ir.continue_block_to_loop_header[block]); assert(loop_header_block.merge == SPIRBlock::MergeLoop); - - // Only relevant if the loop is not marked as complex. 
- if (!loop_header_block.complex_continue) - builder.add_block(loop_header_block.self); - } - else if (blocks.size() != 1 && is_single_block_loop(block)) - { - // Awkward case, because the loop header is also the continue block. - force_temporary = true; + builder.add_block(loop_header_block.self); + used_in_header_hoisted_continue_block = true; } } uint32_t dominating_block = builder.get_dominator(); + + if (blocks.size() != 1 && is_single_block_loop(dominating_block)) + { + // Awkward case, because the loop header is also the continue block, + // so hoisting to loop header does not help. + force_temporary = true; + } + if (dominating_block) { // If we touch a variable in the dominating block, this is the expected setup. @@ -3481,11 +3986,22 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA { // Exceptionally rare case. // We cannot declare temporaries of access chains (except on MSL perhaps with pointers). - // Rather than do that, we force a complex loop to make sure access chains are created and consumed - // in expected order. - auto &loop_header_block = get(dominating_block); - assert(loop_header_block.merge == SPIRBlock::MergeLoop); - loop_header_block.complex_continue = true; + // Rather than do that, we force the indexing expressions to be declared in the right scope by + // tracking their usage to that end. There is no temporary to hoist. + // However, we still need to observe declaration order of the access chain. + + if (used_in_header_hoisted_continue_block) + { + // For this scenario, we used an access chain inside a continue block where we also registered an access to header block. + // This is a problem as we need to declare an access chain properly first with full definition. + // We cannot use temporaries for these expressions, + // so we must make sure the access chain is declared ahead of time. + // Force a complex for loop to deal with this. 
+ // TODO: Out-of-order declaring for loops where continue blocks are emitted last might be another option. + auto &loop_header_block = get(dominating_block); + assert(loop_header_block.merge == SPIRBlock::MergeLoop); + loop_header_block.complex_continue = true; + } } else { @@ -3519,17 +4035,17 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA { auto &var = get(loop_variable.first); auto dominator = var.dominator; - auto block = loop_variable.second; + BlockID block = loop_variable.second; // The variable was accessed in multiple continue blocks, ignore. - if (block == ~(0u) || block == 0) + if (block == BlockID(~(0u)) || block == BlockID(0)) continue; // Dead code. - if (dominator == 0) + if (dominator == ID(0)) continue; - uint32_t header = 0; + BlockID header = 0; // Find the loop header for this block if we are a continue block. { @@ -3588,10 +4104,11 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA // merge can occur. Walk the CFG to see if we find anything. seen_blocks.clear(); - cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) { + cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool { // We found a block which accesses the variable outside the loop. 
if (blocks.find(walk_block) != end(blocks)) static_loop_init = false; + return true; }); if (!static_loop_init) @@ -3679,7 +4196,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint return true; } -Bitset Compiler::get_buffer_block_flags(uint32_t id) const +Bitset Compiler::get_buffer_block_flags(VariableID id) const { return ir.get_buffer_block_flags(get(id)); } @@ -3739,23 +4256,55 @@ void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltI } } -bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length) +void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id, bool allow_blocks) { - const auto add_if_builtin = [&](uint32_t id) { - // Only handles variables here. - // Builtins which are part of a block are handled in AccessChain. - auto *var = compiler.maybe_get(id); - auto &decorations = compiler.ir.meta[id].decoration; - if (var && decorations.builtin) + // Only handle plain variables here. + // Builtins which are part of a block are handled in AccessChain. + // If allow_blocks is used however, this is to handle initializers of blocks, + // which implies that all members are written to. + + auto *var = compiler.maybe_get(id); + auto *m = compiler.ir.find_meta(id); + if (var && m) + { + auto &type = compiler.get(var->basetype); + auto &decorations = m->decoration; + auto &flags = type.storage == StorageClassInput ? + compiler.active_input_builtins : compiler.active_output_builtins; + if (decorations.builtin) { - auto &type = compiler.get(var->basetype); - auto &flags = - type.storage == StorageClassInput ? 
compiler.active_input_builtins : compiler.active_output_builtins; flags.set(decorations.builtin_type); handle_builtin(type, decorations.builtin_type, decorations.decoration_flags); } - }; + else if (allow_blocks && compiler.has_decoration(type.self, DecorationBlock)) + { + uint32_t member_count = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + if (compiler.has_member_decoration(type.self, i, DecorationBuiltIn)) + { + auto &member_type = compiler.get(type.member_types[i]); + BuiltIn builtin = BuiltIn(compiler.get_member_decoration(type.self, i, DecorationBuiltIn)); + flags.set(builtin); + handle_builtin(member_type, builtin, compiler.get_member_decoration_bitset(type.self, i)); + } + } + } + } +} + +void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id) +{ + add_if_builtin(id, false); +} +void Compiler::ActiveBuiltinHandler::add_if_builtin_or_block(uint32_t id) +{ + add_if_builtin(id, true); +} + +bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length) +{ switch (opcode) { case OpStore: @@ -3893,10 +4442,21 @@ void Compiler::update_active_builtins() clip_distance_count = 0; ActiveBuiltinHandler handler(*this); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + if (var.storage != StorageClassOutput) + return; + if (!interface_variable_exists_in_entry_point(var.self)) + return; + + // Also, make sure we preserve output variables which are only initialized, but never accessed by any code. 
+ if (var.initializer != ID(0)) + handler.add_if_builtin_or_block(var.self); + }); } // Returns whether this shader uses a builtin of the storage class -bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) +bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) const { const Bitset *flags; switch (storage) @@ -3921,8 +4481,16 @@ void Compiler::analyze_image_and_sampler_usage() CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); - comparison_ids = move(handler.comparison_ids); + + // Need to run this traversal twice. First time, we propagate any comparison sampler usage from leaf functions + // down to main(). + // In the second pass, we can propagate up forced depth state coming from main() up into leaf functions. + handler.dependency_hierarchy.clear(); + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + + comparison_ids = std::move(handler.comparison_ids); need_subpass_input = handler.need_subpass_input; + need_subpass_input_ms = handler.need_subpass_input_ms; // Forward information from separate images and samplers into combined image samplers. 
for (auto &combined : combined_image_samplers) @@ -3955,12 +4523,26 @@ bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uin return true; } +const CFG &Compiler::get_cfg_for_current_function() const +{ + assert(current_function); + return get_cfg_for_function(current_function->self); +} + +const CFG &Compiler::get_cfg_for_function(uint32_t id) const +{ + auto cfg_itr = function_cfgs.find(id); + assert(cfg_itr != end(function_cfgs)); + assert(cfg_itr->second); + return *cfg_itr->second; +} + void Compiler::build_function_control_flow_graphs_and_analyze() { CFGBuilder handler(*this); handler.function_cfgs[ir.default_entry_point].reset(new CFG(*this, get(ir.default_entry_point))); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); - function_cfgs = move(handler.function_cfgs); + function_cfgs = std::move(handler.function_cfgs); bool single_function = function_cfgs.size() <= 1; for (auto &f : function_cfgs) @@ -4022,6 +4604,14 @@ bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func) return false; } +void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src) +{ + dependency_hierarchy[dst].insert(src); + // Propagate up any comparison state if we're loading from one such variable. + if (comparison_ids.count(src)) + comparison_ids.insert(dst); +} + bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length) { if (length < 3) @@ -4034,7 +4624,7 @@ bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint for (uint32_t i = 0; i < length; i++) { auto &argument = func.arguments[i]; - dependency_hierarchy[argument.id].insert(arg[i]); + add_dependency(argument.id, arg[i]); } return true; @@ -4044,6 +4634,7 @@ void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids { // Traverse the variable dependency hierarchy and tag everything in its path with comparison ids. 
comparison_ids.insert(id); + for (auto &dep_id : dependency_hierarchy[id]) add_hierarchy_to_comparison_ids(dep_id); } @@ -4059,13 +4650,18 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_ { if (length < 3) return false; - dependency_hierarchy[args[1]].insert(args[2]); + + add_dependency(args[1], args[2]); // Ideally defer this to OpImageRead, but then we'd need to track loaded IDs. // If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord. auto &type = compiler.get(args[0]); if (type.image.dim == DimSubpassData) + { need_subpass_input = true; + if (type.image.ms) + need_subpass_input_ms = true; + } // If we load a SampledImage and it will be used with Dref, propagate the state up. if (dref_combined_samplers.count(args[1]) != 0) @@ -4078,17 +4674,17 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_ if (length < 4) return false; - uint32_t result_type = args[0]; + // If the underlying resource has been used for comparison then duplicate loads of that resource must be too. + // This image must be a depth image. uint32_t result_id = args[1]; - auto &type = compiler.get(result_type); - if (type.image.depth || dref_combined_samplers.count(result_id) != 0) + uint32_t image = args[2]; + uint32_t sampler = args[3]; + + if (dref_combined_samplers.count(result_id) != 0) { - // This image must be a depth image. - uint32_t image = args[2]; add_hierarchy_to_comparison_ids(image); // This sampler must be a SamplerComparisonState, and not a regular SamplerState. - uint32_t sampler = args[3]; add_hierarchy_to_comparison_ids(sampler); // Mark the OpSampledImage itself as being comparison state. 
@@ -4104,13 +4700,13 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_ return true; } -bool Compiler::buffer_is_hlsl_counter_buffer(uint32_t id) const +bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const { auto *m = ir.find_meta(id); return m && m->hlsl_is_magic_counter_buffer; } -bool Compiler::buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const +bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const { auto *m = ir.find_meta(id); @@ -4175,7 +4771,7 @@ const SmallVector &Compiler::get_declared_extensions() const return ir.declared_extensions; } -std::string Compiler::get_remapped_declared_block_name(uint32_t id) const +std::string Compiler::get_remapped_declared_block_name(VariableID id) const { return get_remapped_declared_block_name(id, false); } @@ -4240,46 +4836,22 @@ bool Compiler::reflection_ssbo_instance_name_is_significant() const return aliased_ssbo_types; } -bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args, - uint32_t length) +bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, + const uint32_t *args, uint32_t length) { - // Most instructions follow the pattern of . - // There are some exceptions. 
- switch (op) - { - case OpStore: - case OpCopyMemory: - case OpCopyMemorySized: - case OpImageWrite: - case OpAtomicStore: - case OpAtomicFlagClear: - case OpEmitStreamVertex: - case OpEndStreamPrimitive: - case OpControlBarrier: - case OpMemoryBarrier: - case OpGroupWaitEvents: - case OpRetainEvent: - case OpReleaseEvent: - case OpSetUserEventStatus: - case OpCaptureEventProfilingInfo: - case OpCommitReadPipe: - case OpCommitWritePipe: - case OpGroupCommitReadPipe: - case OpGroupCommitWritePipe: - case OpLine: - case OpNoLine: + if (length < 2) return false; - default: - if (length > 1 && maybe_get(args[0]) != nullptr) - { - result_type = args[0]; - result_id = args[1]; - return true; - } - else - return false; + bool has_result_id = false, has_result_type = false; + HasResultAndType(op, &has_result_id, &has_result_type); + if (has_result_id && has_result_type) + { + result_type = args[0]; + result_id = args[1]; + return true; } + else + return false; } Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const @@ -4289,19 +4861,22 @@ Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t i if (type_meta) { - auto &memb = type_meta->members; - if (index >= memb.size()) + auto &members = type_meta->members; + if (index >= members.size()) return flags; - auto &dec = memb[index]; + auto &dec = members[index]; - // If our type is a struct, traverse all the members as well recursively. flags.merge_or(dec.decoration_flags); - for (uint32_t i = 0; i < type.member_types.size(); i++) + auto &member_type = get(type.member_types[index]); + + // If our member type is a struct, traverse all the child members as well recursively. 
+ auto &member_childs = member_type.member_types; + for (uint32_t i = 0; i < member_childs.size(); i++) { - auto &memb_type = get(type.member_types[i]); - if (!memb_type.pointer) - flags.merge_or(combined_decoration_for_member(memb_type, i)); + auto &child_member_type = get(member_childs[i]); + if (!child_member_type.pointer) + flags.merge_or(combined_decoration_for_member(member_type, i)); } } @@ -4341,9 +4916,11 @@ bool Compiler::is_desktop_only_format(spv::ImageFormat format) return false; } -bool Compiler::image_is_comparison(const SPIRType &type, uint32_t id) const +// An image is determined to be a depth image if it is marked as a depth image and is not also +// explicitly marked with a color format, or if there are any sample/gather compare operations on it. +bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const { - return type.image.depth || (comparison_ids.count(id) != 0); + return (type.image.depth && type.image.format == ImageFormatUnknown) || comparison_ids.count(id); } bool Compiler::type_is_opaque_value(const SPIRType &type) const @@ -4358,6 +4935,12 @@ void Compiler::force_recompile() is_force_recompile = true; } +void Compiler::force_recompile_guarantee_forward_progress() +{ + force_recompile(); + is_force_recompile_forward_progress = true; +} + bool Compiler::is_forcing_recompilation() const { return is_force_recompile; @@ -4366,6 +4949,7 @@ bool Compiler::is_forcing_recompilation() const void Compiler::clear_force_recompile() { is_force_recompile = false; + is_force_recompile_forward_progress = false; } Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandler(Compiler &compiler_) @@ -4373,31 +4957,491 @@ Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandl { } -bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t) +Compiler::PhysicalBlockMeta *Compiler::PhysicalStorageBufferPointerHandler::find_block_meta(uint32_t id) const +{ + auto 
chain_itr = access_chain_to_physical_block.find(id); + if (chain_itr != access_chain_to_physical_block.end()) + return chain_itr->second; + else + return nullptr; +} + +void Compiler::PhysicalStorageBufferPointerHandler::mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length) { - if (op == OpConvertUToPtr || op == OpBitcast) + uint32_t mask = *args; + args++; + length--; + if (length && (mask & MemoryAccessVolatileMask) != 0) { - auto &type = compiler.get(args[0]); - if (type.storage == StorageClassPhysicalStorageBufferEXT && type.pointer && type.pointer_depth == 1) + args++; + length--; + } + + if (length && (mask & MemoryAccessAlignedMask) != 0) + { + uint32_t alignment = *args; + auto *meta = find_block_meta(id); + + // This makes the assumption that the application does not rely on insane edge cases like: + // Bind buffer with ADDR = 8, use block offset of 8 bytes, load/store with 16 byte alignment. + // If we emit the buffer with alignment = 16 here, the first element at offset = 0 should + // actually have alignment of 8 bytes, but this is too theoretical and awkward to support. + // We could potentially keep track of any offset in the access chain, but it's + // practically impossible for high level compilers to emit code like that, + // so deducing overall alignment requirement based on maximum observed Alignment value is probably fine. 
+ if (meta && alignment > meta->alignment) + meta->alignment = alignment; + } +} + +bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint32_t type_id) const +{ + auto &type = compiler.get(type_id); + return type.storage == StorageClassPhysicalStorageBufferEXT && type.pointer && + type.pointer_depth == 1 && !compiler.type_is_array_of_pointers(type); +} + +uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const +{ + if (type.storage == spv::StorageClassPhysicalStorageBufferEXT) + return 8; + else if (type.basetype == SPIRType::Struct) + { + uint32_t alignment = 0; + for (auto &member_type : type.member_types) { - // If we need to cast to a pointer type which is not a block, we might need to synthesize ourselves - // a block type which wraps this POD type. - if (type.basetype != SPIRType::Struct) - types.insert(args[0]); + uint32_t member_align = get_minimum_scalar_alignment(compiler.get(member_type)); + if (member_align > alignment) + alignment = member_align; } + return alignment; + } + else + return type.width / 8; +} + +void Compiler::PhysicalStorageBufferPointerHandler::setup_meta_chain(uint32_t type_id, uint32_t var_id) +{ + if (type_is_bda_block_entry(type_id)) + { + auto &meta = physical_block_type_meta[type_id]; + access_chain_to_physical_block[var_id] = &meta; + + auto &type = compiler.get(type_id); + if (type.basetype != SPIRType::Struct) + non_block_types.insert(type_id); + + if (meta.alignment == 0) + meta.alignment = get_minimum_scalar_alignment(compiler.get_pointee_type(type)); + } +} + +bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t length) +{ + // When a BDA pointer comes to life, we need to keep a mapping of SSA ID -> type ID for the pointer type. + // For every load and store, we'll need to be able to look up the type ID being accessed and mark any alignment + // requirements. 
+ switch (op) + { + case OpConvertUToPtr: + case OpBitcast: + case OpCompositeExtract: + // Extract can begin a new chain if we had a struct or array of pointers as input. + // We don't begin chains before we have a pure scalar pointer. + setup_meta_chain(args[0], args[1]); + break; + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + case OpCopyObject: + { + auto itr = access_chain_to_physical_block.find(args[2]); + if (itr != access_chain_to_physical_block.end()) + access_chain_to_physical_block[args[1]] = itr->second; + break; + } + + case OpLoad: + { + setup_meta_chain(args[0], args[1]); + if (length >= 4) + mark_aligned_access(args[2], args + 3, length - 3); + break; + } + + case OpStore: + { + if (length >= 3) + mark_aligned_access(args[0], args + 2, length - 2); + break; + } + + default: + break; } return true; } +uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_id(uint32_t type_id) const +{ + auto *type = &compiler.get(type_id); + while (type->pointer && + type->storage == StorageClassPhysicalStorageBufferEXT && + !type_is_bda_block_entry(type_id)) + { + type_id = type->parent_type; + type = &compiler.get(type_id); + } + + assert(type_is_bda_block_entry(type_id)); + return type_id; +} + +void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from_block(const SPIRType &type) +{ + for (auto &member : type.member_types) + { + auto &subtype = compiler.get(member); + if (subtype.basetype != SPIRType::Struct && subtype.pointer && + subtype.storage == spv::StorageClassPhysicalStorageBufferEXT) + { + non_block_types.insert(get_base_non_block_type_id(member)); + } + else if (subtype.basetype == SPIRType::Struct && !subtype.pointer) + analyze_non_block_types_from_block(subtype); + } +} + void Compiler::analyze_non_block_pointer_types() { PhysicalStorageBufferPointerHandler handler(*this); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); - 
physical_storage_non_block_pointer_types.reserve(handler.types.size()); - for (auto type : handler.types) + + // Analyze any block declaration we have to make. It might contain + // physical pointers to POD types which we never used, and thus never added to the list. + // We'll need to add those pointer types to the set of types we declare. + ir.for_each_typed_id([&](uint32_t, SPIRType &type) { + if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) + handler.analyze_non_block_types_from_block(type); + }); + + physical_storage_non_block_pointer_types.reserve(handler.non_block_types.size()); + for (auto type : handler.non_block_types) physical_storage_non_block_pointer_types.push_back(type); sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types)); + physical_storage_type_to_alignment = std::move(handler.physical_block_type_meta); +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t) +{ + if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT) + { + if (interlock_function_id != 0 && interlock_function_id != call_stack.back()) + { + // Most complex case, we have no sensible way of dealing with this + // other than taking the 100% conservative approach, exit early. + split_function_case = true; + return false; + } + else + { + interlock_function_id = call_stack.back(); + // If this call is performed inside control flow we have a problem. 
+ auto &cfg = compiler.get_cfg_for_function(interlock_function_id); + + uint32_t from_block_id = compiler.get(interlock_function_id).entry_block; + bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id); + if (!outside_control_flow) + control_flow_interlock = true; + } + } + return true; +} + +void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block) +{ + current_block_id = block.self; +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + call_stack.push_back(args[2]); + return true; +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t) +{ + call_stack.pop_back(); + return true; +} + +bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + if (args[2] == interlock_function_id) + call_stack_is_interlocked = true; + + call_stack.push_back(args[2]); + return true; +} + +bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t) +{ + if (call_stack.back() == interlock_function_id) + call_stack_is_interlocked = false; + + call_stack.pop_back(); + return true; +} + +void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id) +{ + if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) || + split_function_case) + { + compiler.interlocked_resources.insert(id); + } +} + +bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + // Only care about critical section analysis if we have simple case. 
+ if (use_critical_section) + { + if (opcode == OpBeginInvocationInterlockEXT) + { + in_crit_sec = true; + return true; + } + + if (opcode == OpEndInvocationInterlockEXT) + { + // End critical section--nothing more to do. + return false; + } + } + + // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need. + switch (opcode) + { + case OpLoad: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // We're only concerned with buffer and image memory here. + if (!var) + break; + + switch (var->storage) + { + default: + break; + + case StorageClassUniformConstant: + { + uint32_t result_type = args[0]; + uint32_t id = args[1]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + break; + } + + case StorageClassUniform: + // Must have BufferBlock; we only care about SSBOs. + if (!compiler.has_decoration(compiler.get(var->basetype).self, DecorationBufferBlock)) + break; + // fallthrough + case StorageClassStorageBuffer: + access_potential_resource(var->self); + break; + } + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + + auto &type = compiler.get(result_type); + if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant || + type.storage == StorageClassStorageBuffer) + { + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + compiler.ir.ids[id].set_allow_type_rewrite(); + } + break; + } + + case OpImageTexelPointer: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + uint32_t id = args[1]; + uint32_t ptr = args[2]; + auto &e = compiler.set(id, "", result_type, true); + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var) + e.loaded_from = var->self; + 
break; + } + + case OpStore: + case OpImageWrite: + case OpAtomicStore: + { + if (length < 1) + return false; + + uint32_t ptr = args[0]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || + var->storage == StorageClassStorageBuffer)) + { + access_potential_resource(var->self); + } + + break; + } + + case OpCopyMemory: + { + if (length < 2) + return false; + + uint32_t dst = args[0]; + uint32_t src = args[1]; + auto *dst_var = compiler.maybe_get_backing_variable(dst); + auto *src_var = compiler.maybe_get_backing_variable(src); + + if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer)) + access_potential_resource(dst_var->self); + + if (src_var) + { + if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer) + break; + + if (src_var->storage == StorageClassUniform && + !compiler.has_decoration(compiler.get(src_var->basetype).self, DecorationBufferBlock)) + { + break; + } + + access_potential_resource(src_var->self); + } + + break; + } + + case OpImageRead: + case OpAtomicLoad: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // We're only concerned with buffer and image memory here. + if (!var) + break; + + switch (var->storage) + { + default: + break; + + case StorageClassUniform: + // Must have BufferBlock; we only care about SSBOs. 
+ if (!compiler.has_decoration(compiler.get(var->basetype).self, DecorationBufferBlock)) + break; + // fallthrough + case StorageClassUniformConstant: + case StorageClassStorageBuffer: + access_potential_resource(var->self); + break; + } + break; + } + + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || + var->storage == StorageClassStorageBuffer)) + { + access_potential_resource(var->self); + } + + break; + } + + default: + break; + } + + return true; +} + +void Compiler::analyze_interlocked_resource_usage() +{ + if (get_execution_model() == ExecutionModelFragment && + (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) || + get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) || + get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) || + get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT))) + { + InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point); + traverse_all_reachable_opcodes(get(ir.default_entry_point), prepass_handler); + + InterlockedResourceAccessHandler handler(*this, ir.default_entry_point); + handler.interlock_function_id = prepass_handler.interlock_function_id; + handler.split_function_case = prepass_handler.split_function_case; + handler.control_flow_interlock = prepass_handler.control_flow_interlock; + handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock; + + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + + // For GLSL. 
If we hit any of these cases, we have to fall back to conservative approach. + interlocked_is_complex = + !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point; + } } bool Compiler::type_is_array_of_pointers(const SPIRType &type) const @@ -4408,3 +5452,23 @@ bool Compiler::type_is_array_of_pointers(const SPIRType &type) const // If parent type has same pointer depth, we must have an array of pointers. return type.pointer_depth == get(type.parent_type).pointer_depth; } + +bool Compiler::type_is_top_level_physical_pointer(const SPIRType &type) const +{ + return type.pointer && type.storage == StorageClassPhysicalStorageBuffer && + type.pointer_depth > get(type.parent_type).pointer_depth; +} + +bool Compiler::flush_phi_required(BlockID from, BlockID to) const +{ + auto &child = get(to); + for (auto &phi : child.phi_variables) + if (phi.parent == from) + return true; + return false; +} + +void Compiler::add_loop_level() +{ + current_loop_level++; +} diff --git a/spirv_cross.hpp b/spirv_cross.hpp index c2dc4ea61ba..ea98ee60d37 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +15,18 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_HPP #define SPIRV_CROSS_HPP +#ifndef SPV_ENABLE_UTILITY_CODE +#define SPV_ENABLE_UTILITY_CODE +#endif #include "spirv.hpp" #include "spirv_cfg.hpp" #include "spirv_cross_parsed_ir.hpp" @@ -27,18 +37,18 @@ struct Resource { // Resources are identified with their SPIR-V ID. // This is the ID of the OpVariable. 
- uint32_t id; + ID id; // The type ID of the variable which includes arrays and all type modifications. // This type ID is not suitable for parsing OpMemberDecoration of a struct and other decorations in general // since these modifications typically happen on the base_type_id. - uint32_t type_id; + TypeID type_id; // The base type of the declared resource. // This type is the base type which ignores pointers and arrays of the type_id. // This is mostly useful to parse decorations of the underlying type. // base_type_id can also be obtained with get_type(get_type(type_id).self). - uint32_t base_type_id; + TypeID base_type_id; // The declared name (OpName) of the resource. // For Buffer blocks, the name actually reflects the externally @@ -52,6 +62,27 @@ struct Resource std::string name; }; +struct BuiltInResource +{ + // This is mostly here to support reflection of builtins such as Position/PointSize/CullDistance/ClipDistance. + // This needs to be different from Resource since we can collect builtins from blocks. + // A builtin present here does not necessarily mean it's considered an active builtin, + // since variable ID "activeness" is only tracked on OpVariable level, not Block members. + // For that, update_active_builtins() -> has_active_builtin() can be used to further refine the reflection. + spv::BuiltIn builtin; + + // This is the actual value type of the builtin. + // Typically float4, float, array for the gl_PerVertex builtins. + // If the builtin is a control point, the control point array type will be stripped away here as appropriate. + TypeID value_type_id; + + // This refers to the base resource which contains the builtin. + // If resource is a Block, it can hold multiple builtins, or it might not be a block. + // For advanced reflection scenarios, all information in builtin/value_type_id can be deduced, + // it's just more convenient this way. 
+ Resource resource; +}; + struct ShaderResources { SmallVector uniform_buffers; @@ -68,26 +99,31 @@ struct ShaderResources // but keep the vector in case this restriction is lifted in the future. SmallVector push_constant_buffers; + SmallVector shader_record_buffers; + // For Vulkan GLSL and HLSL source, // these correspond to separate texture2D and samplers respectively. SmallVector separate_images; SmallVector separate_samplers; + + SmallVector builtin_inputs; + SmallVector builtin_outputs; }; struct CombinedImageSampler { // The ID of the sampler2D variable. - uint32_t combined_id; + VariableID combined_id; // The ID of the texture2D variable. - uint32_t image_id; + VariableID image_id; // The ID of the sampler variable. - uint32_t sampler_id; + VariableID sampler_id; }; struct SpecializationConstant { // The ID of the specialization constant. - uint32_t id; + ConstantID id; // The constant ID of the constant, used in Vulkan during pipeline creation. uint32_t constant_id; }; @@ -117,18 +153,6 @@ struct EntryPoint spv::ExecutionModel execution_model; }; -enum ExtendedDecorations -{ - SPIRVCrossDecorationPacked, - SPIRVCrossDecorationPackedType, - SPIRVCrossDecorationInterfaceMemberIndex, - SPIRVCrossDecorationInterfaceOrigID, - SPIRVCrossDecorationResourceIndexPrimary, - // Used for decorations like resource indices for samplers when part of combined image samplers. - // A variable might need to hold two resource indices in this case. - SPIRVCrossDecorationResourceIndexSecondary, -}; - class Compiler { public: @@ -154,81 +178,81 @@ class Compiler virtual std::string compile(); // Gets the identifier (OpName) of an ID. If not defined, an empty string will be returned. - const std::string &get_name(uint32_t id) const; + const std::string &get_name(ID id) const; // Applies a decoration to an ID. Effectively injects OpDecorate. 
- void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0); - void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument); + void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0); + void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument); // Overrides the identifier OpName of an ID. // Identifiers beginning with underscores or identifiers which contain double underscores // are reserved by the implementation. - void set_name(uint32_t id, const std::string &name); + void set_name(ID id, const std::string &name); // Gets a bitmask for the decorations which are applied to ID. // I.e. (1ull << spv::DecorationFoo) | (1ull << spv::DecorationBar) - const Bitset &get_decoration_bitset(uint32_t id) const; + const Bitset &get_decoration_bitset(ID id) const; // Returns whether the decoration has been applied to the ID. - bool has_decoration(uint32_t id, spv::Decoration decoration) const; + bool has_decoration(ID id, spv::Decoration decoration) const; // Gets the value for decorations which take arguments. // If the decoration is a boolean (i.e. spv::DecorationNonWritable), // 1 will be returned. // If decoration doesn't exist or decoration is not recognized, // 0 will be returned. - uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const; - const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const; + uint32_t get_decoration(ID id, spv::Decoration decoration) const; + const std::string &get_decoration_string(ID id, spv::Decoration decoration) const; // Removes the decoration for an ID. - void unset_decoration(uint32_t id, spv::Decoration decoration); + void unset_decoration(ID id, spv::Decoration decoration); // Gets the SPIR-V type associated with ID. // Mostly used with Resource::type_id and Resource::base_type_id to parse the underlying type of a resource. 
- const SPIRType &get_type(uint32_t id) const; + const SPIRType &get_type(TypeID id) const; // Gets the SPIR-V type of a variable. - const SPIRType &get_type_from_variable(uint32_t id) const; + const SPIRType &get_type_from_variable(VariableID id) const; // Gets the underlying storage class for an OpVariable. - spv::StorageClass get_storage_class(uint32_t id) const; + spv::StorageClass get_storage_class(VariableID id) const; // If get_name() is an empty string, get the fallback name which will be used // instead in the disassembled source. - virtual const std::string get_fallback_name(uint32_t id) const; + virtual const std::string get_fallback_name(ID id) const; // If get_name() of a Block struct is an empty string, get the fallback name. // This needs to be per-variable as multiple variables can use the same block type. - virtual const std::string get_block_fallback_name(uint32_t id) const; + virtual const std::string get_block_fallback_name(VariableID id) const; // Given an OpTypeStruct in ID, obtain the identifier for member number "index". // This may be an empty string. - const std::string &get_member_name(uint32_t id, uint32_t index) const; + const std::string &get_member_name(TypeID id, uint32_t index) const; // Given an OpTypeStruct in ID, obtain the OpMemberDecoration for member number "index". - uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; - const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const; + uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const; // Sets the member identifier for OpTypeStruct ID, member number "index". 
- void set_member_name(uint32_t id, uint32_t index, const std::string &name); + void set_member_name(TypeID id, uint32_t index, const std::string &name); // Returns the qualified member identifier for OpTypeStruct ID, member number "index", // or an empty string if no qualified alias exists - const std::string &get_member_qualified_name(uint32_t type_id, uint32_t index) const; + const std::string &get_member_qualified_name(TypeID type_id, uint32_t index) const; // Gets the decoration mask for a member of a struct, similar to get_decoration_mask. - const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const; + const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const; // Returns whether the decoration has been applied to a member of a struct. - bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; + bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; // Similar to set_decoration, but for struct members. - void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); - void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, + void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); + void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument); // Unsets a member decoration, similar to unset_decoration. - void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration); + void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration); // Gets the fallback name for a member, similar to get_fallback_name. virtual const std::string get_fallback_member_name(uint32_t index) const @@ -240,7 +264,7 @@ class Compiler // SPIR-V shader. The granularity of this analysis is per-member of a struct. 
// This can be used for Buffer (UBO), BufferBlock/StorageBuffer (SSBO) and PushConstant blocks. // ID is the Resource::id obtained from get_shader_resources(). - SmallVector get_active_buffer_ranges(uint32_t id) const; + SmallVector get_active_buffer_ranges(VariableID id) const; // Returns the effective size of a buffer block. size_t get_declared_struct_size(const SPIRType &struct_type) const; @@ -268,12 +292,12 @@ class Compiler // // To use the returned set as the filter for which variables are used during compilation, // this set can be moved to set_enabled_interface_variables(). - std::unordered_set get_active_interface_variables() const; + std::unordered_set get_active_interface_variables() const; // Sets the interface variables which are used during compilation. // By default, all variables are used. // Once set, compile() will only consider the set in active_variables. - void set_enabled_interface_variables(std::unordered_set active_variables); + void set_enabled_interface_variables(std::unordered_set active_variables); // Query shader resources, use ids with reflection interface to modify or query binding points, etc. ShaderResources get_shader_resources() const; @@ -281,19 +305,19 @@ class Compiler // Query shader resources, but only return the variables which are part of active_variables. // E.g.: get_shader_resources(get_active_variables()) to only return the variables which are statically // accessed. - ShaderResources get_shader_resources(const std::unordered_set &active_variables) const; + ShaderResources get_shader_resources(const std::unordered_set &active_variables) const; // Remapped variables are considered built-in variables and a backend will // not emit a declaration for this variable. // This is mostly useful for making use of builtins which are dependent on extensions. 
- void set_remapped_variable_state(uint32_t id, bool remap_enable); - bool get_remapped_variable_state(uint32_t id) const; + void set_remapped_variable_state(VariableID id, bool remap_enable); + bool get_remapped_variable_state(VariableID id) const; // For subpassInput variables which are remapped to plain variables, // the number of components in the remapped // variable must be specified as the backing type of subpass inputs are opaque. - void set_subpass_input_remapped_components(uint32_t id, uint32_t components); - uint32_t get_subpass_input_remapped_components(uint32_t id) const; + void set_subpass_input_remapped_components(VariableID id, uint32_t components); + uint32_t get_subpass_input_remapped_components(VariableID id) const; // All operations work on the current entry point. // Entry points can be swapped out with set_entry_point(). @@ -327,6 +351,10 @@ class Compiler const std::string &get_cleansed_entry_point_name(const std::string &name, spv::ExecutionModel execution_model) const; + // Traverses all reachable opcodes and sets active_builtins to a bitmask of all builtin variables which are accessed in the shader. + void update_active_builtins(); + bool has_active_builtin(spv::BuiltIn builtin, spv::StorageClass storage) const; + // Query and modify OpExecutionMode. const Bitset &get_execution_mode_bitset() const; @@ -334,12 +362,16 @@ class Compiler void set_execution_mode(spv::ExecutionMode mode, uint32_t arg0 = 0, uint32_t arg1 = 0, uint32_t arg2 = 0); // Gets argument for an execution mode (LocalSize, Invocations, OutputVertices). - // For LocalSize, the index argument is used to select the dimension (X = 0, Y = 1, Z = 2). + // For LocalSize or LocalSizeId, the index argument is used to select the dimension (X = 0, Y = 1, Z = 2). // For execution modes which do not have arguments, 0 is returned. + // LocalSizeId query returns an ID. If LocalSizeId execution mode is not used, it returns 0. + // LocalSize always returns a literal. 
If execution mode is LocalSizeId, + // the literal (spec constant or not) is still returned. uint32_t get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index = 0) const; spv::ExecutionModel get_execution_model() const; bool is_tessellation_shader() const; + bool is_tessellating_triangles() const; // In SPIR-V, the compute work group size can be represented by a constant vector, in which case // the LocalSize execution mode is ignored. @@ -357,6 +389,8 @@ class Compiler // If the component is not a specialization constant, a zeroed out struct will be written. // The return value is the constant ID of the builtin WorkGroupSize, but this is not expected to be useful // for most use cases. + // If LocalSizeId is used, there is no uvec3 value representing the workgroup size, so the return value is 0, + // but x, y and z are written as normal if the components are specialization constants. uint32_t get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y, SpecializationConstant &z) const; @@ -374,7 +408,7 @@ class Compiler // If the returned ID is non-zero, it can be decorated with set/bindings as desired before calling compile(). // Calling this function also invalidates get_active_interface_variables(), so this should be called // before that function. - uint32_t build_dummy_sampler_for_combined_images(); + VariableID build_dummy_sampler_for_combined_images(); // Analyzes all separate image and samplers used from the currently selected entry point, // and re-routes them all to a combined image sampler instead. @@ -423,8 +457,8 @@ class Compiler // constant_type is the SPIRType for the specialization constant, // which can be queried to determine which fields in the unions should be poked at. 
SmallVector get_specialization_constants() const; - SPIRConstant &get_constant(uint32_t id); - const SPIRConstant &get_constant(uint32_t id) const; + SPIRConstant &get_constant(ConstantID id); + const SPIRConstant &get_constant(ConstantID id) const; uint32_t get_current_id_bound() const { @@ -447,7 +481,7 @@ class Compiler // If the decoration was declared, sets the word_offset to an offset into the provided SPIR-V binary buffer and returns true, // otherwise, returns false. // If the decoration does not have any value attached to it (e.g. DecorationRelaxedPrecision), this function will also return false. - bool get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const; + bool get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const; // HLSL counter buffer reflection interface. // Append/Consume/Increment/Decrement in HLSL is implemented as two "neighbor" buffer objects where @@ -462,7 +496,7 @@ class Compiler // only return true if OpSource was reported HLSL. // To rely on this functionality, ensure that the SPIR-V module is not stripped. - bool buffer_is_hlsl_counter_buffer(uint32_t id) const; + bool buffer_is_hlsl_counter_buffer(VariableID id) const; // Queries if a buffer object has a neighbor "counter" buffer. // If so, the ID of that counter buffer will be returned in counter_id. @@ -470,7 +504,7 @@ class Compiler // Otherwise, this query is purely based on OpName identifiers as found in the SPIR-V module, and will // only return true if OpSource was reported HLSL. // To rely on this functionality, ensure that the SPIR-V module is not stripped. - bool buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const; + bool buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const; // Gets the list of all SPIR-V Capabilities which were declared in the SPIR-V module. 
const SmallVector &get_declared_capabilities() const; @@ -491,13 +525,19 @@ class Compiler // ID is the name of a variable as returned by Resource::id, and must be a variable with a Block-like type. // // This also applies to HLSL cbuffers. - std::string get_remapped_declared_block_name(uint32_t id) const; + std::string get_remapped_declared_block_name(VariableID id) const; // For buffer block variables, get the decorations for that variable. // Sometimes, decorations for buffer blocks are found in member decorations instead // of direct decorations on the variable itself. // The most common use here is to check if a buffer is readonly or writeonly. - Bitset get_buffer_block_flags(uint32_t id) const; + Bitset get_buffer_block_flags(VariableID id) const; + + // Returns whether the position output is invariant + bool is_position_invariant() const + { + return position_invariant; + } protected: const uint32_t *stream(const Instruction &instr) const @@ -508,9 +548,23 @@ class Compiler if (!instr.length) return nullptr; - if (instr.offset + instr.length > ir.spirv.size()) - SPIRV_CROSS_THROW("Compiler::stream() out of range."); - return &ir.spirv[instr.offset]; + if (instr.is_embedded()) + { + auto &embedded = static_cast(instr); + assert(embedded.ops.size() == instr.length); + return embedded.ops.data(); + } + else + { + if (instr.offset + instr.length > ir.spirv.size()) + SPIRV_CROSS_THROW("Compiler::stream() out of range."); + return &ir.spirv[instr.offset]; + } + } + + uint32_t *stream_mutable(const Instruction &instr) const + { + return const_cast(stream(instr)); } ParsedIR ir; @@ -521,9 +575,22 @@ class Compiler SPIRFunction *current_function = nullptr; SPIRBlock *current_block = nullptr; - std::unordered_set active_interface_variables; + uint32_t current_loop_level = 0; + std::unordered_set active_interface_variables; bool check_active_interface_variables = false; + void add_loop_level(); + + void set_initializers(SPIRExpression &e) + { + e.emitted_loop_level = 
current_loop_level; + } + + template + void set_initializers(const T &) + { + } + // If our IDs are out of range here as part of opcodes, throw instead of // undefined behavior. template @@ -532,6 +599,7 @@ class Compiler ir.add_typed_id(static_cast(T::type), id); auto &var = variant_set(ir.ids[id], std::forward

(args)...); var.self = id; + set_initializers(var); return var; } @@ -561,7 +629,9 @@ class Compiler template const T *maybe_get(uint32_t id) const { - if (ir.ids[id].get_type() == static_cast(T::type)) + if (id >= ir.ids.size()) + return nullptr; + else if (ir.ids[id].get_type() == static_cast(T::type)) return &get(id); else return nullptr; @@ -629,7 +699,7 @@ class Compiler inline bool is_single_block_loop(uint32_t next) const { auto &block = get(next); - return block.merge == SPIRBlock::MergeLoop && block.continue_block == next; + return block.merge == SPIRBlock::MergeLoop && block.continue_block == ID(next); } inline bool is_break(uint32_t next) const @@ -669,7 +739,6 @@ class Compiler bool function_is_pure(const SPIRFunction &func); bool block_is_pure(const SPIRBlock &block); - bool block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to); bool execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const; bool execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const; @@ -677,16 +746,20 @@ class Compiler SPIRBlock::ContinueBlockType continue_block_type(const SPIRBlock &continue_block) const; void force_recompile(); + void force_recompile_guarantee_forward_progress(); void clear_force_recompile(); bool is_forcing_recompilation() const; bool is_force_recompile = false; + bool is_force_recompile_forward_progress = false; + bool block_is_noop(const SPIRBlock &block) const; bool block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const; bool types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const; void inherit_expression_dependencies(uint32_t dst, uint32_t source); void add_implied_read_expression(SPIRExpression &e, uint32_t source); void add_implied_read_expression(SPIRAccessChain &e, uint32_t source); + void add_active_interface_variable(uint32_t var_id); // For proper multiple entry point support, allow querying if an Input or Output // variable is part of that 
entry points interface. @@ -712,6 +785,10 @@ class Compiler // Return true if traversal should continue. // If false, traversal will end immediately. virtual bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) = 0; + virtual bool handle_terminator(const SPIRBlock &) + { + return true; + } virtual bool follow_function_call(const SPIRFunction &) { @@ -722,6 +799,13 @@ class Compiler { } + // Called after returning from a function or when entering a block, + // can be called multiple times per block, + // while set_current_block is only called on block entry. + virtual void rearm_current_block(const SPIRBlock &) + { + } + virtual bool begin_function_scope(const uint32_t *, uint32_t) { return true; @@ -753,7 +837,7 @@ class Compiler struct InterfaceVariableAccessHandler : OpcodeHandler { - InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set &variables_) + InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set &variables_) : compiler(compiler_) , variables(variables_) { @@ -762,7 +846,7 @@ class Compiler bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; const Compiler &compiler; - std::unordered_set &variables; + std::unordered_set &variables; }; struct CombinedImageSamplerHandler : OpcodeHandler @@ -784,8 +868,8 @@ class Compiler uint32_t remap_parameter(uint32_t id); void push_remap_parameters(const SPIRFunction &func, const uint32_t *args, uint32_t length); void pop_remap_parameters(); - void register_combined_image_sampler(SPIRFunction &caller, uint32_t texture_id, uint32_t sampler_id, - bool depth); + void register_combined_image_sampler(SPIRFunction &caller, VariableID combined_id, VariableID texture_id, + VariableID sampler_id, bool depth); }; struct DummySamplerForCombinedImageHandler : OpcodeHandler @@ -811,6 +895,9 @@ class Compiler Compiler &compiler; void handle_builtin(const SPIRType &type, spv::BuiltIn builtin, const Bitset &decoration_flags); + void 
add_if_builtin(uint32_t id); + void add_if_builtin_or_block(uint32_t id); + void add_if_builtin(uint32_t id, bool allow_blocks); }; bool traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const; @@ -818,7 +905,7 @@ class Compiler // This must be an ordered data structure so we always pick the same type aliases. SmallVector global_struct_cache; - ShaderResources get_shader_resources(const std::unordered_set *active_variables) const; + ShaderResources get_shader_resources(const std::unordered_set *active_variables) const; VariableTypeRemapCallback variable_remap_callback; @@ -826,7 +913,9 @@ class Compiler std::unordered_set forced_temporaries; std::unordered_set forwarded_temporaries; + std::unordered_set suppressed_usage_tracking; std::unordered_set hoisted_temporaries; + std::unordered_set forced_invariant_temporaries; Bitset active_input_builtins; Bitset active_output_builtins; @@ -834,10 +923,6 @@ class Compiler uint32_t cull_distance_count = 0; bool position_invariant = false; - // Traverses all reachable opcodes and sets active_builtins to a bitmask of all builtin variables which are accessed in the shader. - void update_active_builtins(); - bool has_active_builtin(spv::BuiltIn builtin, spv::StorageClass storage); - void analyze_parameter_preservation( SPIRFunction &entry, const CFG &cfg, const std::unordered_map> &variable_to_blocks, @@ -850,6 +935,7 @@ class Compiler // Similar is implemented for images, as well as if subpass inputs are needed. std::unordered_set comparison_ids; bool need_subpass_input = false; + bool need_subpass_input_ms = false; // In certain backends, we will need to use a dummy sampler to be able to emit code. 
// GLSL does not support texelFetch on texture2D objects, but SPIR-V does, @@ -889,13 +975,18 @@ class Compiler void add_hierarchy_to_comparison_ids(uint32_t ids); bool need_subpass_input = false; + bool need_subpass_input_ms = false; + void add_dependency(uint32_t dst, uint32_t src); }; void build_function_control_flow_graphs_and_analyze(); std::unordered_map> function_cfgs; + const CFG &get_cfg_for_current_function() const; + const CFG &get_cfg_for_function(uint32_t id) const; + struct CFGBuilder : OpcodeHandler { - CFGBuilder(Compiler &compiler_); + explicit CFGBuilder(Compiler &compiler_); bool follow_function_call(const SPIRFunction &func) override; bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; @@ -914,6 +1005,7 @@ class Compiler bool id_is_phi_variable(uint32_t id) const; bool id_is_potential_temporary(uint32_t id) const; bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool handle_terminator(const SPIRBlock &block) override; Compiler &compiler; SPIRFunction &entry; @@ -923,6 +1015,9 @@ class Compiler std::unordered_map> complete_write_variables_to_block; std::unordered_map> partial_write_variables_to_block; std::unordered_set access_chain_expressions; + // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. + // This is also relevant when forwarding opaque objects since we cannot lower these to temporaries. 
+ std::unordered_map> rvalue_forward_children; const SPIRBlock *current_block = nullptr; }; @@ -938,21 +1033,93 @@ class Compiler uint32_t write_count = 0; }; + struct PhysicalBlockMeta + { + uint32_t alignment = 0; + }; + struct PhysicalStorageBufferPointerHandler : OpcodeHandler { - PhysicalStorageBufferPointerHandler(Compiler &compiler_); + explicit PhysicalStorageBufferPointerHandler(Compiler &compiler_); bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; Compiler &compiler; - std::unordered_set types; + + std::unordered_set non_block_types; + std::unordered_map physical_block_type_meta; + std::unordered_map access_chain_to_physical_block; + + void mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length); + PhysicalBlockMeta *find_block_meta(uint32_t id) const; + bool type_is_bda_block_entry(uint32_t type_id) const; + void setup_meta_chain(uint32_t type_id, uint32_t var_id); + uint32_t get_minimum_scalar_alignment(const SPIRType &type) const; + void analyze_non_block_types_from_block(const SPIRType &type); + uint32_t get_base_non_block_type_id(uint32_t type_id) const; }; void analyze_non_block_pointer_types(); SmallVector physical_storage_non_block_pointer_types; + std::unordered_map physical_storage_type_to_alignment; void analyze_variable_scope(SPIRFunction &function, AnalyzeVariableScopeAccessHandler &handler); void find_function_local_luts(SPIRFunction &function, const AnalyzeVariableScopeAccessHandler &handler, bool single_function); bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var); + // Finds all resources that are written to from inside the critical section, if present. + // The critical section is delimited by OpBeginInvocationInterlockEXT and + // OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written + // while inside the critical section must be placed in a raster order group. 
+ struct InterlockedResourceAccessHandler : OpcodeHandler + { + InterlockedResourceAccessHandler(Compiler &compiler_, uint32_t entry_point_id) + : compiler(compiler_) + { + call_stack.push_back(entry_point_id); + } + + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + bool in_crit_sec = false; + + uint32_t interlock_function_id = 0; + bool split_function_case = false; + bool control_flow_interlock = false; + bool use_critical_section = false; + bool call_stack_is_interlocked = false; + SmallVector call_stack; + + void access_potential_resource(uint32_t id); + }; + + struct InterlockedResourceAccessPrepassHandler : OpcodeHandler + { + InterlockedResourceAccessPrepassHandler(Compiler &compiler_, uint32_t entry_point_id) + : compiler(compiler_) + { + call_stack.push_back(entry_point_id); + } + + void rearm_current_block(const SPIRBlock &block) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + uint32_t interlock_function_id = 0; + uint32_t current_block_id = 0; + bool split_function_case = false; + bool control_flow_interlock = false; + SmallVector call_stack; + }; + + void analyze_interlocked_resource_usage(); + // The set of all resources written while inside the critical section, if present. 
+ std::unordered_set interlocked_resources; + bool interlocked_is_complex = false; + void make_constant_null(uint32_t id, uint32_t type); std::unordered_map declared_block_names; @@ -963,7 +1130,7 @@ class Compiler Bitset combined_decoration_for_member(const SPIRType &type, uint32_t index) const; static bool is_desktop_only_format(spv::ImageFormat format); - bool image_is_comparison(const SPIRType &type, uint32_t id) const; + bool is_depth_image(const SPIRType &type, uint32_t id) const; void set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value = 0); uint32_t get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const; @@ -977,12 +1144,25 @@ class Compiler void unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration); bool type_is_array_of_pointers(const SPIRType &type) const; + bool type_is_top_level_physical_pointer(const SPIRType &type) const; bool type_is_block_like(const SPIRType &type) const; bool type_is_opaque_value(const SPIRType &type) const; bool reflection_ssbo_instance_name_is_significant() const; std::string get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const; + bool flush_phi_required(BlockID from, BlockID to) const; + + uint32_t evaluate_spec_constant_u32(const SPIRConstantOp &spec) const; + uint32_t evaluate_constant_u32(uint32_t id) const; + + bool is_vertex_like_shader() const; + + // Get the correct case list for the OpSwitch, since it can be either a + // 32 bit wide condition or a 64 bit, but the type is not embedded in the + // instruction itself. + const SmallVector &get_case_list(const SPIRBlock &block) const; + private: // Used only to implement the old deprecated get_entry_point() interface. 
const SPIREntryPoint &get_first_entry_point(const std::string &name) const; diff --git a/spirv_cross_c.cpp b/spirv_cross_c.cpp index 8048274f00b..72614d78e36 100644 --- a/spirv_cross_c.cpp +++ b/spirv_cross_c.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2019 Hans-Kristian Arntzen + * Copyright 2019-2021 Hans-Kristian Arntzen + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_cross_c.h" #if SPIRV_CROSS_C_API_CPP @@ -162,7 +169,7 @@ struct spvc_compiler_options_s : ScratchMemoryAllocation struct spvc_set_s : ScratchMemoryAllocation { - std::unordered_set set; + std::unordered_set set; }; // Dummy-inherit to we can keep our opaque type handle type safe in C-land as well, @@ -187,11 +194,15 @@ struct spvc_resources_s : ScratchMemoryAllocation SmallVector sampled_images; SmallVector atomic_counters; SmallVector push_constant_buffers; + SmallVector shader_record_buffers; SmallVector separate_images; SmallVector separate_samplers; SmallVector acceleration_structures; + SmallVector builtin_inputs; + SmallVector builtin_outputs; bool copy_resources(SmallVector &outputs, const SmallVector &inputs); + bool copy_resources(SmallVector &outputs, const SmallVector &inputs); bool copy_resources(const ShaderResources &resources); }; @@ -241,7 +252,7 @@ spvc_result spvc_context_parse_spirv(spvc_context context, const SpvId *spirv, s pir->context = context; Parser parser(spirv, word_count); parser.parse(); - pir->parsed = move(parser.get_parsed_ir()); + pir->parsed = std::move(parser.get_parsed_ir()); *parsed_ir = pir.get(); context->allocations.push_back(std::move(pir)); } @@ -273,7 +284,7 @@ spvc_result 
spvc_context_create_compiler(spvc_context context, spvc_backend back { case SPVC_BACKEND_NONE: if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) - comp->compiler.reset(new Compiler(move(parsed_ir->parsed))); + comp->compiler.reset(new Compiler(std::move(parsed_ir->parsed))); else if (mode == SPVC_CAPTURE_MODE_COPY) comp->compiler.reset(new Compiler(parsed_ir->parsed)); break; @@ -281,7 +292,7 @@ spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend back #if SPIRV_CROSS_C_API_GLSL case SPVC_BACKEND_GLSL: if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) - comp->compiler.reset(new CompilerGLSL(move(parsed_ir->parsed))); + comp->compiler.reset(new CompilerGLSL(std::move(parsed_ir->parsed))); else if (mode == SPVC_CAPTURE_MODE_COPY) comp->compiler.reset(new CompilerGLSL(parsed_ir->parsed)); break; @@ -290,7 +301,7 @@ spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend back #if SPIRV_CROSS_C_API_HLSL case SPVC_BACKEND_HLSL: if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) - comp->compiler.reset(new CompilerHLSL(move(parsed_ir->parsed))); + comp->compiler.reset(new CompilerHLSL(std::move(parsed_ir->parsed))); else if (mode == SPVC_CAPTURE_MODE_COPY) comp->compiler.reset(new CompilerHLSL(parsed_ir->parsed)); break; @@ -299,7 +310,7 @@ spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend back #if SPIRV_CROSS_C_API_MSL case SPVC_BACKEND_MSL: if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) - comp->compiler.reset(new CompilerMSL(move(parsed_ir->parsed))); + comp->compiler.reset(new CompilerMSL(std::move(parsed_ir->parsed))); else if (mode == SPVC_CAPTURE_MODE_COPY) comp->compiler.reset(new CompilerMSL(parsed_ir->parsed)); break; @@ -308,7 +319,7 @@ spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend back #if SPIRV_CROSS_C_API_CPP case SPVC_BACKEND_CPP: if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) - comp->compiler.reset(new CompilerCPP(move(parsed_ir->parsed))); + comp->compiler.reset(new 
CompilerCPP(std::move(parsed_ir->parsed))); else if (mode == SPVC_CAPTURE_MODE_COPY) comp->compiler.reset(new CompilerCPP(parsed_ir->parsed)); break; @@ -317,7 +328,7 @@ spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend back #if SPIRV_CROSS_C_API_REFLECT case SPVC_BACKEND_JSON: if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) - comp->compiler.reset(new CompilerReflection(move(parsed_ir->parsed))); + comp->compiler.reset(new CompilerReflection(std::move(parsed_ir->parsed))); else if (mode == SPVC_CAPTURE_MODE_COPY) comp->compiler.reset(new CompilerReflection(parsed_ir->parsed)); break; @@ -420,6 +431,12 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES: options->glsl.emit_line_directives = value != 0; break; + case SPVC_COMPILER_OPTION_ENABLE_STORAGE_IMAGE_QUALIFIER_DEDUCTION: + options->glsl.enable_storage_image_qualifier_deduction = value != 0; + break; + case SPVC_COMPILER_OPTION_FORCE_ZERO_INITIALIZED_VARIABLES: + options->glsl.force_zero_initialized_variables = value != 0; + break; case SPVC_COMPILER_OPTION_GLSL_SUPPORT_NONZERO_BASE_INSTANCE: options->glsl.vertex.support_nonzero_base_instance = value != 0; @@ -453,6 +470,18 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_GLSL_EMIT_UNIFORM_BUFFER_AS_PLAIN_UNIFORMS: options->glsl.emit_uniform_buffer_as_plain_uniforms = value != 0; break; + case SPVC_COMPILER_OPTION_GLSL_FORCE_FLATTENED_IO_BLOCKS: + options->glsl.force_flattened_io_blocks = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_OVR_MULTIVIEW_VIEW_COUNT: + options->glsl.ovr_multiview_view_count = value; + break; + case SPVC_COMPILER_OPTION_RELAX_NAN_CHECKS: + options->glsl.relax_nan_checks = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_ENABLE_ROW_MAJOR_LOAD_WORKAROUND: + options->glsl.enable_row_major_load_workaround = value != 0; + break; #endif #if SPIRV_CROSS_C_API_HLSL @@ 
-471,6 +500,22 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_HLSL_SUPPORT_NONZERO_BASE_VERTEX_BASE_INSTANCE: options->hlsl.support_nonzero_base_vertex_base_instance = value != 0; break; + + case SPVC_COMPILER_OPTION_HLSL_FORCE_STORAGE_BUFFER_AS_UAV: + options->hlsl.force_storage_buffer_as_uav = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV: + options->hlsl.nonwritable_uav_texture_as_srv = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_ENABLE_16BIT_TYPES: + options->hlsl.enable_16bit_types = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_FLATTEN_MATRIX_VERTEX_INPUT_SEMANTICS: + options->hlsl.flatten_matrix_vertex_input_semantics = value != 0; + break; #endif #if SPIRV_CROSS_C_API_MSL @@ -545,6 +590,154 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX: options->msl.buffer_size_buffer_index = value; break; + + case SPVC_COMPILER_OPTION_MSL_MULTIVIEW: + options->msl.multiview = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX: + options->msl.view_mask_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX: + options->msl.device_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX: + options->msl.view_index_from_device_index = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE: + options->msl.dispatch_base = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX: + options->msl.dynamic_offsets_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D: + options->msl.texture_1D_as_2D = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO: + options->msl.enable_base_index_zero = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS: + 
options->msl.use_framebuffer_fetch_subpasses = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH: + options->msl.invariant_float_math = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY: + options->msl.emulate_cube_array = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING: + options->msl.enable_decoration_binding = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_FORCE_ACTIVE_ARGUMENT_BUFFER_RESOURCES: + options->msl.force_active_argument_buffer_resources = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_FORCE_NATIVE_ARRAYS: + options->msl.force_native_arrays = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_OUTPUT_MASK: + options->msl.enable_frag_output_mask = value; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_DEPTH_BUILTIN: + options->msl.enable_frag_depth_builtin = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_STENCIL_REF_BUILTIN: + options->msl.enable_frag_stencil_ref_builtin = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_CLIP_DISTANCE_USER_VARYING: + options->msl.enable_clip_distance_user_varying = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_MULTI_PATCH_WORKGROUP: + options->msl.multi_patch_workgroup = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_BUFFER_INDEX: + options->msl.shader_input_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_INDEX_BUFFER_INDEX: + options->msl.shader_index_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_VERTEX_FOR_TESSELLATION: + options->msl.vertex_for_tessellation = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_VERTEX_INDEX_TYPE: + options->msl.vertex_index_type = static_cast(value); + break; + + case SPVC_COMPILER_OPTION_MSL_MULTIVIEW_LAYERED_RENDERING: + options->msl.multiview_layered_rendering = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ARRAYED_SUBPASS_INPUT: + 
options->msl.arrayed_subpass_input = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_R32UI_LINEAR_TEXTURE_ALIGNMENT: + options->msl.r32ui_linear_texture_alignment = value; + break; + + case SPVC_COMPILER_OPTION_MSL_R32UI_ALIGNMENT_CONSTANT_ID: + options->msl.r32ui_alignment_constant_id = value; + break; + + case SPVC_COMPILER_OPTION_MSL_IOS_USE_SIMDGROUP_FUNCTIONS: + options->msl.ios_use_simdgroup_functions = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_EMULATE_SUBGROUPS: + options->msl.emulate_subgroups = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_FIXED_SUBGROUP_SIZE: + options->msl.fixed_subgroup_size = value; + break; + + case SPVC_COMPILER_OPTION_MSL_FORCE_SAMPLE_RATE_SHADING: + options->msl.force_sample_rate_shading = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_IOS_SUPPORT_BASE_VERTEX_INSTANCE: + options->msl.ios_support_base_vertex_instance = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_RAW_BUFFER_TESE_INPUT: + options->msl.raw_buffer_tese_input = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_INPUT_BUFFER_INDEX: + options->msl.shader_patch_input_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_MANUAL_HELPER_INVOCATION_UPDATES: + options->msl.manual_helper_invocation_updates = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_CHECK_DISCARDED_FRAG_STORES: + options->msl.check_discarded_frag_stores = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS_TIER: + options->msl.argument_buffers_tier = static_cast(value); + break; #endif default: @@ -641,6 +834,61 @@ spvc_result spvc_compiler_flatten_buffer_block(spvc_compiler compiler, spvc_vari #endif } +spvc_bool spvc_compiler_variable_is_depth_or_compare(spvc_compiler compiler, spvc_variable_id id) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + 
return SPVC_ERROR_INVALID_ARGUMENT; + } + + return static_cast(compiler->compiler.get())->variable_is_depth_or_compare(id) ? SPVC_TRUE : SPVC_FALSE; +#else + (void)id; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_FALSE; +#endif +} + +spvc_result spvc_compiler_mask_stage_output_by_location(spvc_compiler compiler, + unsigned location, unsigned component) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + static_cast(compiler->compiler.get())->mask_stage_output_by_location(location, component); + return SPVC_SUCCESS; +#else + (void)location; + (void)component; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_mask_stage_output_by_builtin(spvc_compiler compiler, SpvBuiltIn builtin) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + static_cast(compiler->compiler.get())->mask_stage_output_by_builtin(spv::BuiltIn(builtin)); + return SPVC_SUCCESS; +#else + (void)builtin; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result spvc_compiler_hlsl_set_root_constants_layout(spvc_compiler compiler, const spvc_hlsl_root_constants *constant_info, size_t count) @@ -721,6 +969,80 @@ spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin(spvc_compiler c #endif } +spvc_result 
spvc_compiler_hlsl_set_resource_binding_flags(spvc_compiler compiler, + spvc_hlsl_binding_flags flags) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &hlsl = *static_cast(compiler->compiler.get()); + hlsl.set_resource_binding_flags(flags); + return SPVC_SUCCESS; +#else + (void)flags; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_hlsl_add_resource_binding(spvc_compiler compiler, + const spvc_hlsl_resource_binding *binding) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &hlsl = *static_cast(compiler->compiler.get()); + HLSLResourceBinding bind; + bind.binding = binding->binding; + bind.desc_set = binding->desc_set; + bind.stage = static_cast(binding->stage); + bind.cbv.register_binding = binding->cbv.register_binding; + bind.cbv.register_space = binding->cbv.register_space; + bind.uav.register_binding = binding->uav.register_binding; + bind.uav.register_space = binding->uav.register_space; + bind.srv.register_binding = binding->srv.register_binding; + bind.srv.register_space = binding->srv.register_space; + bind.sampler.register_binding = binding->sampler.register_binding; + bind.sampler.register_space = binding->sampler.register_space; + hlsl.add_hlsl_resource_binding(bind); + return SPVC_SUCCESS; +#else + (void)binding; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_bool spvc_compiler_hlsl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, unsigned set, + unsigned binding) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend 
!= SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_FALSE; + } + + auto &hlsl = *static_cast(compiler->compiler.get()); + return hlsl.is_hlsl_resource_binding_used(static_cast(model), set, binding) ? SPVC_TRUE : + SPVC_FALSE; +#else + (void)model; + (void)set; + (void)binding; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_FALSE; +#endif +} + spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler) { #if SPIRV_CROSS_C_API_MSL @@ -838,15 +1160,11 @@ spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const } auto &msl = *static_cast(compiler->compiler.get()); - MSLVertexAttr attr; + MSLShaderInterfaceVariable attr; attr.location = va->location; - attr.msl_buffer = va->msl_buffer; - attr.msl_offset = va->msl_offset; - attr.msl_stride = va->msl_stride; - attr.format = static_cast(va->format); + attr.format = static_cast(va->format); attr.builtin = static_cast(va->builtin); - attr.per_instance = va->per_instance; - msl.add_msl_vertex_attribute(attr); + msl.add_msl_shader_input(attr); return SPVC_SUCCESS; #else (void)va; @@ -855,6 +1173,104 @@ spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const #endif } +spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, const spvc_msl_shader_interface_var *si) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLShaderInterfaceVariable input; + input.location = si->location; + input.format = static_cast(si->format); + input.builtin = static_cast(si->builtin); + input.vecsize = si->vecsize; + msl.add_msl_shader_input(input); + return SPVC_SUCCESS; +#else + (void)si; + compiler->context->report_error("MSL function used on a 
non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_shader_input_2(spvc_compiler compiler, const spvc_msl_shader_interface_var_2 *si) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLShaderInterfaceVariable input; + input.location = si->location; + input.format = static_cast(si->format); + input.builtin = static_cast(si->builtin); + input.vecsize = si->vecsize; + input.rate = static_cast(si->rate); + msl.add_msl_shader_input(input); + return SPVC_SUCCESS; +#else + (void)si; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_shader_output(spvc_compiler compiler, const spvc_msl_shader_interface_var *so) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLShaderInterfaceVariable output; + output.location = so->location; + output.format = static_cast(so->format); + output.builtin = static_cast(so->builtin); + output.vecsize = so->vecsize; + msl.add_msl_shader_output(output); + return SPVC_SUCCESS; +#else + (void)so; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_shader_output_2(spvc_compiler compiler, const spvc_msl_shader_interface_var_2 *so) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = 
*static_cast(compiler->compiler.get()); + MSLShaderInterfaceVariable output; + output.location = so->location; + output.format = static_cast(so->format); + output.builtin = static_cast(so->builtin); + output.vecsize = so->vecsize; + output.rate = static_cast(so->rate); + msl.add_msl_shader_output(output); + return SPVC_SUCCESS; +#else + (void)so; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, const spvc_msl_resource_binding *binding) { @@ -882,6 +1298,47 @@ spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, #endif } +spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.add_dynamic_buffer(desc_set, binding, index); + return SPVC_SUCCESS; +#else + (void)binding; + (void)desc_set; + (void)index; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_inline_uniform_block(spvc_compiler compiler, unsigned desc_set, unsigned binding) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.add_inline_uniform_block(desc_set, binding); + return SPVC_SUCCESS; +#else + (void)binding; + (void)desc_set; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result 
spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set) { #if SPIRV_CROSS_C_API_MSL @@ -901,7 +1358,27 @@ spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler #endif } -spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location) +spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.set_argument_buffer_device_address_space(desc_set, bool(device_address)); + return SPVC_SUCCESS; +#else + (void)desc_set; + (void)device_address; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_bool spvc_compiler_msl_is_shader_input_used(spvc_compiler compiler, unsigned location) { #if SPIRV_CROSS_C_API_MSL if (compiler->backend != SPVC_BACKEND_MSL) @@ -911,7 +1388,7 @@ spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, uns } auto &msl = *static_cast(compiler->compiler.get()); - return msl.is_msl_vertex_attribute_used(location) ? SPVC_TRUE : SPVC_FALSE; + return msl.is_msl_shader_input_used(location) ? 
SPVC_TRUE : SPVC_FALSE; #else (void)location; compiler->context->report_error("MSL function used on a non-MSL backend."); @@ -919,6 +1396,29 @@ spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, uns #endif } +spvc_bool spvc_compiler_msl_is_shader_output_used(spvc_compiler compiler, unsigned location) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.is_msl_shader_output_used(location) ? SPVC_TRUE : SPVC_FALSE; +#else + (void)location; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location) +{ + return spvc_compiler_msl_is_shader_input_used(compiler, location); +} + spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, unsigned set, unsigned binding) { @@ -941,6 +1441,42 @@ spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutio #endif } +spvc_result spvc_compiler_msl_set_combined_sampler_suffix(spvc_compiler compiler, const char *suffix) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.set_combined_sampler_suffix(suffix); + return SPVC_SUCCESS; +#else + (void)suffix; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +const char *spvc_compiler_msl_get_combined_sampler_suffix(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL 
backend."); + return ""; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.get_combined_sampler_suffix(); +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return ""; +#endif +} + #if SPIRV_CROSS_C_API_MSL static void spvc_convert_msl_sampler(MSLConstexprSampler &samp, const spvc_msl_constexpr_sampler *sampler) { @@ -949,17 +1485,33 @@ static void spvc_convert_msl_sampler(MSLConstexprSampler &samp, const spvc_msl_c samp.r_address = static_cast(sampler->r_address); samp.lod_clamp_min = sampler->lod_clamp_min; samp.lod_clamp_max = sampler->lod_clamp_max; - samp.lod_clamp_enable = sampler->lod_clamp_enable; + samp.lod_clamp_enable = sampler->lod_clamp_enable != 0; samp.min_filter = static_cast(sampler->min_filter); samp.mag_filter = static_cast(sampler->mag_filter); samp.mip_filter = static_cast(sampler->mip_filter); - samp.compare_enable = sampler->compare_enable; - samp.anisotropy_enable = sampler->anisotropy_enable; + samp.compare_enable = sampler->compare_enable != 0; + samp.anisotropy_enable = sampler->anisotropy_enable != 0; samp.max_anisotropy = sampler->max_anisotropy; samp.compare_func = static_cast(sampler->compare_func); samp.coord = static_cast(sampler->coord); samp.border_color = static_cast(sampler->border_color); } + +static void spvc_convert_msl_sampler_ycbcr_conversion(MSLConstexprSampler &samp, const spvc_msl_sampler_ycbcr_conversion *conv) +{ + samp.ycbcr_conversion_enable = conv != nullptr; + if (conv == nullptr) return; + samp.planes = conv->planes; + samp.resolution = static_cast(conv->resolution); + samp.chroma_filter = static_cast(conv->chroma_filter); + samp.x_chroma_offset = static_cast(conv->x_chroma_offset); + samp.y_chroma_offset = static_cast(conv->y_chroma_offset); + for (int i = 0; i < 4; i++) + samp.swizzle[i] = static_cast(conv->swizzle[i]); + samp.ycbcr_model = static_cast(conv->ycbcr_model); + samp.ycbcr_range = static_cast(conv->ycbcr_range); + samp.bpc = conv->bpc; +} 
#endif spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, @@ -1010,6 +1562,60 @@ spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler c #endif } +spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, + const spvc_msl_constexpr_sampler *sampler, + const spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + spvc_convert_msl_sampler_ycbcr_conversion(samp, conv); + msl.remap_constexpr_sampler(id, samp); + return SPVC_SUCCESS; +#else + (void)id; + (void)sampler; + (void)conv; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, + unsigned desc_set, unsigned binding, + const spvc_msl_constexpr_sampler *sampler, + const spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + spvc_convert_msl_sampler_ycbcr_conversion(samp, conv); + msl.remap_constexpr_sampler_by_binding(desc_set, binding, samp); + return SPVC_SUCCESS; +#else + (void)desc_set; + (void)binding; + (void)sampler; + (void)conv; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result 
spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, unsigned components) { @@ -1108,6 +1714,30 @@ bool spvc_resources_s::copy_resources(SmallVector &outp return true; } +bool spvc_resources_s::copy_resources(SmallVector &outputs, + const SmallVector &inputs) +{ + for (auto &i : inputs) + { + spvc_reflected_builtin_resource br; + + br.value_type_id = i.value_type_id; + br.builtin = SpvBuiltIn(i.builtin); + + auto &r = br.resource; + r.base_type_id = i.resource.base_type_id; + r.type_id = i.resource.type_id; + r.id = i.resource.id; + r.name = context->allocate_name(i.resource.name); + if (!r.name) + return false; + + outputs.push_back(br); + } + + return true; +} + bool spvc_resources_s::copy_resources(const ShaderResources &resources) { if (!copy_resources(uniform_buffers, resources.uniform_buffers)) @@ -1128,12 +1758,18 @@ bool spvc_resources_s::copy_resources(const ShaderResources &resources) return false; if (!copy_resources(push_constant_buffers, resources.push_constant_buffers)) return false; + if (!copy_resources(shader_record_buffers, resources.shader_record_buffers)) + return false; if (!copy_resources(separate_images, resources.separate_images)) return false; if (!copy_resources(separate_samplers, resources.separate_samplers)) return false; if (!copy_resources(acceleration_structures, resources.acceleration_structures)) return false; + if (!copy_resources(builtin_inputs, resources.builtin_inputs)) + return false; + if (!copy_resources(builtin_outputs, resources.builtin_outputs)) + return false; return true; } @@ -1277,6 +1913,41 @@ spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources, list = &resources->acceleration_structures; break; + case SPVC_RESOURCE_TYPE_SHADER_RECORD_BUFFER: + list = &resources->shader_record_buffers; + break; + + default: + break; + } + + if (!list) + { + resources->context->report_error("Invalid argument."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + 
*resource_size = list->size(); + *resource_list = list->data(); + return SPVC_SUCCESS; +} + +spvc_result spvc_resources_get_builtin_resource_list_for_type( + spvc_resources resources, spvc_builtin_resource_type type, + const spvc_reflected_builtin_resource **resource_list, + size_t *resource_size) +{ + const SmallVector *list = nullptr; + switch (type) + { + case SPVC_BUILTIN_RESOURCE_TYPE_STAGE_INPUT: + list = &resources->builtin_inputs; + break; + + case SPVC_BUILTIN_RESOURCE_TYPE_STAGE_OUTPUT: + list = &resources->builtin_outputs; + break; + default: break; } @@ -1494,6 +2165,18 @@ SpvExecutionModel spvc_compiler_get_execution_model(spvc_compiler compiler) return static_cast(compiler->compiler->get_execution_model()); } +void spvc_compiler_update_active_builtins(spvc_compiler compiler) +{ + compiler->compiler->update_active_builtins(); +} + +spvc_bool spvc_compiler_has_active_builtin(spvc_compiler compiler, SpvBuiltIn builtin, SpvStorageClass storage) +{ + return compiler->compiler->has_active_builtin(static_cast(builtin), static_cast(storage)) ? + SPVC_TRUE : + SPVC_FALSE; +} + spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id) { // Should only throw if an intentionally garbage ID is passed, but the IDs are not type-safe. @@ -1504,6 +2187,11 @@ spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id) SPVC_END_SAFE_SCOPE(compiler->context, nullptr) } +spvc_type_id spvc_type_get_base_type_id(spvc_type type) +{ + return type->self; +} + static spvc_basetype convert_basetype(SPIRType::BaseType type) { // For now the enums match up. @@ -1945,19 +2633,47 @@ void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr) { #if SPIRV_CROSS_C_API_MSL // Crude, but works. - MSLVertexAttr attr_default; + MSLShaderInterfaceVariable attr_default; attr->location = attr_default.location; - attr->per_instance = attr_default.per_instance ? 
SPVC_TRUE : SPVC_FALSE; attr->format = static_cast(attr_default.format); attr->builtin = static_cast(attr_default.builtin); - attr->msl_buffer = attr_default.msl_buffer; - attr->msl_offset = attr_default.msl_offset; - attr->msl_stride = attr_default.msl_stride; #else memset(attr, 0, sizeof(*attr)); #endif } +void spvc_msl_shader_interface_var_init(spvc_msl_shader_interface_var *var) +{ +#if SPIRV_CROSS_C_API_MSL + MSLShaderInterfaceVariable var_default; + var->location = var_default.location; + var->format = static_cast(var_default.format); + var->builtin = static_cast(var_default.builtin); + var->vecsize = var_default.vecsize; +#else + memset(var, 0, sizeof(*var)); +#endif +} + +void spvc_msl_shader_input_init(spvc_msl_shader_input *input) +{ + spvc_msl_shader_interface_var_init(input); +} + +void spvc_msl_shader_interface_var_init_2(spvc_msl_shader_interface_var_2 *var) +{ +#if SPIRV_CROSS_C_API_MSL + MSLShaderInterfaceVariable var_default; + var->location = var_default.location; + var->format = static_cast(var_default.format); + var->builtin = static_cast(var_default.builtin); + var->vecsize = var_default.vecsize; + var->rate = static_cast(var_default.rate); +#else + memset(var, 0, sizeof(*var)); +#endif +} + void spvc_msl_resource_binding_init(spvc_msl_resource_binding *binding) { #if SPIRV_CROSS_C_API_MSL @@ -1973,6 +2689,26 @@ void spvc_msl_resource_binding_init(spvc_msl_resource_binding *binding) #endif } +void spvc_hlsl_resource_binding_init(spvc_hlsl_resource_binding *binding) +{ +#if SPIRV_CROSS_C_API_HLSL + HLSLResourceBinding binding_default; + binding->desc_set = binding_default.desc_set; + binding->binding = binding_default.binding; + binding->cbv.register_binding = binding_default.cbv.register_binding; + binding->cbv.register_space = binding_default.cbv.register_space; + binding->srv.register_binding = binding_default.srv.register_binding; + binding->srv.register_space = binding_default.srv.register_space; + binding->uav.register_binding = 
binding_default.uav.register_binding; + binding->uav.register_space = binding_default.uav.register_space; + binding->sampler.register_binding = binding_default.sampler.register_binding; + binding->sampler.register_space = binding_default.sampler.register_space; + binding->stage = static_cast(binding_default.stage); +#else + memset(binding, 0, sizeof(*binding)); +#endif +} + void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler) { #if SPIRV_CROSS_C_API_MSL @@ -1997,6 +2733,24 @@ void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler) #endif } +void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + MSLConstexprSampler defaults; + conv->planes = defaults.planes; + conv->resolution = static_cast(defaults.resolution); + conv->chroma_filter = static_cast(defaults.chroma_filter); + conv->x_chroma_offset = static_cast(defaults.x_chroma_offset); + conv->y_chroma_offset = static_cast(defaults.y_chroma_offset); + for (int i = 0; i < 4; i++) + conv->swizzle[i] = static_cast(defaults.swizzle[i]); + conv->ycbcr_model = static_cast(defaults.ycbcr_model); + conv->ycbcr_range = static_cast(defaults.ycbcr_range); +#else + memset(conv, 0, sizeof(*conv)); +#endif +} + unsigned spvc_compiler_get_current_id_bound(spvc_compiler compiler) { return compiler->compiler->get_current_id_bound(); diff --git a/spirv_cross_c.h b/spirv_cross_c.h index 6efaf8543ed..826e25a7401 100644 --- a/spirv_cross_c.h +++ b/spirv_cross_c.h @@ -1,5 +1,6 @@ /* - * Copyright 2019 Hans-Kristian Arntzen + * Copyright 2019-2021 Hans-Kristian Arntzen + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. 
The MIT License, found at . + */ + #ifndef SPIRV_CROSS_C_API_H #define SPIRV_CROSS_C_API_H @@ -33,7 +40,7 @@ extern "C" { /* Bumped if ABI or API breaks backwards compatibility. */ #define SPVC_C_API_VERSION_MAJOR 0 /* Bumped if APIs or enumerations are added in a backwards compatible way. */ -#define SPVC_C_API_VERSION_MINOR 16 +#define SPVC_C_API_VERSION_MINOR 54 /* Bumped if internal implementation details change. */ #define SPVC_C_API_VERSION_PATCH 0 @@ -92,6 +99,13 @@ typedef struct spvc_reflected_resource const char *name; } spvc_reflected_resource; +typedef struct spvc_reflected_builtin_resource +{ + SpvBuiltIn builtin; + spvc_type_id value_type_id; + spvc_reflected_resource resource; +} spvc_reflected_builtin_resource; + /* See C++ API. */ typedef struct spvc_entry_point { @@ -210,9 +224,19 @@ typedef enum spvc_resource_type SPVC_RESOURCE_TYPE_SEPARATE_IMAGE = 10, SPVC_RESOURCE_TYPE_SEPARATE_SAMPLERS = 11, SPVC_RESOURCE_TYPE_ACCELERATION_STRUCTURE = 12, + SPVC_RESOURCE_TYPE_RAY_QUERY = 13, + SPVC_RESOURCE_TYPE_SHADER_RECORD_BUFFER = 14, SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff } spvc_resource_type; +typedef enum spvc_builtin_resource_type +{ + SPVC_BUILTIN_RESOURCE_TYPE_UNKNOWN = 0, + SPVC_BUILTIN_RESOURCE_TYPE_STAGE_INPUT = 1, + SPVC_BUILTIN_RESOURCE_TYPE_STAGE_OUTPUT = 2, + SPVC_BUILTIN_RESOURCE_TYPE_INT_MAX = 0x7fffffff +} spvc_builtin_resource_type; + /* Maps to spirv_cross::SPIRType::BaseType. */ typedef enum spvc_basetype { @@ -258,21 +282,51 @@ typedef enum spvc_msl_platform } spvc_msl_platform; /* Maps to C++ API. */ -typedef enum spvc_msl_vertex_format +typedef enum spvc_msl_index_type { - SPVC_MSL_VERTEX_FORMAT_OTHER = 0, - SPVC_MSL_VERTEX_FORMAT_UINT8 = 1, - SPVC_MSL_VERTEX_FORMAT_UINT16 = 2 -} spvc_msl_vertex_format; + SPVC_MSL_INDEX_TYPE_NONE = 0, + SPVC_MSL_INDEX_TYPE_UINT16 = 1, + SPVC_MSL_INDEX_TYPE_UINT32 = 2, + SPVC_MSL_INDEX_TYPE_MAX_INT = 0x7fffffff +} spvc_msl_index_type; /* Maps to C++ API. 
*/ +typedef enum spvc_msl_shader_variable_format +{ + SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER = 0, + SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1, + SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2, + SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3, + SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4, + + /* Deprecated names. */ + SPVC_MSL_VERTEX_FORMAT_OTHER = SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER, + SPVC_MSL_VERTEX_FORMAT_UINT8 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8, + SPVC_MSL_VERTEX_FORMAT_UINT16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16, + SPVC_MSL_SHADER_INPUT_FORMAT_OTHER = SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER, + SPVC_MSL_SHADER_INPUT_FORMAT_UINT8 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8, + SPVC_MSL_SHADER_INPUT_FORMAT_UINT16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16, + SPVC_MSL_SHADER_INPUT_FORMAT_ANY16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY16, + SPVC_MSL_SHADER_INPUT_FORMAT_ANY32 = SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY32, + + + SPVC_MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff +} spvc_msl_shader_variable_format, spvc_msl_shader_input_format, spvc_msl_vertex_format; + +/* Maps to C++ API. Deprecated; use spvc_msl_shader_interface_var. */ typedef struct spvc_msl_vertex_attribute { unsigned location; + + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ unsigned msl_buffer; + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ unsigned msl_offset; + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ unsigned msl_stride; + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ spvc_bool per_instance; + spvc_msl_vertex_format format; SpvBuiltIn builtin; } spvc_msl_vertex_attribute; @@ -282,6 +336,50 @@ typedef struct spvc_msl_vertex_attribute */ SPVC_PUBLIC_API void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr); +/* Maps to C++ API. Deprecated; use spvc_msl_shader_interface_var_2. 
*/ +typedef struct spvc_msl_shader_interface_var +{ + unsigned location; + spvc_msl_vertex_format format; + SpvBuiltIn builtin; + unsigned vecsize; +} spvc_msl_shader_interface_var, spvc_msl_shader_input; + +/* + * Initializes the shader input struct. + * Deprecated. Use spvc_msl_shader_interface_var_init_2(). + */ +SPVC_PUBLIC_API void spvc_msl_shader_interface_var_init(spvc_msl_shader_interface_var *var); +/* + * Deprecated. Use spvc_msl_shader_interface_var_init_2(). + */ +SPVC_PUBLIC_API void spvc_msl_shader_input_init(spvc_msl_shader_input *input); + +/* Maps to C++ API. */ +typedef enum spvc_msl_shader_variable_rate +{ + SPVC_MSL_SHADER_VARIABLE_RATE_PER_VERTEX = 0, + SPVC_MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE = 1, + SPVC_MSL_SHADER_VARIABLE_RATE_PER_PATCH = 2, + + SPVC_MSL_SHADER_VARIABLE_RATE_INT_MAX = 0x7fffffff, +} spvc_msl_shader_variable_rate; + +/* Maps to C++ API. */ +typedef struct spvc_msl_shader_interface_var_2 +{ + unsigned location; + spvc_msl_shader_variable_format format; + SpvBuiltIn builtin; + unsigned vecsize; + spvc_msl_shader_variable_rate rate; +} spvc_msl_shader_interface_var_2; + +/* + * Initializes the shader interface variable struct. + */ +SPVC_PUBLIC_API void spvc_msl_shader_interface_var_init_2(spvc_msl_shader_interface_var_2 *var); + /* Maps to C++ API. */ typedef struct spvc_msl_resource_binding { @@ -370,6 +468,55 @@ typedef enum spvc_msl_sampler_border_color SPVC_MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff } spvc_msl_sampler_border_color; +/* Maps to C++ API. */ +typedef enum spvc_msl_format_resolution +{ + SPVC_MSL_FORMAT_RESOLUTION_444 = 0, + SPVC_MSL_FORMAT_RESOLUTION_422, + SPVC_MSL_FORMAT_RESOLUTION_420, + SPVC_MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff +} spvc_msl_format_resolution; + +/* Maps to C++ API. 
*/ +typedef enum spvc_msl_chroma_location +{ + SPVC_MSL_CHROMA_LOCATION_COSITED_EVEN = 0, + SPVC_MSL_CHROMA_LOCATION_MIDPOINT, + SPVC_MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff +} spvc_msl_chroma_location; + +/* Maps to C++ API. */ +typedef enum spvc_msl_component_swizzle +{ + SPVC_MSL_COMPONENT_SWIZZLE_IDENTITY = 0, + SPVC_MSL_COMPONENT_SWIZZLE_ZERO, + SPVC_MSL_COMPONENT_SWIZZLE_ONE, + SPVC_MSL_COMPONENT_SWIZZLE_R, + SPVC_MSL_COMPONENT_SWIZZLE_G, + SPVC_MSL_COMPONENT_SWIZZLE_B, + SPVC_MSL_COMPONENT_SWIZZLE_A, + SPVC_MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff +} spvc_msl_component_swizzle; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_ycbcr_model_conversion +{ + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff +} spvc_msl_sampler_ycbcr_model_conversion; + +/* Maps to C+ API. */ +typedef enum spvc_msl_sampler_ycbcr_range +{ + SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, + SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, + SPVC_MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff +} spvc_msl_sampler_ycbcr_range; + /* Maps to C++ API. */ typedef struct spvc_msl_constexpr_sampler { @@ -397,6 +544,64 @@ typedef struct spvc_msl_constexpr_sampler */ SPVC_PUBLIC_API void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler); +/* Maps to the sampler Y'CbCr conversion-related portions of MSLConstexprSampler. See C++ API for defaults and details. 
*/ +typedef struct spvc_msl_sampler_ycbcr_conversion +{ + unsigned planes; + spvc_msl_format_resolution resolution; + spvc_msl_sampler_filter chroma_filter; + spvc_msl_chroma_location x_chroma_offset; + spvc_msl_chroma_location y_chroma_offset; + spvc_msl_component_swizzle swizzle[4]; + spvc_msl_sampler_ycbcr_model_conversion ycbcr_model; + spvc_msl_sampler_ycbcr_range ycbcr_range; + unsigned bpc; +} spvc_msl_sampler_ycbcr_conversion; + +/* + * Initializes the constexpr sampler struct. + * The defaults are non-zero. + */ +SPVC_PUBLIC_API void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv); + +/* Maps to C++ API. */ +typedef enum spvc_hlsl_binding_flag_bits +{ + SPVC_HLSL_BINDING_AUTO_NONE_BIT = 0, + SPVC_HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT = 1 << 0, + SPVC_HLSL_BINDING_AUTO_CBV_BIT = 1 << 1, + SPVC_HLSL_BINDING_AUTO_SRV_BIT = 1 << 2, + SPVC_HLSL_BINDING_AUTO_UAV_BIT = 1 << 3, + SPVC_HLSL_BINDING_AUTO_SAMPLER_BIT = 1 << 4, + SPVC_HLSL_BINDING_AUTO_ALL = 0x7fffffff +} spvc_hlsl_binding_flag_bits; +typedef unsigned spvc_hlsl_binding_flags; + +#define SPVC_HLSL_PUSH_CONSTANT_DESC_SET (~(0u)) +#define SPVC_HLSL_PUSH_CONSTANT_BINDING (0) + +/* Maps to C++ API. */ +typedef struct spvc_hlsl_resource_binding_mapping +{ + unsigned register_space; + unsigned register_binding; +} spvc_hlsl_resource_binding_mapping; + +typedef struct spvc_hlsl_resource_binding +{ + SpvExecutionModel stage; + unsigned desc_set; + unsigned binding; + + spvc_hlsl_resource_binding_mapping cbv, uav, srv, sampler; +} spvc_hlsl_resource_binding; + +/* + * Initializes the resource binding struct. + * The defaults are non-zero. + */ +SPVC_PUBLIC_API void spvc_hlsl_resource_binding_init(spvc_hlsl_resource_binding *binding); + /* Maps to the various spirv_cross::Compiler*::Option structures. See C++ API for defaults and details. 
*/ typedef enum spvc_compiler_option { @@ -452,6 +657,74 @@ typedef enum spvc_compiler_option SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES = 37 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_MSL_MULTIVIEW = 38 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX = 39 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX = 40 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX = 41 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE = 42 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D = 44 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO = 45 | SPVC_COMPILER_OPTION_MSL_BIT, + + /* Obsolete. Use MSL_FRAMEBUFFER_FETCH_SUBPASS instead. */ + SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH = 47 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY = 48 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING = 49 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_ACTIVE_ARGUMENT_BUFFER_RESOURCES = 50 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_NATIVE_ARRAYS = 51 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_ENABLE_STORAGE_IMAGE_QUALIFIER_DEDUCTION = 52 | SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_HLSL_FORCE_STORAGE_BUFFER_AS_UAV = 53 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_FORCE_ZERO_INITIALIZED_VARIABLES = 54 | SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV = 55 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_OUTPUT_MASK = 56 | 
SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_DEPTH_BUILTIN = 57 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_STENCIL_REF_BUILTIN = 58 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_CLIP_DISTANCE_USER_VARYING = 59 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_HLSL_ENABLE_16BIT_TYPES = 60 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_MULTI_PATCH_WORKGROUP = 61 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_BUFFER_INDEX = 62 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_INDEX_BUFFER_INDEX = 63 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VERTEX_FOR_TESSELLATION = 64 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VERTEX_INDEX_TYPE = 65 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_GLSL_FORCE_FLATTENED_IO_BLOCKS = 66 | SPVC_COMPILER_OPTION_GLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_MULTIVIEW_LAYERED_RENDERING = 67 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ARRAYED_SUBPASS_INPUT = 68 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_R32UI_LINEAR_TEXTURE_ALIGNMENT = 69 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_R32UI_ALIGNMENT_CONSTANT_ID = 70 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_HLSL_FLATTEN_MATRIX_VERTEX_INPUT_SEMANTICS = 71 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_IOS_USE_SIMDGROUP_FUNCTIONS = 72 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_EMULATE_SUBGROUPS = 73 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FIXED_SUBGROUP_SIZE = 74 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_SAMPLE_RATE_SHADING = 75 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_IOS_SUPPORT_BASE_VERTEX_INSTANCE = 76 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_GLSL_OVR_MULTIVIEW_VIEW_COUNT = 77 | SPVC_COMPILER_OPTION_GLSL_BIT, + + SPVC_COMPILER_OPTION_RELAX_NAN_CHECKS = 78 | 
SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_MSL_RAW_BUFFER_TESE_INPUT = 79 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_INPUT_BUFFER_INDEX = 80 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_MANUAL_HELPER_INVOCATION_UPDATES = 81 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_CHECK_DISCARDED_FRAG_STORES = 82 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_GLSL_ENABLE_ROW_MAJOR_LOAD_WORKAROUND = 83 | SPVC_COMPILER_OPTION_GLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS_TIER = 84 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; @@ -512,6 +785,12 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_add_header_line(spvc_compiler compiler SPVC_PUBLIC_API spvc_result spvc_compiler_require_extension(spvc_compiler compiler, const char *ext); SPVC_PUBLIC_API spvc_result spvc_compiler_flatten_buffer_block(spvc_compiler compiler, spvc_variable_id id); +SPVC_PUBLIC_API spvc_bool spvc_compiler_variable_is_depth_or_compare(spvc_compiler compiler, spvc_variable_id id); + +SPVC_PUBLIC_API spvc_result spvc_compiler_mask_stage_output_by_location(spvc_compiler compiler, + unsigned location, unsigned component); +SPVC_PUBLIC_API spvc_result spvc_compiler_mask_stage_output_by_builtin(spvc_compiler compiler, SpvBuiltIn builtin); + /* * HLSL specifics. * Maps to C++ API. 
@@ -524,6 +803,16 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_add_vertex_attribute_remap(spvc_c size_t remaps); SPVC_PUBLIC_API spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_set_resource_binding_flags(spvc_compiler compiler, + spvc_hlsl_binding_flags flags); + +SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_add_resource_binding(spvc_compiler compiler, + const spvc_hlsl_resource_binding *binding); +SPVC_PUBLIC_API spvc_bool spvc_compiler_hlsl_is_resource_used(spvc_compiler compiler, + SpvExecutionModel model, + unsigned set, + unsigned binding); + /* * MSL specifics. * Maps to C++ API. @@ -542,19 +831,44 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler const spvc_msl_vertex_attribute *attrs); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, const spvc_msl_resource_binding *binding); +/* Deprecated; use spvc_compiler_msl_add_shader_input_2(). */ +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, + const spvc_msl_shader_interface_var *input); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_input_2(spvc_compiler compiler, + const spvc_msl_shader_interface_var_2 *input); +/* Deprecated; use spvc_compiler_msl_add_shader_output_2(). */ +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_output(spvc_compiler compiler, + const spvc_msl_shader_interface_var *output); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_output_2(spvc_compiler compiler, + const spvc_msl_shader_interface_var_2 *output); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address); + +/* Obsolete, use is_shader_input_used. 
*/ SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_input_used(spvc_compiler compiler, unsigned location); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_output_used(spvc_compiler compiler, unsigned location); + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, unsigned set, unsigned binding); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, unsigned components); SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, spvc_variable_id id); SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, spvc_variable_id id); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index); + +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_inline_uniform_block(spvc_compiler compiler, unsigned desc_set, unsigned binding); + +SPVC_PUBLIC_API spvc_result 
spvc_compiler_msl_set_combined_sampler_suffix(spvc_compiler compiler, const char *suffix); +SPVC_PUBLIC_API const char *spvc_compiler_msl_get_combined_sampler_suffix(spvc_compiler compiler); + /* * Reflect resources. * Maps almost 1:1 to C++ API. @@ -569,6 +883,11 @@ SPVC_PUBLIC_API spvc_result spvc_resources_get_resource_list_for_type(spvc_resou const spvc_reflected_resource **resource_list, size_t *resource_size); +SPVC_PUBLIC_API spvc_result spvc_resources_get_builtin_resource_list_for_type( + spvc_resources resources, spvc_builtin_resource_type type, + const spvc_reflected_builtin_resource **resource_list, + size_t *resource_size); + /* * Decorations. * Maps to C++ API. @@ -625,6 +944,8 @@ SPVC_PUBLIC_API unsigned spvc_compiler_get_execution_mode_argument(spvc_compiler SPVC_PUBLIC_API unsigned spvc_compiler_get_execution_mode_argument_by_index(spvc_compiler compiler, SpvExecutionMode mode, unsigned index); SPVC_PUBLIC_API SpvExecutionModel spvc_compiler_get_execution_model(spvc_compiler compiler); +SPVC_PUBLIC_API void spvc_compiler_update_active_builtins(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_bool spvc_compiler_has_active_builtin(spvc_compiler compiler, SpvBuiltIn builtin, SpvStorageClass storage); /* * Type query interface. @@ -632,6 +953,12 @@ SPVC_PUBLIC_API SpvExecutionModel spvc_compiler_get_execution_model(spvc_compile */ SPVC_PUBLIC_API spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id); +/* Pulls out SPIRType::self. This effectively gives the type ID without array or pointer qualifiers. + * This is necessary when reflecting decoration/name information on members of a struct, + * which are placed in the base type, not the qualified type. + * This is similar to spvc_reflected_resource::base_type_id. 
*/ +SPVC_PUBLIC_API spvc_type_id spvc_type_get_base_type_id(spvc_type type); + SPVC_PUBLIC_API spvc_basetype spvc_type_get_basetype(spvc_type type); SPVC_PUBLIC_API unsigned spvc_type_get_bit_width(spvc_type type); SPVC_PUBLIC_API unsigned spvc_type_get_vector_size(spvc_type type); diff --git a/spirv_cross_containers.hpp b/spirv_cross_containers.hpp index 31a8abbd0d3..50513f49e7b 100644 --- a/spirv_cross_containers.hpp +++ b/spirv_cross_containers.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2019 Hans-Kristian Arntzen + * Copyright 2019-2021 Hans-Kristian Arntzen + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_CONTAINERS_HPP #define SPIRV_CROSS_CONTAINERS_HPP @@ -21,8 +28,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -61,7 +70,8 @@ class AlignedBuffer private: #if defined(_MSC_VER) && _MSC_VER < 1900 // MSVC 2013 workarounds, sigh ... 
- union { + union + { char aligned_char[sizeof(T) * N]; double dummy_aligner; } u; @@ -85,72 +95,72 @@ template class VectorView { public: - T &operator[](size_t i) + T &operator[](size_t i) SPIRV_CROSS_NOEXCEPT { return ptr[i]; } - const T &operator[](size_t i) const + const T &operator[](size_t i) const SPIRV_CROSS_NOEXCEPT { return ptr[i]; } - bool empty() const + bool empty() const SPIRV_CROSS_NOEXCEPT { return buffer_size == 0; } - size_t size() const + size_t size() const SPIRV_CROSS_NOEXCEPT { return buffer_size; } - T *data() + T *data() SPIRV_CROSS_NOEXCEPT { return ptr; } - const T *data() const + const T *data() const SPIRV_CROSS_NOEXCEPT { return ptr; } - T *begin() + T *begin() SPIRV_CROSS_NOEXCEPT { return ptr; } - T *end() + T *end() SPIRV_CROSS_NOEXCEPT { return ptr + buffer_size; } - const T *begin() const + const T *begin() const SPIRV_CROSS_NOEXCEPT { return ptr; } - const T *end() const + const T *end() const SPIRV_CROSS_NOEXCEPT { return ptr + buffer_size; } - T &front() + T &front() SPIRV_CROSS_NOEXCEPT { return ptr[0]; } - const T &front() const + const T &front() const SPIRV_CROSS_NOEXCEPT { return ptr[0]; } - T &back() + T &back() SPIRV_CROSS_NOEXCEPT { return ptr[buffer_size - 1]; } - const T &back() const + const T &back() const SPIRV_CROSS_NOEXCEPT { return ptr[buffer_size - 1]; } @@ -194,14 +204,14 @@ template class SmallVector : public VectorView { public: - SmallVector() + SmallVector() SPIRV_CROSS_NOEXCEPT { this->ptr = stack_storage.data(); buffer_capacity = N; } - SmallVector(const T *arg_list_begin, const T *arg_list_end) - : SmallVector() + template + SmallVector(const U *arg_list_begin, const U *arg_list_end) SPIRV_CROSS_NOEXCEPT : SmallVector() { auto count = size_t(arg_list_end - arg_list_begin); reserve(count); @@ -210,6 +220,16 @@ class SmallVector : public VectorView this->buffer_size = count; } + template + SmallVector(std::initializer_list init) SPIRV_CROSS_NOEXCEPT : SmallVector(init.begin(), init.end()) + { + } + + 
template + explicit SmallVector(const U (&init)[M]) SPIRV_CROSS_NOEXCEPT : SmallVector(init, init + M) + { + } + SmallVector(SmallVector &&other) SPIRV_CROSS_NOEXCEPT : SmallVector() { *this = std::move(other); @@ -245,14 +265,16 @@ class SmallVector : public VectorView return *this; } - SmallVector(const SmallVector &other) - : SmallVector() + SmallVector(const SmallVector &other) SPIRV_CROSS_NOEXCEPT : SmallVector() { *this = other; } - SmallVector &operator=(const SmallVector &other) + SmallVector &operator=(const SmallVector &other) SPIRV_CROSS_NOEXCEPT { + if (this == &other) + return *this; + clear(); reserve(other.buffer_size); for (size_t i = 0; i < other.buffer_size; i++) @@ -261,8 +283,7 @@ class SmallVector : public VectorView return *this; } - explicit SmallVector(size_t count) - : SmallVector() + explicit SmallVector(size_t count) SPIRV_CROSS_NOEXCEPT : SmallVector() { resize(count); } @@ -274,28 +295,28 @@ class SmallVector : public VectorView free(this->ptr); } - void clear() + void clear() SPIRV_CROSS_NOEXCEPT { for (size_t i = 0; i < this->buffer_size; i++) this->ptr[i].~T(); this->buffer_size = 0; } - void push_back(const T &t) + void push_back(const T &t) SPIRV_CROSS_NOEXCEPT { reserve(this->buffer_size + 1); new (&this->ptr[this->buffer_size]) T(t); this->buffer_size++; } - void push_back(T &&t) + void push_back(T &&t) SPIRV_CROSS_NOEXCEPT { reserve(this->buffer_size + 1); new (&this->ptr[this->buffer_size]) T(std::move(t)); this->buffer_size++; } - void pop_back() + void pop_back() SPIRV_CROSS_NOEXCEPT { // Work around false positive warning on GCC 8.3. // Calling pop_back on empty vector is undefined. @@ -304,31 +325,42 @@ class SmallVector : public VectorView } template - void emplace_back(Ts &&... ts) + void emplace_back(Ts &&... 
ts) SPIRV_CROSS_NOEXCEPT { reserve(this->buffer_size + 1); new (&this->ptr[this->buffer_size]) T(std::forward(ts)...); this->buffer_size++; } - void reserve(size_t count) + void reserve(size_t count) SPIRV_CROSS_NOEXCEPT { + if ((count > (std::numeric_limits::max)() / sizeof(T)) || + (count > (std::numeric_limits::max)() / 2)) + { + // Only way this should ever happen is with garbage input, terminate. + std::terminate(); + } + if (count > buffer_capacity) { size_t target_capacity = buffer_capacity; if (target_capacity == 0) target_capacity = 1; - if (target_capacity < N) - target_capacity = N; + // Weird parens works around macro issues on Windows if NOMINMAX is not used. + target_capacity = (std::max)(target_capacity, N); + + // Need to ensure there is a POT value of target capacity which is larger than count, + // otherwise this will overflow. while (target_capacity < count) target_capacity <<= 1u; T *new_buffer = target_capacity > N ? static_cast(malloc(target_capacity * sizeof(T))) : stack_storage.data(); + // If we actually fail this malloc, we are hosed anyways, there is no reason to attempt recovery. if (!new_buffer) - SPIRV_CROSS_THROW("Out of memory."); + std::terminate(); // In case for some reason two allocations both come from same stack. if (new_buffer != this->ptr) @@ -348,7 +380,7 @@ class SmallVector : public VectorView } } - void insert(T *itr, const T *insert_begin, const T *insert_end) + void insert(T *itr, const T *insert_begin, const T *insert_end) SPIRV_CROSS_NOEXCEPT { auto count = size_t(insert_end - insert_begin); if (itr == this->end()) @@ -374,8 +406,10 @@ class SmallVector : public VectorView // Need to allocate new buffer. Move everything to a new buffer. T *new_buffer = target_capacity > N ? static_cast(malloc(target_capacity * sizeof(T))) : stack_storage.data(); + + // If we actually fail this malloc, we are hosed anyways, there is no reason to attempt recovery. 
if (!new_buffer) - SPIRV_CROSS_THROW("Out of memory."); + std::terminate(); // First, move elements from source buffer to new buffer. // We don't deal with types which can throw in move constructor. @@ -447,19 +481,19 @@ class SmallVector : public VectorView } } - void insert(T *itr, const T &value) + void insert(T *itr, const T &value) SPIRV_CROSS_NOEXCEPT { insert(itr, &value, &value + 1); } - T *erase(T *itr) + T *erase(T *itr) SPIRV_CROSS_NOEXCEPT { std::move(itr + 1, this->end(), itr); this->ptr[--this->buffer_size].~T(); return itr; } - void erase(T *start_erase, T *end_erase) + void erase(T *start_erase, T *end_erase) SPIRV_CROSS_NOEXCEPT { if (end_erase == this->end()) { @@ -473,7 +507,7 @@ class SmallVector : public VectorView } } - void resize(size_t new_size) + void resize(size_t new_size) SPIRV_CROSS_NOEXCEPT { if (new_size < this->buffer_size) { @@ -519,7 +553,7 @@ class ObjectPoolBase { public: virtual ~ObjectPoolBase() = default; - virtual void free_opaque(void *ptr) = 0; + virtual void deallocate_opaque(void *ptr) = 0; }; template @@ -553,15 +587,15 @@ class ObjectPool : public ObjectPoolBase return ptr; } - void free(T *ptr) + void deallocate(T *ptr) { ptr->~T(); vacants.push_back(ptr); } - void free_opaque(void *ptr) override + void deallocate_opaque(void *ptr) override { - free(static_cast(ptr)); + deallocate(static_cast(ptr)); } void clear() diff --git a/spirv_cross_error_handling.hpp b/spirv_cross_error_handling.hpp index e821c043d5d..e96ebb9a796 100644 --- a/spirv_cross_error_handling.hpp +++ b/spirv_cross_error_handling.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,13 +15,21 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. 
The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_ERROR_HANDLING #define SPIRV_CROSS_ERROR_HANDLING -#include #include #include #include +#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS +#include +#endif #ifdef SPIRV_CROSS_NAMESPACE_OVERRIDE #define SPIRV_CROSS_NAMESPACE SPIRV_CROSS_NAMESPACE_OVERRIDE @@ -33,6 +42,8 @@ namespace SPIRV_CROSS_NAMESPACE #ifdef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS #if !defined(_MSC_VER) || defined(__clang__) [[noreturn]] +#elif defined(_MSC_VER) +__declspec(noreturn) #endif inline void report_and_abort(const std::string &msg) diff --git a/spirv_cross_parsed_ir.cpp b/spirv_cross_parsed_ir.cpp index f0b6f7b1df8..8d1acf69f97 100644 --- a/spirv_cross_parsed_ir.cpp +++ b/spirv_cross_parsed_ir.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2018-2019 Arm Limited + * Copyright 2018-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_cross_parsed_ir.hpp" #include #include @@ -47,32 +54,36 @@ ParsedIR::ParsedIR() // Should have been default-implemented, but need this on MSVC 2013. 
ParsedIR::ParsedIR(ParsedIR &&other) SPIRV_CROSS_NOEXCEPT { - *this = move(other); + *this = std::move(other); } ParsedIR &ParsedIR::operator=(ParsedIR &&other) SPIRV_CROSS_NOEXCEPT { if (this != &other) { - pool_group = move(other.pool_group); - spirv = move(other.spirv); - meta = move(other.meta); + pool_group = std::move(other.pool_group); + spirv = std::move(other.spirv); + meta = std::move(other.meta); for (int i = 0; i < TypeCount; i++) - ids_for_type[i] = move(other.ids_for_type[i]); - ids_for_constant_or_type = move(other.ids_for_constant_or_type); - ids_for_constant_or_variable = move(other.ids_for_constant_or_variable); - declared_capabilities = move(other.declared_capabilities); - declared_extensions = move(other.declared_extensions); - block_meta = move(other.block_meta); - continue_block_to_loop_header = move(other.continue_block_to_loop_header); - entry_points = move(other.entry_points); - ids = move(other.ids); + ids_for_type[i] = std::move(other.ids_for_type[i]); + ids_for_constant_undef_or_type = std::move(other.ids_for_constant_undef_or_type); + ids_for_constant_or_variable = std::move(other.ids_for_constant_or_variable); + declared_capabilities = std::move(other.declared_capabilities); + declared_extensions = std::move(other.declared_extensions); + block_meta = std::move(other.block_meta); + continue_block_to_loop_header = std::move(other.continue_block_to_loop_header); + entry_points = std::move(other.entry_points); + ids = std::move(other.ids); addressing_model = other.addressing_model; memory_model = other.memory_model; default_entry_point = other.default_entry_point; source = other.source; - loop_iteration_depth = other.loop_iteration_depth; + loop_iteration_depth_hard = other.loop_iteration_depth_hard; + loop_iteration_depth_soft = other.loop_iteration_depth_soft; + + meta_needing_name_fixup = std::move(other.meta_needing_name_fixup); + load_type_width = std::move(other.load_type_width); } return *this; } @@ -91,7 +102,7 @@ ParsedIR 
&ParsedIR::operator=(const ParsedIR &other) meta = other.meta; for (int i = 0; i < TypeCount; i++) ids_for_type[i] = other.ids_for_type[i]; - ids_for_constant_or_type = other.ids_for_constant_or_type; + ids_for_constant_undef_or_type = other.ids_for_constant_undef_or_type; ids_for_constant_or_variable = other.ids_for_constant_or_variable; declared_capabilities = other.declared_capabilities; declared_extensions = other.declared_extensions; @@ -100,10 +111,15 @@ ParsedIR &ParsedIR::operator=(const ParsedIR &other) entry_points = other.entry_points; default_entry_point = other.default_entry_point; source = other.source; - loop_iteration_depth = other.loop_iteration_depth; + loop_iteration_depth_hard = other.loop_iteration_depth_hard; + loop_iteration_depth_soft = other.loop_iteration_depth_soft; addressing_model = other.addressing_model; memory_model = other.memory_model; + + meta_needing_name_fixup = other.meta_needing_name_fixup; + load_type_width = other.load_type_width; + // Very deliberate copying of IDs. There is no default copy constructor, nor a simple default constructor. // Construct object first so we have the correct allocator set-up, then we can copy object into our new pool group. ids.clear(); @@ -126,41 +142,161 @@ void ParsedIR::set_id_bounds(uint32_t bounds) block_meta.resize(bounds); } -static string ensure_valid_identifier(const string &name, bool member) +// Roll our own versions of these functions to avoid potential locale shenanigans. +static bool is_alpha(char c) { - // Functions in glslangValidator are mangled with name( stuff. - // Normally, we would never see '(' in any legal identifiers, so just strip them out. 
- auto str = name.substr(0, name.find('(')); + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +static bool is_numeric(char c) +{ + return c >= '0' && c <= '9'; +} + +static bool is_alphanumeric(char c) +{ + return is_alpha(c) || is_numeric(c); +} + +static bool is_valid_identifier(const string &name) +{ + if (name.empty()) + return true; + + if (is_numeric(name[0])) + return false; + + for (auto c : name) + if (!is_alphanumeric(c) && c != '_') + return false; - for (uint32_t i = 0; i < str.size(); i++) + bool saw_underscore = false; + // Two underscores in a row is not a valid identifier either. + // Technically reserved, but it's easier to treat it as invalid. + for (auto c : name) { - auto &c = str[i]; + bool is_underscore = c == '_'; + if (is_underscore && saw_underscore) + return false; + saw_underscore = is_underscore; + } + + return true; +} + +static bool is_reserved_prefix(const string &name) +{ + // Generic reserved identifiers used by the implementation. + return name.compare(0, 3, "gl_", 3) == 0 || + // Ignore this case for now, might rewrite internal code to always use spv prefix. + //name.compare(0, 11, "SPIRV_Cross", 11) == 0 || + name.compare(0, 3, "spv", 3) == 0; +} + +static bool is_reserved_identifier(const string &name, bool member, bool allow_reserved_prefixes) +{ + if (!allow_reserved_prefixes && is_reserved_prefix(name)) + return true; + + if (member) + { + // Reserved member identifiers come in one form: + // _m[0-9]+$. + if (name.size() < 3) + return false; + + if (name.compare(0, 2, "_m", 2) != 0) + return false; + + size_t index = 2; + while (index < name.size() && is_numeric(name[index])) + index++; + + return index == name.size(); + } + else + { + // Reserved non-member identifiers come in two forms: + // _[0-9]+$, used for temporaries which map directly to a SPIR-V ID. + // _[0-9]+_, used for auxillary temporaries which derived from a SPIR-V ID. 
+ if (name.size() < 2) + return false; + + if (name[0] != '_' || !is_numeric(name[1])) + return false; + + size_t index = 2; + while (index < name.size() && is_numeric(name[index])) + index++; + + return index == name.size() || (index < name.size() && name[index] == '_'); + } +} - if (member) +bool ParsedIR::is_globally_reserved_identifier(std::string &str, bool allow_reserved_prefixes) +{ + return is_reserved_identifier(str, false, allow_reserved_prefixes); +} + +uint32_t ParsedIR::get_spirv_version() const +{ + return spirv[1]; +} + +static string make_unreserved_identifier(const string &name) +{ + if (is_reserved_prefix(name)) + return "_RESERVED_IDENTIFIER_FIXUP_" + name; + else + return "_RESERVED_IDENTIFIER_FIXUP" + name; +} + +void ParsedIR::sanitize_underscores(std::string &str) +{ + // Compact adjacent underscores to make it valid. + auto dst = str.begin(); + auto src = dst; + bool saw_underscore = false; + while (src != str.end()) + { + bool is_underscore = *src == '_'; + if (saw_underscore && is_underscore) { - // _m variables are reserved by the internal implementation, - // otherwise, make sure the name is a valid identifier. - if (i == 0) - c = isalpha(c) ? c : '_'; - else if (i == 2 && str[0] == '_' && str[1] == 'm') - c = isalpha(c) ? c : '_'; - else - c = isalnum(c) ? c : '_'; + src++; } else { - // _ variables are reserved by the internal implementation, - // otherwise, make sure the name is a valid identifier. - if (i == 0 || (str[0] == '_' && i == 1)) - c = isalpha(c) ? c : '_'; - else - c = isalnum(c) ? c : '_'; + if (dst != src) + *dst = *src; + dst++; + src++; + saw_underscore = is_underscore; } } + str.erase(dst, str.end()); +} + +static string ensure_valid_identifier(const string &name) +{ + // Functions in glslangValidator are mangled with name( stuff. + // Normally, we would never see '(' in any legal identifiers, so just strip them out. 
+ auto str = name.substr(0, name.find('(')); + + if (str.empty()) + return str; + + if (is_numeric(str[0])) + str[0] = '_'; + + for (auto &c : str) + if (!is_alphanumeric(c) && c != '_') + c = '_'; + + ParsedIR::sanitize_underscores(str); return str; } -const string &ParsedIR::get_name(uint32_t id) const +const string &ParsedIR::get_name(ID id) const { auto *m = find_meta(id); if (m) @@ -169,7 +305,7 @@ const string &ParsedIR::get_name(uint32_t id) const return empty_string; } -const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const +const string &ParsedIR::get_member_name(TypeID id, uint32_t index) const { auto *m = find_meta(id); if (m) @@ -182,38 +318,48 @@ const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const return empty_string; } -void ParsedIR::set_name(uint32_t id, const string &name) +void ParsedIR::sanitize_identifier(std::string &name, bool member, bool allow_reserved_prefixes) { - auto &str = meta[id].decoration.alias; - str.clear(); - - if (name.empty()) - return; - - // Reserved for temporaries. - if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) - return; - - str = ensure_valid_identifier(name, false); + if (!is_valid_identifier(name)) + name = ensure_valid_identifier(name); + if (is_reserved_identifier(name, member, allow_reserved_prefixes)) + name = make_unreserved_identifier(name); } -void ParsedIR::set_member_name(uint32_t id, uint32_t index, const string &name) +void ParsedIR::fixup_reserved_names() { - meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); - - auto &str = meta[id].members[index].alias; - str.clear(); - if (name.empty()) - return; + for (uint32_t id : meta_needing_name_fixup) + { + // Don't rename remapped variables like 'gl_LastFragDepthARM'. 
+ if (ids[id].get_type() == TypeVariable && get(id).remapped_variable) + continue; + + auto &m = meta[id]; + sanitize_identifier(m.decoration.alias, false, false); + for (auto &memb : m.members) + sanitize_identifier(memb.alias, true, false); + } + meta_needing_name_fixup.clear(); +} - // Reserved for unnamed members. - if (name[0] == '_' && name.size() >= 3 && name[1] == 'm' && isdigit(name[2])) - return; +void ParsedIR::set_name(ID id, const string &name) +{ + auto &m = meta[id]; + m.decoration.alias = name; + if (!is_valid_identifier(name) || is_reserved_identifier(name, false, false)) + meta_needing_name_fixup.insert(id); +} - str = ensure_valid_identifier(name, true); +void ParsedIR::set_member_name(TypeID id, uint32_t index, const string &name) +{ + auto &m = meta[id]; + m.members.resize(max(m.members.size(), size_t(index) + 1)); + m.members[index].alias = name; + if (!is_valid_identifier(name) || is_reserved_identifier(name, true, false)) + meta_needing_name_fixup.insert(id); } -void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const string &argument) +void ParsedIR::set_decoration_string(ID id, Decoration decoration, const string &argument) { auto &dec = meta[id].decoration; dec.decoration_flags.set(decoration); @@ -229,7 +375,7 @@ void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const s } } -void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argument) +void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument) { auto &dec = meta[id].decoration; dec.decoration_flags.set(decoration); @@ -253,6 +399,18 @@ void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argum dec.offset = argument; break; + case DecorationXfbBuffer: + dec.xfb_buffer = argument; + break; + + case DecorationXfbStride: + dec.xfb_stride = argument; + break; + + case DecorationStream: + dec.stream = argument; + break; + case DecorationArrayStride: dec.array_stride = argument; break; 
@@ -295,10 +453,11 @@ void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argum } } -void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument) +void ParsedIR::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument) { - meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); - auto &dec = meta[id].members[index]; + auto &m = meta[id]; + m.members.resize(max(m.members.size(), size_t(index) + 1)); + auto &dec = m.members[index]; dec.decoration_flags.set(decoration); switch (decoration) @@ -324,6 +483,18 @@ void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration dec dec.offset = argument; break; + case DecorationXfbBuffer: + dec.xfb_buffer = argument; + break; + + case DecorationXfbStride: + dec.xfb_stride = argument; + break; + + case DecorationStream: + dec.stream = argument; + break; + case DecorationSpecId: dec.spec_id = argument; break; @@ -343,7 +514,7 @@ void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration dec // Recursively marks any constants referenced by the specified constant instruction as being used // as an array length. The id must be a constant instruction (SPIRConstant or SPIRConstantOp). 
-void ParsedIR::mark_used_as_array_length(uint32_t id) +void ParsedIR::mark_used_as_array_length(ID id) { switch (ids[id].get_type()) { @@ -354,8 +525,16 @@ void ParsedIR::mark_used_as_array_length(uint32_t id) case TypeConstantOp: { auto &cop = get(id); - for (uint32_t arg_id : cop.arguments) - mark_used_as_array_length(arg_id); + if (cop.opcode == OpCompositeExtract) + mark_used_as_array_length(cop.arguments[0]); + else if (cop.opcode == OpCompositeInsert) + { + mark_used_as_array_length(cop.arguments[0]); + mark_used_as_array_length(cop.arguments[1]); + } + else + for (uint32_t arg_id : cop.arguments) + mark_used_as_array_length(arg_id); break; } @@ -367,6 +546,17 @@ void ParsedIR::mark_used_as_array_length(uint32_t id) } } +Bitset ParsedIR::get_buffer_block_type_flags(const SPIRType &type) const +{ + if (type.member_types.empty()) + return {}; + + Bitset all_members_flags = get_member_decoration_bitset(type.self, 0); + for (uint32_t i = 1; i < uint32_t(type.member_types.size()); i++) + all_members_flags.merge_and(get_member_decoration_bitset(type.self, i)); + return all_members_flags; +} + Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const { auto &type = get(var.basetype); @@ -383,15 +573,12 @@ Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const if (type.member_types.empty()) return base_flags; - Bitset all_members_flags = get_member_decoration_bitset(type.self, 0); - for (uint32_t i = 1; i < uint32_t(type.member_types.size()); i++) - all_members_flags.merge_and(get_member_decoration_bitset(type.self, i)); - + auto all_members_flags = get_buffer_block_type_flags(type); base_flags.merge_or(all_members_flags); return base_flags; } -const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, uint32_t index) const +const Bitset &ParsedIR::get_member_decoration_bitset(TypeID id, uint32_t index) const { auto *m = find_meta(id); if (m) @@ -404,12 +591,12 @@ const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, 
uint32_t index return cleared_bitset; } -bool ParsedIR::has_decoration(uint32_t id, Decoration decoration) const +bool ParsedIR::has_decoration(ID id, Decoration decoration) const { return get_decoration_bitset(id).get(decoration); } -uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const +uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const { auto *m = find_meta(id); if (!m) @@ -429,6 +616,12 @@ uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const return dec.component; case DecorationOffset: return dec.offset; + case DecorationXfbBuffer: + return dec.xfb_buffer; + case DecorationXfbStride: + return dec.xfb_stride; + case DecorationStream: + return dec.stream; case DecorationBinding: return dec.binding; case DecorationDescriptorSet: @@ -450,7 +643,7 @@ uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const } } -const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration) const +const string &ParsedIR::get_decoration_string(ID id, Decoration decoration) const { auto *m = find_meta(id); if (!m) @@ -471,7 +664,7 @@ const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration } } -void ParsedIR::unset_decoration(uint32_t id, Decoration decoration) +void ParsedIR::unset_decoration(ID id, Decoration decoration) { auto &dec = meta[id].decoration; dec.decoration_flags.clear(decoration); @@ -493,6 +686,18 @@ void ParsedIR::unset_decoration(uint32_t id, Decoration decoration) dec.offset = 0; break; + case DecorationXfbBuffer: + dec.xfb_buffer = 0; + break; + + case DecorationXfbStride: + dec.xfb_stride = 0; + break; + + case DecorationStream: + dec.stream = 0; + break; + case DecorationBinding: dec.binding = 0; break; @@ -533,12 +738,12 @@ void ParsedIR::unset_decoration(uint32_t id, Decoration decoration) } } -bool ParsedIR::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +bool ParsedIR::has_member_decoration(TypeID id, 
uint32_t index, Decoration decoration) const { return get_member_decoration_bitset(id, index).get(decoration); } -uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +uint32_t ParsedIR::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { auto *m = find_meta(id); if (!m) @@ -563,6 +768,12 @@ uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration return dec.binding; case DecorationOffset: return dec.offset; + case DecorationXfbBuffer: + return dec.xfb_buffer; + case DecorationXfbStride: + return dec.xfb_stride; + case DecorationStream: + return dec.stream; case DecorationSpecId: return dec.spec_id; case DecorationIndex: @@ -572,7 +783,7 @@ uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration } } -const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const +const Bitset &ParsedIR::get_decoration_bitset(ID id) const { auto *m = find_meta(id); if (m) @@ -584,9 +795,10 @@ const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const return cleared_bitset; } -void ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration, const string &argument) +void ParsedIR::set_member_decoration_string(TypeID id, uint32_t index, Decoration decoration, const string &argument) { - meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); + auto &m = meta[id]; + m.members.resize(max(m.members.size(), size_t(index) + 1)); auto &dec = meta[id].members[index]; dec.decoration_flags.set(decoration); @@ -601,7 +813,7 @@ void ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decorat } } -const string &ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const +const string &ParsedIR::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const { auto *m = find_meta(id); if (m) @@ -624,7 +836,7 @@ const string 
&ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index return empty_string; } -void ParsedIR::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration) +void ParsedIR::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration) { auto &m = meta[id]; if (index >= m.members.size()) @@ -651,6 +863,18 @@ void ParsedIR::unset_member_decoration(uint32_t id, uint32_t index, Decoration d dec.offset = 0; break; + case DecorationXfbBuffer: + dec.xfb_buffer = 0; + break; + + case DecorationXfbStride: + dec.xfb_stride = 0; + break; + + case DecorationStream: + dec.stream = 0; + break; + case DecorationSpecId: dec.spec_id = 0; break; @@ -677,7 +901,7 @@ uint32_t ParsedIR::increase_bound_by(uint32_t incr_amount) return uint32_t(curr_bound); } -void ParsedIR::remove_typed_id(Types type, uint32_t id) +void ParsedIR::remove_typed_id(Types type, ID id) { auto &type_ids = ids_for_type[type]; type_ids.erase(remove(begin(type_ids), end(type_ids), id), end(type_ids)); @@ -692,18 +916,25 @@ void ParsedIR::reset_all_of_type(Types type) ids_for_type[type].clear(); } -void ParsedIR::add_typed_id(Types type, uint32_t id) +void ParsedIR::add_typed_id(Types type, ID id) { - if (loop_iteration_depth) + if (loop_iteration_depth_hard != 0) SPIRV_CROSS_THROW("Cannot add typed ID while looping over it."); + if (loop_iteration_depth_soft != 0) + { + if (!ids[id].empty()) + SPIRV_CROSS_THROW("Cannot override IDs when loop is soft locked."); + return; + } + if (ids[id].empty() || ids[id].get_type() != type) { switch (type) { case TypeConstant: ids_for_constant_or_variable.push_back(id); - ids_for_constant_or_type.push_back(id); + ids_for_constant_undef_or_type.push_back(id); break; case TypeVariable: @@ -712,7 +943,8 @@ void ParsedIR::add_typed_id(Types type, uint32_t id) case TypeType: case TypeConstantOp: - ids_for_constant_or_type.push_back(id); + case TypeUndef: + ids_for_constant_undef_or_type.push_back(id); break; default: @@ -731,7 +963,7 @@ void 
ParsedIR::add_typed_id(Types type, uint32_t id) } } -const Meta *ParsedIR::find_meta(uint32_t id) const +const Meta *ParsedIR::find_meta(ID id) const { auto itr = meta.find(id); if (itr != end(meta)) @@ -740,7 +972,7 @@ const Meta *ParsedIR::find_meta(uint32_t id) const return nullptr; } -Meta *ParsedIR::find_meta(uint32_t id) +Meta *ParsedIR::find_meta(ID id) { auto itr = meta.find(id); if (itr != end(meta)) @@ -749,4 +981,94 @@ Meta *ParsedIR::find_meta(uint32_t id) return nullptr; } +ParsedIR::LoopLock ParsedIR::create_loop_hard_lock() const +{ + return ParsedIR::LoopLock(&loop_iteration_depth_hard); +} + +ParsedIR::LoopLock ParsedIR::create_loop_soft_lock() const +{ + return ParsedIR::LoopLock(&loop_iteration_depth_soft); +} + +ParsedIR::LoopLock::~LoopLock() +{ + if (lock) + (*lock)--; +} + +ParsedIR::LoopLock::LoopLock(uint32_t *lock_) + : lock(lock_) +{ + if (lock) + (*lock)++; +} + +ParsedIR::LoopLock::LoopLock(LoopLock &&other) SPIRV_CROSS_NOEXCEPT +{ + *this = std::move(other); +} + +ParsedIR::LoopLock &ParsedIR::LoopLock::operator=(LoopLock &&other) SPIRV_CROSS_NOEXCEPT +{ + if (lock) + (*lock)--; + lock = other.lock; + other.lock = nullptr; + return *this; +} + +void ParsedIR::make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_id_set) +{ + auto &constant_type = get(type); + + if (constant_type.pointer) + { + if (add_to_typed_id_set) + add_typed_id(TypeConstant, id); + auto &constant = variant_set(ids[id], type); + constant.self = id; + constant.make_null(constant_type); + } + else if (!constant_type.array.empty()) + { + assert(constant_type.parent_type); + uint32_t parent_id = increase_bound_by(1); + make_constant_null(parent_id, constant_type.parent_type, add_to_typed_id_set); + + if (!constant_type.array_size_literal.back()) + SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal."); + + SmallVector elements(constant_type.array.back()); + for (uint32_t i = 0; i < constant_type.array.back(); i++) + elements[i] = parent_id; + + 
if (add_to_typed_id_set) + add_typed_id(TypeConstant, id); + variant_set(ids[id], type, elements.data(), uint32_t(elements.size()), false).self = id; + } + else if (!constant_type.member_types.empty()) + { + uint32_t member_ids = increase_bound_by(uint32_t(constant_type.member_types.size())); + SmallVector elements(constant_type.member_types.size()); + for (uint32_t i = 0; i < constant_type.member_types.size(); i++) + { + make_constant_null(member_ids + i, constant_type.member_types[i], add_to_typed_id_set); + elements[i] = member_ids + i; + } + + if (add_to_typed_id_set) + add_typed_id(TypeConstant, id); + variant_set(ids[id], type, elements.data(), uint32_t(elements.size()), false).self = id; + } + else + { + if (add_to_typed_id_set) + add_typed_id(TypeConstant, id); + auto &constant = variant_set(ids[id], type); + constant.self = id; + constant.make_null(constant_type); + } +} + } // namespace SPIRV_CROSS_NAMESPACE diff --git a/spirv_cross_parsed_ir.hpp b/spirv_cross_parsed_ir.hpp index 79e9e15bb05..7f35c3815cd 100644 --- a/spirv_cross_parsed_ir.hpp +++ b/spirv_cross_parsed_ir.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2018-2019 Arm Limited + * Copyright 2018-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_PARSED_IR_HPP #define SPIRV_CROSS_PARSED_IR_HPP @@ -57,19 +64,26 @@ class ParsedIR SmallVector ids; // Various meta data for IDs, decorations, names, etc. - std::unordered_map meta; + std::unordered_map meta; // Holds all IDs which have a certain type. // This is needed so we can iterate through a specific kind of resource quickly, // and in-order of module declaration. 
- SmallVector ids_for_type[TypeCount]; + SmallVector ids_for_type[TypeCount]; // Special purpose lists which contain a union of types. // This is needed so we can declare specialization constants and structs in an interleaved fashion, // among other things. - // Constants can be of struct type, and struct array sizes can use specialization constants. - SmallVector ids_for_constant_or_type; - SmallVector ids_for_constant_or_variable; + // Constants can be undef or of struct type, and struct array sizes can use specialization constants. + SmallVector ids_for_constant_undef_or_type; + SmallVector ids_for_constant_or_variable; + + // We need to keep track of the width the Ops that contains a type for the + // OpSwitch instruction, since this one doesn't contains the type in the + // instruction itself. And in some case we need to cast the condition to + // wider types. We only need the width to do the branch fixup since the + // type check itself can be done at runtime + std::unordered_map load_type_width; // Declared capabilities and extensions in the SPIR-V module. // Not really used except for reflection at the moment. @@ -88,12 +102,12 @@ class ParsedIR }; using BlockMetaFlags = uint8_t; SmallVector block_meta; - std::unordered_map continue_block_to_loop_header; + std::unordered_map continue_block_to_loop_header; // Normally, we'd stick SPIREntryPoint in ids array, but it conflicts with SPIRFunction. // Entry points can therefore be seen as some sort of meta structure. - std::unordered_map entry_points; - uint32_t default_entry_point = 0; + std::unordered_map entry_points; + FunctionID default_entry_point = 0; struct Source { @@ -114,50 +128,76 @@ class ParsedIR // Can be useful for simple "raw" reflection. // However, most members are here because the Parser needs most of these, // and might as well just have the whole suite of decoration/name handling in one place. 
- void set_name(uint32_t id, const std::string &name); - const std::string &get_name(uint32_t id) const; - void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0); - void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument); - bool has_decoration(uint32_t id, spv::Decoration decoration) const; - uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const; - const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const; - const Bitset &get_decoration_bitset(uint32_t id) const; - void unset_decoration(uint32_t id, spv::Decoration decoration); + void set_name(ID id, const std::string &name); + const std::string &get_name(ID id) const; + void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0); + void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument); + bool has_decoration(ID id, spv::Decoration decoration) const; + uint32_t get_decoration(ID id, spv::Decoration decoration) const; + const std::string &get_decoration_string(ID id, spv::Decoration decoration) const; + const Bitset &get_decoration_bitset(ID id) const; + void unset_decoration(ID id, spv::Decoration decoration); // Decoration handling methods (for members of a struct). 
- void set_member_name(uint32_t id, uint32_t index, const std::string &name); - const std::string &get_member_name(uint32_t id, uint32_t index) const; - void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); - void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, + void set_member_name(TypeID id, uint32_t index, const std::string &name); + const std::string &get_member_name(TypeID id, uint32_t index) const; + void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); + void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument); - uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; - const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const; - bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; - const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const; - void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration); + uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const; + bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const; + void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration); - void mark_used_as_array_length(uint32_t id); + void mark_used_as_array_length(ID id); uint32_t increase_bound_by(uint32_t count); Bitset get_buffer_block_flags(const SPIRVariable &var) const; + Bitset get_buffer_block_type_flags(const SPIRType &type) const; + + void add_typed_id(Types type, ID id); + void remove_typed_id(Types 
type, ID id); + + class LoopLock + { + public: + explicit LoopLock(uint32_t *counter); + LoopLock(const LoopLock &) = delete; + void operator=(const LoopLock &) = delete; + LoopLock(LoopLock &&other) SPIRV_CROSS_NOEXCEPT; + LoopLock &operator=(LoopLock &&other) SPIRV_CROSS_NOEXCEPT; + ~LoopLock(); + + private: + uint32_t *lock; + }; + + // This must be held while iterating over a type ID array. + // It is undefined if someone calls set<>() while we're iterating over a data structure, so we must + // make sure that this case is avoided. - void add_typed_id(Types type, uint32_t id); - void remove_typed_id(Types type, uint32_t id); + // If we have a hard lock, it is an error to call set<>(), and an exception is thrown. + // If we have a soft lock, we silently ignore any additions to the typed arrays. + // This should only be used for physical ID remapping where we need to create an ID, but we will never + // care about iterating over them. + LoopLock create_loop_hard_lock() const; + LoopLock create_loop_soft_lock() const; template void for_each_typed_id(const Op &op) { - loop_iteration_depth++; + auto loop_lock = create_loop_hard_lock(); for (auto &id : ids_for_type[T::type]) { if (ids[id].get_type() == static_cast(T::type)) op(id, get(id)); } - loop_iteration_depth--; } template void for_each_typed_id(const Op &op) const { + auto loop_lock = create_loop_hard_lock(); for (auto &id : ids_for_type[T::type]) { if (ids[id].get_type() == static_cast(T::type)) @@ -173,14 +213,24 @@ class ParsedIR void reset_all_of_type(Types type); - Meta *find_meta(uint32_t id); - const Meta *find_meta(uint32_t id) const; + Meta *find_meta(ID id); + const Meta *find_meta(ID id) const; const std::string &get_empty_string() const { return empty_string; } + void make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_id_set); + + void fixup_reserved_names(); + + static void sanitize_underscores(std::string &str); + static void sanitize_identifier(std::string &str, bool member, bool 
allow_reserved_prefixes); + static bool is_globally_reserved_identifier(std::string &str, bool allow_reserved_prefixes); + + uint32_t get_spirv_version() const; + private: template T &get(uint32_t id) @@ -194,9 +244,12 @@ class ParsedIR return variant_get(ids[id]); } - uint32_t loop_iteration_depth = 0; + mutable uint32_t loop_iteration_depth_hard = 0; + mutable uint32_t loop_iteration_depth_soft = 0; std::string empty_string; Bitset cleared_bitset; + + std::unordered_set meta_needing_name_fixup; }; } // namespace SPIRV_CROSS_NAMESPACE diff --git a/spirv_cross_util.cpp b/spirv_cross_util.cpp index 6ab5d264568..7cff010d1c1 100644 --- a/spirv_cross_util.cpp +++ b/spirv_cross_util.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_cross_util.hpp" #include "spirv_common.hpp" diff --git a/spirv_cross_util.hpp b/spirv_cross_util.hpp index 7c4030b0b29..e6e3fcdb634 100644 --- a/spirv_cross_util.hpp +++ b/spirv_cross_util.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #ifndef SPIRV_CROSS_UTIL_HPP #define SPIRV_CROSS_UTIL_HPP diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 837135cb87e..4b22d47eaeb 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_glsl.hpp" #include "GLSL.std.450.h" #include "spirv_common.hpp" @@ -33,6 +40,13 @@ using namespace spv; using namespace SPIRV_CROSS_NAMESPACE; using namespace std; +enum ExtraSubExpressionType +{ + // Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map. + EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000, + EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000 +}; + static bool is_unsigned_opcode(Op op) { // Don't have to be exhaustive, only relevant for legacy target checking ... @@ -145,32 +159,6 @@ static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard } } -// Sanitizes underscores for GLSL where multiple underscores in a row are not allowed. 
-string CompilerGLSL::sanitize_underscores(const string &str) -{ - string res; - res.reserve(str.size()); - - bool last_underscore = false; - for (auto c : str) - { - if (c == '_') - { - if (last_underscore) - continue; - - res += c; - last_underscore = true; - } - else - { - res += c; - last_underscore = false; - } - } - return res; -} - void CompilerGLSL::init() { if (ir.source.known) @@ -286,7 +274,7 @@ static uint32_t pls_format_to_components(PlsFormat format) } } -static const char *vector_swizzle(int vecsize, int index) +const char *CompilerGLSL::vector_swizzle(int vecsize, int index) { static const char *const swizzle[4][4] = { { ".x", ".y", ".z", ".w" }, @@ -308,8 +296,19 @@ static const char *vector_swizzle(int vecsize, int index) return swizzle[vecsize - 1][index]; } -void CompilerGLSL::reset() +void CompilerGLSL::reset(uint32_t iteration_count) { + // Sanity check the iteration count to be robust against a certain class of bugs where + // we keep forcing recompilations without making clear forward progress. + // In buggy situations we will loop forever, or loop for an unbounded number of iterations. + // Certain types of recompilations are considered to make forward progress, + // but in almost all situations, we'll never see more than 3 iterations. + // It is highly context-sensitive when we need to force recompilation, + // and it is not practical with the current architecture + // to resolve everything up front. + if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress) + SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!"); + // We do some speculative optimizations which should pretty much always work out, // but just in case the SPIR-V is rather weird, recompile until it's happy. // This typically only means one extra pass. @@ -317,11 +316,18 @@ void CompilerGLSL::reset() // Clear invalid expression tracking. 
invalid_expressions.clear(); + composite_insert_overwritten.clear(); current_function = nullptr; // Clear temporary usage tracking. expression_usage_counts.clear(); forwarded_temporaries.clear(); + suppressed_usage_tracking.clear(); + + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + flushed_phi_variables.clear(); + + current_emitting_switch_stack.clear(); reset_name_caches(); @@ -337,6 +343,7 @@ void CompilerGLSL::reset() statement_count = 0; indent = 0; + current_loop_level = 0; } void CompilerGLSL::remap_pls_variables() @@ -366,6 +373,28 @@ void CompilerGLSL::remap_pls_variables() } } +void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent) +{ + subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location }); + inout_color_attachments.push_back({ color_location, coherent }); +} + +bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const +{ + return std::find_if(begin(inout_color_attachments), end(inout_color_attachments), + [&](const std::pair &elem) { + return elem.first == location; + }) != end(inout_color_attachments); +} + +bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const +{ + return std::find_if(begin(inout_color_attachments), end(inout_color_attachments), + [&](const std::pair &elem) { + return elem.first == location && !elem.second; + }) != end(inout_color_attachments); +} + void CompilerGLSL::find_static_extensions() { ir.for_each_typed_id([&](uint32_t, const SPIRType &type) { @@ -378,10 +407,9 @@ void CompilerGLSL::find_static_extensions() } else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64) { - if (options.es) - SPIRV_CROSS_THROW("64-bit integers not supported in ES profile."); - if (!options.es) - require_extension_internal("GL_ARB_gpu_shader_int64"); + if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback 
requires 310. + SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310."); + require_extension_internal("GL_ARB_gpu_shader_int64"); } else if (type.basetype == SPIRType::Half) { @@ -435,15 +463,47 @@ void CompilerGLSL::find_static_extensions() require_extension_internal("GL_ARB_tessellation_shader"); break; - case ExecutionModelRayGenerationNV: - case ExecutionModelIntersectionNV: - case ExecutionModelAnyHitNV: - case ExecutionModelClosestHitNV: - case ExecutionModelMissNV: - case ExecutionModelCallableNV: + case ExecutionModelRayGenerationKHR: + case ExecutionModelIntersectionKHR: + case ExecutionModelAnyHitKHR: + case ExecutionModelClosestHitKHR: + case ExecutionModelMissKHR: + case ExecutionModelCallableKHR: + // NV enums are aliases. if (options.es || options.version < 460) SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above."); - require_extension_internal("GL_NV_ray_tracing"); + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics."); + + // Need to figure out if we should target KHR or NV extension based on capabilities. + for (auto &cap : ir.declared_capabilities) + { + if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR || + cap == CapabilityRayTraversalPrimitiveCullingKHR) + { + ray_tracing_is_khr = true; + break; + } + } + + if (ray_tracing_is_khr) + { + // In KHR ray tracing we pass payloads by pointer instead of location, + // so make sure we assign locations properly. 
+ ray_tracing_khr_fixup_locations(); + require_extension_internal("GL_EXT_ray_tracing"); + } + else + require_extension_internal("GL_NV_ray_tracing"); + break; + + case ExecutionModelMeshEXT: + case ExecutionModelTaskEXT: + if (options.es || options.version < 450) + SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above."); + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics."); + require_extension_internal("GL_EXT_mesh_shader"); break; default: @@ -451,7 +511,35 @@ void CompilerGLSL::find_static_extensions() } if (!pls_inputs.empty() || !pls_outputs.empty()) + { + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders."); require_extension_internal("GL_EXT_shader_pixel_local_storage"); + } + + if (!inout_color_attachments.empty()) + { + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders."); + if (options.vulkan_semantics) + SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL."); + + bool has_coherent = false; + bool has_incoherent = false; + + for (auto &att : inout_color_attachments) + { + if (att.second) + has_coherent = true; + else + has_incoherent = true; + } + + if (has_coherent) + require_extension_internal("GL_EXT_shader_framebuffer_fetch"); + if (has_incoherent) + require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent"); + } if (options.separate_shader_objects && !options.es && options.version < 410) require_extension_internal("GL_ARB_separate_shader_objects"); @@ -471,38 +559,121 @@ void CompilerGLSL::find_static_extensions() SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported."); } - // Check for nonuniform qualifier. + // Check for nonuniform qualifier and passthrough. // Instead of looping over all decorations to find this, just look at capabilities. 
for (auto &cap : ir.declared_capabilities) { - bool nonuniform_indexing = false; switch (cap) { case CapabilityShaderNonUniformEXT: + if (!options.vulkan_semantics) + require_extension_internal("GL_NV_gpu_shader5"); + else + require_extension_internal("GL_EXT_nonuniform_qualifier"); + break; case CapabilityRuntimeDescriptorArrayEXT: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL."); require_extension_internal("GL_EXT_nonuniform_qualifier"); - nonuniform_indexing = true; + break; + + case CapabilityGeometryShaderPassthroughNV: + if (execution.model == ExecutionModelGeometry) + { + require_extension_internal("GL_NV_geometry_shader_passthrough"); + execution.geometry_passthrough = true; + } + break; + + case CapabilityVariablePointers: + case CapabilityVariablePointersStorageBuffer: + SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL."); + + case CapabilityMultiView: + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_multiview"); + else + { + require_extension_internal("GL_OVR_multiview2"); + if (options.ovr_multiview_view_count == 0) + SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2."); + if (get_execution_model() != ExecutionModelVertex) + SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders."); + } + break; + + case CapabilityRayQueryKHR: + if (options.es || options.version < 460 || !options.vulkan_semantics) + SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460."); + require_extension_internal("GL_EXT_ray_query"); + ray_tracing_is_khr = true; + break; + + case CapabilityRayTraversalPrimitiveCullingKHR: + if (options.es || options.version < 460 || !options.vulkan_semantics) + SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460."); + require_extension_internal("GL_EXT_ray_flags_primitive_culling"); + ray_tracing_is_khr = true; break; default: break; } + } - if (nonuniform_indexing) - break; + if 
(options.ovr_multiview_view_count) + { + if (options.vulkan_semantics) + SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics."); + if (get_execution_model() != ExecutionModelVertex) + SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders."); + require_extension_internal("GL_OVR_multiview2"); } + + // KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR. + for (auto &ext : ir.declared_extensions) + if (ext == "SPV_NV_fragment_shader_barycentric") + barycentric_is_nv = true; +} + +void CompilerGLSL::ray_tracing_khr_fixup_locations() +{ + uint32_t location = 0; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + // Incoming payload storage can also be used for tracing. + if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR && + var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR) + return; + if (is_hidden_variable(var)) + return; + set_decoration(var.self, DecorationLocation, location++); + }); } string CompilerGLSL::compile() { - if (options.vulkan_semantics) - backend.allow_precision_qualifiers = true; + ir.fixup_reserved_names(); + + if (!options.vulkan_semantics) + { + // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers + backend.nonuniform_qualifier = ""; + backend.needs_row_major_load_workaround = options.enable_row_major_load_workaround; + } + backend.allow_precision_qualifiers = options.vulkan_semantics || options.es; backend.force_gl_in_out_block = true; backend.supports_extensions = true; + backend.use_array_constructor = true; + backend.workgroup_size_is_hidden = true; + backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics; + backend.support_precise_qualifier = + (!options.es && options.version >= 400) || (options.es && options.version >= 320); + + if (is_legacy_es()) + 
backend.support_case_fallthrough = false; // Scan the SPIR-V to find trivial uses of extensions. + fixup_anonymous_struct_names(); fixup_type_alias(); reorder_type_alias(); build_function_control_flow_graphs_and_analyze(); @@ -510,6 +681,9 @@ string CompilerGLSL::compile() fixup_image_load_store_access(); update_active_builtins(); analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); + if (!inout_color_attachments.empty()) + emit_inout_fragment_outputs_copy_to_subpass_inputs(); // Shaders might cast unrelated data to pointers of non-block types. // Find all such instances and make sure we can cast the pointers to a synthesized block type. @@ -519,21 +693,32 @@ string CompilerGLSL::compile() uint32_t pass_count = 0; do { - if (pass_count >= 3) - SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); - - reset(); + reset(pass_count); buffer.reset(); emit_header(); emit_resources(); + emit_extension_workarounds(get_execution_model()); emit_function(get(ir.default_entry_point), Bitset()); pass_count++; } while (is_forcing_recompilation()); + // Implement the interlocked wrapper function at the end. + // The body was implemented in lieu of main(). + if (interlocked_is_complex) + { + statement("void main()"); + begin_scope(); + statement("// Interlocks were used in a way not compatible with GLSL, this is very slow."); + statement("SPIRV_Cross_beginInvocationInterlock();"); + statement("spvMainInterlockedBody();"); + statement("SPIRV_Cross_endInvocationInterlock();"); + end_scope(); + } + // Entry point in GLSL is always main(). 
get_entry_point().name = "main"; @@ -549,6 +734,8 @@ void CompilerGLSL::build_workgroup_size(SmallVector &arguments, const Sp const SpecializationConstant &wg_y, const SpecializationConstant &wg_z) { auto &execution = get_entry_point(); + bool builtin_workgroup = execution.workgroup_size.constant != 0; + bool use_local_size_id = !builtin_workgroup && execution.flags.get(ExecutionModeLocalSizeId); if (wg_x.id) { @@ -557,6 +744,8 @@ void CompilerGLSL::build_workgroup_size(SmallVector &arguments, const Sp else arguments.push_back(join("local_size_x = ", get(wg_x.id).specialization_constant_macro_name)); } + else if (use_local_size_id && execution.workgroup_size.id_x) + arguments.push_back(join("local_size_x = ", get(execution.workgroup_size.id_x).scalar())); else arguments.push_back(join("local_size_x = ", execution.workgroup_size.x)); @@ -567,6 +756,8 @@ void CompilerGLSL::build_workgroup_size(SmallVector &arguments, const Sp else arguments.push_back(join("local_size_y = ", get(wg_y.id).specialization_constant_macro_name)); } + else if (use_local_size_id && execution.workgroup_size.id_y) + arguments.push_back(join("local_size_y = ", get(execution.workgroup_size.id_y).scalar())); else arguments.push_back(join("local_size_y = ", execution.workgroup_size.y)); @@ -577,10 +768,27 @@ void CompilerGLSL::build_workgroup_size(SmallVector &arguments, const Sp else arguments.push_back(join("local_size_z = ", get(wg_z.id).specialization_constant_macro_name)); } + else if (use_local_size_id && execution.workgroup_size.id_z) + arguments.push_back(join("local_size_z = ", get(execution.workgroup_size.id_z).scalar())); else arguments.push_back(join("local_size_z = ", execution.workgroup_size.z)); } +void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature) +{ + if (options.vulkan_semantics) + { + auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature); + 
require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension)); + } + else + { + if (!shader_subgroup_supporter.is_feature_requested(feature)) + force_recompile(); + shader_subgroup_supporter.request_feature(feature); + } +} + void CompilerGLSL::emit_header() { auto &execution = get_entry_point(); @@ -600,9 +808,48 @@ void CompilerGLSL::emit_header() require_extension_internal("GL_ARB_shader_image_load_store"); } + // Needed for: layout(post_depth_coverage) in; + if (execution.flags.get(ExecutionModePostDepthCoverage)) + require_extension_internal("GL_ARB_post_depth_coverage"); + + // Needed for: layout({pixel,sample}_interlock_[un]ordered) in; + bool interlock_used = execution.flags.get(ExecutionModePixelInterlockOrderedEXT) || + execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT); + + if (interlock_used) + { + if (options.es) + { + if (options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock."); + require_extension_internal("GL_NV_fragment_shader_interlock"); + } + else + { + if (options.version < 420) + require_extension_internal("GL_ARB_shader_image_load_store"); + require_extension_internal("GL_ARB_fragment_shader_interlock"); + } + } + for (auto &ext : forced_extensions) { - if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") + if (ext == "GL_ARB_gpu_shader_int64") + { + statement("#if defined(GL_ARB_gpu_shader_int64)"); + statement("#extension GL_ARB_gpu_shader_int64 : require"); + if (!options.vulkan_semantics || options.es) + { + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); + } + statement("#else"); + statement("#error No extension available for 64-bit integers."); + statement("#endif"); + } + else if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") { // Special case, 
this extension has a potential fallback to another vendor extension in normal GLSL. // GL_AMD_gpu_shader_half_float is a superset, so try that first. @@ -622,23 +869,135 @@ void CompilerGLSL::emit_header() statement("#error No extension available for FP16."); statement("#endif"); } + else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int8") + { + if (options.vulkan_semantics) + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require"); + else + { + statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int8)"); + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require"); + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); + statement("#else"); + statement("#error No extension available for Int8."); + statement("#endif"); + } + } else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16") { if (options.vulkan_semantics) statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require"); else { - statement("#if defined(GL_AMD_gpu_shader_int16)"); + statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int16)"); + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require"); + statement("#elif defined(GL_AMD_gpu_shader_int16)"); statement("#extension GL_AMD_gpu_shader_int16 : require"); + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); statement("#else"); statement("#error No extension available for Int16."); statement("#endif"); } } + else if (ext == "GL_ARB_post_depth_coverage") + { + if (options.es) + statement("#extension GL_EXT_post_depth_coverage : require"); + else + { + statement("#if defined(GL_ARB_post_depth_coverge)"); + statement("#extension GL_ARB_post_depth_coverage : require"); + statement("#else"); + statement("#extension GL_EXT_post_depth_coverage : require"); + statement("#endif"); + } + } + else if (!options.vulkan_semantics && ext == 
"GL_ARB_shader_draw_parameters") + { + // Soft-enable this extension on plain GLSL. + statement("#ifdef ", ext); + statement("#extension ", ext, " : enable"); + statement("#endif"); + } + else if (ext == "GL_EXT_control_flow_attributes") + { + // These are just hints so we can conditionally enable and fallback in the shader. + statement("#if defined(GL_EXT_control_flow_attributes)"); + statement("#extension GL_EXT_control_flow_attributes : require"); + statement("#define SPIRV_CROSS_FLATTEN [[flatten]]"); + statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]"); + statement("#define SPIRV_CROSS_UNROLL [[unroll]]"); + statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]"); + statement("#else"); + statement("#define SPIRV_CROSS_FLATTEN"); + statement("#define SPIRV_CROSS_BRANCH"); + statement("#define SPIRV_CROSS_UNROLL"); + statement("#define SPIRV_CROSS_LOOP"); + statement("#endif"); + } + else if (ext == "GL_NV_fragment_shader_interlock") + { + statement("#extension GL_NV_fragment_shader_interlock : require"); + statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()"); + statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()"); + } + else if (ext == "GL_ARB_fragment_shader_interlock") + { + statement("#ifdef GL_ARB_fragment_shader_interlock"); + statement("#extension GL_ARB_fragment_shader_interlock : enable"); + statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()"); + statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()"); + statement("#elif defined(GL_INTEL_fragment_shader_ordering)"); + statement("#extension GL_INTEL_fragment_shader_ordering : enable"); + statement("#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()"); + statement("#define SPIRV_Cross_endInvocationInterlock()"); + statement("#endif"); + } else statement("#extension ", ext, " : require"); } + if (!options.vulkan_semantics) + { + using Supp 
= ShaderSubgroupSupportHelper; + auto result = shader_subgroup_supporter.resolve(); + + for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++) + { + auto feature = static_cast(feature_index); + if (!shader_subgroup_supporter.is_feature_requested(feature)) + continue; + + auto exts = Supp::get_candidates_for_feature(feature, result); + if (exts.empty()) + continue; + + statement(""); + + for (auto &ext : exts) + { + const char *name = Supp::get_extension_name(ext); + const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext); + auto extra_names = Supp::get_extra_required_extension_names(ext); + statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")", + (*extra_predicate != '\0' ? " && " : ""), extra_predicate); + for (const auto &e : extra_names) + statement("#extension ", e, " : enable"); + statement("#extension ", name, " : require"); + } + + if (!Supp::can_feature_be_implemented_without_extensions(feature)) + { + statement("#else"); + statement("#error No extensions available to emulate requested subgroup feature."); + } + + statement("#endif"); + } + } + for (auto &header : header_lines) statement(header); @@ -647,8 +1006,11 @@ void CompilerGLSL::emit_header() switch (execution.model) { + case ExecutionModelVertex: + if (options.ovr_multiview_view_count) + inputs.push_back(join("num_views = ", options.ovr_multiview_view_count)); + break; case ExecutionModelGeometry: - outputs.push_back(join("max_vertices = ", execution.output_vertices)); if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1) inputs.push_back(join("invocations = ", execution.invocations)); if (execution.flags.get(ExecutionModeInputPoints)) @@ -661,12 +1023,18 @@ void CompilerGLSL::emit_header() inputs.push_back("triangles"); if (execution.flags.get(ExecutionModeInputTrianglesAdjacency)) inputs.push_back("triangles_adjacency"); - if (execution.flags.get(ExecutionModeOutputTriangleStrip)) - 
outputs.push_back("triangle_strip"); - if (execution.flags.get(ExecutionModeOutputPoints)) - outputs.push_back("points"); - if (execution.flags.get(ExecutionModeOutputLineStrip)) - outputs.push_back("line_strip"); + + if (!execution.geometry_passthrough) + { + // For passthrough, these are implies and cannot be declared in shader. + outputs.push_back(join("max_vertices = ", execution.output_vertices)); + if (execution.flags.get(ExecutionModeOutputTriangleStrip)) + outputs.push_back("triangle_strip"); + if (execution.flags.get(ExecutionModeOutputPoints)) + outputs.push_back("points"); + if (execution.flags.get(ExecutionModeOutputLineStrip)) + outputs.push_back("line_strip"); + } break; case ExecutionModelTessellationControl: @@ -701,15 +1069,18 @@ void CompilerGLSL::emit_header() break; case ExecutionModelGLCompute: + case ExecutionModelTaskEXT: + case ExecutionModelMeshEXT: { - if (execution.workgroup_size.constant != 0) + if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId)) { SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); // If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro // declarations before we can emit the work group size. 
- if (options.vulkan_semantics || ((wg_x.id == 0) && (wg_y.id == 0) && (wg_z.id == 0))) + if (options.vulkan_semantics || + ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0)))) build_workgroup_size(inputs, wg_x, wg_y, wg_z); } else @@ -718,6 +1089,18 @@ void CompilerGLSL::emit_header() inputs.push_back(join("local_size_y = ", execution.workgroup_size.y)); inputs.push_back(join("local_size_z = ", execution.workgroup_size.z)); } + + if (execution.model == ExecutionModelMeshEXT) + { + outputs.push_back(join("max_vertices = ", execution.output_vertices)); + outputs.push_back(join("max_primitives = ", execution.output_primitives)); + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + outputs.push_back("triangles"); + else if (execution.flags.get(ExecutionModeOutputLinesEXT)) + outputs.push_back("lines"); + else if (execution.flags.get(ExecutionModeOutputPoints)) + outputs.push_back("points"); + } break; } @@ -763,6 +1146,27 @@ void CompilerGLSL::emit_header() if (execution.flags.get(ExecutionModeEarlyFragmentTests)) inputs.push_back("early_fragment_tests"); + if (execution.flags.get(ExecutionModePostDepthCoverage)) + inputs.push_back("post_depth_coverage"); + + if (interlock_used) + statement("#if defined(GL_ARB_fragment_shader_interlock)"); + + if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT)) + statement("layout(pixel_interlock_ordered) in;"); + else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT)) + statement("layout(pixel_interlock_unordered) in;"); + else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT)) + statement("layout(sample_interlock_ordered) in;"); + else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT)) + statement("layout(sample_interlock_unordered) in;"); + + if (interlock_used) + { + statement("#elif !defined(GL_INTEL_fragment_shader_ordering)"); + statement("#error Fragment Shader Interlock/Ordering extension missing!"); + statement("#endif"); + 
} if (!options.es && execution.flags.get(ExecutionModeDepthGreater)) statement("layout(depth_greater) out float gl_FragDepth;"); @@ -775,6 +1179,10 @@ void CompilerGLSL::emit_header() break; } + for (auto &cap : ir.declared_capabilities) + if (cap == CapabilityRayTraversalPrimitiveCullingKHR) + statement("layout(primitive_culling);"); + if (!inputs.empty()) statement("layout(", merge(inputs), ") in;"); if (!outputs.empty()) @@ -795,7 +1203,8 @@ void CompilerGLSL::emit_struct(SPIRType &type) // Type-punning with these types is legal, which complicates things // when we are storing struct and array types in an SSBO for example. // If the type master is packed however, we can no longer assume that the struct declaration will be redundant. - if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked)) + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) return; add_resource_name(type.self); @@ -823,6 +1232,9 @@ void CompilerGLSL::emit_struct(SPIRType &type) emitted = true; } + if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget)) + emit_struct_padding_target(type); + end_scope_decl(); if (emitted) @@ -846,8 +1258,33 @@ string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) res += "sample "; if (flags.get(DecorationInvariant)) res += "invariant "; + if (flags.get(DecorationPerPrimitiveEXT)) + res += "perprimitiveEXT "; + if (flags.get(DecorationExplicitInterpAMD)) + { + require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); res += "__explicitInterpAMD "; + } + + if (flags.get(DecorationPerVertexKHR)) + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + res += 
"pervertexNV "; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + res += "pervertexEXT "; + } + } return res; } @@ -857,8 +1294,7 @@ string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) if (is_legacy()) return ""; - bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || - ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); if (!is_block) return ""; @@ -869,6 +1305,9 @@ string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) SmallVector attr; + if (has_member_decoration(type.self, index, DecorationPassthroughNV)) + attr.push_back("passthrough"); + // We can only apply layouts on members in block interfaces. // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly. // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct @@ -911,8 +1350,11 @@ string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers. // This is only done selectively in GLSL as needed. 
- if (has_extended_decoration(type.self, SPIRVCrossDecorationPacked) && dec.decoration_flags.get(DecorationOffset)) + if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) && + dec.decoration_flags.get(DecorationOffset)) attr.push_back(join("offset = ", dec.offset)); + else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset)) + attr.push_back(join("xfb_offset = ", dec.offset)); if (attr.empty()) return ""; @@ -1160,24 +1602,22 @@ uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const B auto &tmp = get(parent); uint32_t size = type_to_packed_size(tmp, flags, packing); - if (tmp.array.empty()) - { - uint32_t alignment = type_to_packed_alignment(type, flags, packing); - return (size + alignment - 1) & ~(alignment - 1); - } - else - { - // For multidimensional arrays, array stride always matches size of subtype. - // The alignment cannot change because multidimensional arrays are basically N * M array elements. - return size; - } + uint32_t alignment = type_to_packed_alignment(type, flags, packing); + return (size + alignment - 1) & ~(alignment - 1); } uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing) { if (!type.array.empty()) { - return to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing); + uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing); + + // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size, + // so that it is possible to pack other vectors into the last element. 
+ if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct) + packed_size -= (4 - type.vecsize) * (type.width / 8); + + return packed_size; } // If using PhysicalStorageBufferEXT storage class, this is a pointer, @@ -1250,6 +1690,11 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &f else size = type.vecsize * type.columns * base_alignment; } + + // For matrices in HLSL, the last element has a size which depends on its vector size, + // so that it is possible to pack other vectors into the last element. + if (packing_is_hlsl(packing) && type.columns > 1) + size -= (4 - type.vecsize) * (type.width / 8); } } @@ -1257,7 +1702,8 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &f } bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, - uint32_t start_offset, uint32_t end_offset) + uint32_t *failed_validation_index, uint32_t start_offset, + uint32_t end_offset) { // This is very tricky and error prone, but try to be exhaustive and correct here. // SPIR-V doesn't directly say if we're using std430 or std140. @@ -1301,7 +1747,7 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty(); uint32_t packed_size = 0; - if (!member_can_be_unsized) + if (!member_can_be_unsized || packing_is_hlsl(packing)) packed_size = type_to_packed_size(memb_type, member_flags, packing); // We only need to care about this if we have non-array types which can straddle the vec4 boundary. 
@@ -1314,13 +1760,14 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin packed_alignment = max(packed_alignment, 16u); } - uint32_t alignment = max(packed_alignment, pad_alignment); - offset = (offset + alignment - 1) & ~(alignment - 1); - + uint32_t actual_offset = type_struct_member_offset(type, i); // Field is not in the specified range anymore and we can ignore any further fields. - if (offset >= end_offset) + if (actual_offset >= end_offset) break; + uint32_t alignment = max(packed_alignment, pad_alignment); + offset = (offset + alignment - 1) & ~(alignment - 1); + // The next member following a struct member is aligned to the base alignment of the struct that came before. // GL 4.5 spec, 7.6.2.2. if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer) @@ -1329,21 +1776,35 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin pad_alignment = 1; // Only care about packing if we are in the given range - if (offset >= start_offset) + if (actual_offset >= start_offset) { // We only care about offsets in std140, std430, etc ... // For EnhancedLayout variants, we have the flexibility to choose our own offsets. if (!packing_has_flexible_offset(packing)) { - uint32_t actual_offset = type_struct_member_offset(type, i); if (actual_offset != offset) // This cannot be the packing we're looking for. + { + if (failed_validation_index) + *failed_validation_index = i; return false; + } + } + else if ((actual_offset & (alignment - 1)) != 0) + { + // We still need to verify that alignment rules are observed, even if we have explicit offset. + if (failed_validation_index) + *failed_validation_index = i; + return false; } // Verify array stride rules. 
if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) != type_struct_member_array_stride(type, i)) + { + if (failed_validation_index) + *failed_validation_index = i; return false; + } // Verify that sub-structs also follow packing rules. // We cannot use enhanced layouts on substructs, so they better be up to spec. @@ -1352,12 +1813,14 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin if (!memb_type.pointer && !memb_type.member_types.empty() && !buffer_is_packing_standard(memb_type, substruct_packing)) { + if (failed_validation_index) + *failed_validation_index = i; return false; } } // Bump size. - offset += packed_size; + offset = actual_offset + packed_size; } return true; @@ -1408,17 +1871,22 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) if (is_legacy()) return ""; + if (subpass_input_is_framebuffer_fetch(var.self)) + return ""; + SmallVector attr; - auto &dec = ir.meta[var.self].decoration; auto &type = get(var.basetype); - auto &flags = dec.decoration_flags; - auto typeflags = ir.meta[type.self].decoration.decoration_flags; + auto &flags = get_decoration_bitset(var.self); + auto &typeflags = get_decoration_bitset(type.self); + + if (flags.get(DecorationPassthroughNV)) + attr.push_back("passthrough"); if (options.vulkan_semantics && var.storage == StorageClassPushConstant) attr.push_back("push_constant"); - else if (var.storage == StorageClassShaderRecordBufferNV) - attr.push_back("shaderRecordNV"); + else if (var.storage == StorageClassShaderRecordBufferKHR) + attr.push_back(ray_tracing_is_khr ? 
"shaderRecordEXT" : "shaderRecordNV"); if (flags.get(DecorationRowMajor)) attr.push_back("row_major"); @@ -1428,7 +1896,7 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) if (options.vulkan_semantics) { if (flags.get(DecorationInputAttachmentIndex)) - attr.push_back(join("input_attachment_index = ", dec.input_attachment)); + attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex))); } bool is_block = has_decoration(type.self, DecorationBlock); @@ -1441,37 +1909,153 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) // If our members have location decorations, we don't need to // emit location decorations at the top as well (looks weird). if (!combined_decoration.get(DecorationLocation)) - attr.push_back(join("location = ", dec.location)); + attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation))); } - // Can only declare Component if we can declare location. - if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block)) + if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput && + location_is_non_coherent_framebuffer_fetch(get_decoration(var.self, DecorationLocation))) { - if (!options.es) - { - if (options.version < 440 && options.version >= 140) - require_extension_internal("GL_ARB_enhanced_layouts"); - else if (options.version < 140) - SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); - attr.push_back(join("component = ", dec.component)); - } - else - SPIRV_CROSS_THROW("Component decoration is not supported in ES targets."); + attr.push_back("noncoherent"); + } + + // Transform feedback + bool uses_enhanced_layouts = false; + if (is_block && var.storage == StorageClassOutput) + { + // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself, + // since all members must match the same xfb_buffer. 
The only thing we will declare for members of the block + // is the xfb_offset. + uint32_t member_count = uint32_t(type.member_types.size()); + bool have_xfb_buffer_stride = false; + bool have_any_xfb_offset = false; + bool have_geom_stream = false; + uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; + + if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride)) + { + have_xfb_buffer_stride = true; + xfb_buffer = get_decoration(var.self, DecorationXfbBuffer); + xfb_stride = get_decoration(var.self, DecorationXfbStride); + } + + if (flags.get(DecorationStream)) + { + have_geom_stream = true; + geom_stream = get_decoration(var.self, DecorationStream); + } + + // Verify that none of the members violate our assumption. + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationStream)) + { + uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream); + if (have_geom_stream && member_geom_stream != geom_stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = member_geom_stream; + } + + // Only members with an Offset decoration participate in XFB. 
+ if (!has_member_decoration(type.self, i, DecorationOffset)) + continue; + have_any_xfb_offset = true; + + if (has_member_decoration(type.self, i, DecorationXfbBuffer)) + { + uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer); + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + } + + if (has_member_decoration(type.self, i, DecorationXfbStride)) + { + uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbStride mismatch."); + have_xfb_buffer_stride = true; + xfb_stride = stride; + } + } + + if (have_xfb_buffer_stride && have_any_xfb_offset) + { + attr.push_back(join("xfb_buffer = ", xfb_buffer)); + attr.push_back(join("xfb_stride = ", xfb_stride)); + uses_enhanced_layouts = true; + } + + if (have_geom_stream) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream))); + } + } + else if (var.storage == StorageClassOutput) + { + if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset)) + { + // XFB for standalone variables, we can emit all decorations. 
+ attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer))); + attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride))); + attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset))); + uses_enhanced_layouts = true; + } + + if (flags.get(DecorationStream)) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream))); + } + } + + // Can only declare Component if we can declare location. + if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block)) + { + uses_enhanced_layouts = true; + attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent))); + } + + if (uses_enhanced_layouts) + { + if (!options.es) + { + if (options.version < 440 && options.version >= 140) + require_extension_internal("GL_ARB_enhanced_layouts"); + else if (options.version < 140) + SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40."); + if (!options.es && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + } + else if (options.es) + SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL."); } if (flags.get(DecorationIndex)) - attr.push_back(join("index = ", dec.index)); + attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex))); // Do not emit set = decoration in regular GLSL output, but // we need to preserve it in Vulkan GLSL mode. 
- if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferNV) + if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR) { if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics) - attr.push_back(join("set = ", dec.set)); + attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet))); } bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant; - bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV || + bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR || (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock)); bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer; bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock); @@ -1493,14 +2077,14 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) if (!can_use_buffer_blocks && var.storage == StorageClassUniform) can_use_binding = false; - if (var.storage == StorageClassShaderRecordBufferNV) + if (var.storage == StorageClassShaderRecordBufferKHR) can_use_binding = false; if (can_use_binding && flags.get(DecorationBinding)) - attr.push_back(join("binding = ", dec.binding)); + attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding))); - if (flags.get(DecorationOffset)) - attr.push_back(join("offset = ", dec.offset)); + if (var.storage != StorageClassOutput && flags.get(DecorationOffset)) + attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset))); // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430. // If SPIR-V does not comply with either layout, we cannot really work around it. 
@@ -1551,7 +2135,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool suppo if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension_internal("GL_ARB_enhanced_layouts"); - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); return "std430"; } else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) @@ -1565,12 +2149,12 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool suppo if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension_internal("GL_ARB_enhanced_layouts"); - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); return "std140"; } else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout)) { - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); require_extension_internal("GL_EXT_scalar_block_layout"); return "scalar"; } @@ -1585,7 +2169,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool suppo buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) { // UBOs can support std430 with GL_EXT_scalar_block_layout. - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); require_extension_internal("GL_EXT_scalar_block_layout"); return "std430"; } @@ -1618,9 +2202,8 @@ void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) // OpenGL has no concept of push constant blocks, implement it as a uniform struct. 
auto &type = get(var.basetype); - auto &flags = ir.meta[var.self].decoration.decoration_flags; - flags.clear(DecorationBinding); - flags.clear(DecorationDescriptorSet); + unset_decoration(var.self, DecorationBinding); + unset_decoration(var.self, DecorationDescriptorSet); #if 0 if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet))) @@ -1630,14 +2213,13 @@ void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. - auto &block_flags = ir.meta[type.self].decoration.decoration_flags; - bool block_flag = block_flags.get(DecorationBlock); - block_flags.clear(DecorationBlock); + bool block_flag = has_decoration(type.self, DecorationBlock); + unset_decoration(type.self, DecorationBlock); emit_struct(type); if (block_flag) - block_flags.set(DecorationBlock); + set_decoration(type.self, DecorationBlock); emit_uniform(var); statement(""); @@ -1677,8 +2259,9 @@ void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var) statement(""); } -void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_declaration) +void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration) { + auto &type = get(type_id); string buffer_name; if (forward_declaration) @@ -1710,6 +2293,9 @@ void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_decl block_names.insert(buffer_name); block_ssbo_names.insert(buffer_name); + + // Ensure we emit the correct name when emitting non-forward pointer type. 
+ ir.meta[type.self].decoration.alias = buffer_name; } else if (type.basetype != SPIRType::Struct) buffer_name = type_to_glsl(type); @@ -1718,8 +2304,34 @@ void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_decl if (!forward_declaration) { + auto itr = physical_storage_type_to_alignment.find(type_id); + uint32_t alignment = 0; + if (itr != physical_storage_type_to_alignment.end()) + alignment = itr->second.alignment; + if (type.basetype == SPIRType::Struct) - statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true), ") buffer ", buffer_name); + { + SmallVector attributes; + attributes.push_back("buffer_reference"); + if (alignment) + attributes.push_back(join("buffer_reference_align = ", alignment)); + attributes.push_back(buffer_to_packing_standard(type, true)); + + auto flags = ir.get_buffer_block_type_flags(type); + string decorations; + if (flags.get(DecorationRestrict)) + decorations += " restrict"; + if (flags.get(DecorationCoherent)) + decorations += " coherent"; + if (flags.get(DecorationNonReadable)) + decorations += " writeonly"; + if (flags.get(DecorationNonWritable)) + decorations += " readonly"; + + statement("layout(", merge(attributes), ")", decorations, " buffer ", buffer_name); + } + else if (alignment) + statement("layout(buffer_reference, buffer_reference_align = ", alignment, ") buffer ", buffer_name); else statement("layout(buffer_reference) buffer ", buffer_name); @@ -1757,7 +2369,7 @@ void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var) auto &type = get(var.basetype); Bitset flags = ir.get_buffer_block_flags(var); - bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV || + bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR || ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); bool is_restrict = ssbo && flags.get(DecorationRestrict); bool is_writeonly = 
ssbo && flags.get(DecorationNonReadable); @@ -1848,12 +2460,24 @@ const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) { auto &execution = get_entry_point(); + if (subpass_input_is_framebuffer_fetch(var.self)) + return ""; + if (var.storage == StorageClassInput || var.storage == StorageClassOutput) { if (is_legacy() && execution.model == ExecutionModelVertex) return var.storage == StorageClassInput ? "attribute " : "varying "; else if (is_legacy() && execution.model == ExecutionModelFragment) return "varying "; // Fragment outputs are renamed so they never hit this case. + else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput) + { + uint32_t loc = get_decoration(var.self, DecorationLocation); + bool is_inout = location_is_framebuffer_fetch(loc); + if (is_inout) + return "inout "; + else + return "out "; + } else return var.storage == StorageClassInput ? "in " : "out "; } @@ -1862,74 +2486,143 @@ const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) { return "uniform "; } - else if (var.storage == StorageClassRayPayloadNV) + else if (var.storage == StorageClassRayPayloadKHR) { - return "rayPayloadNV "; + return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV "; } - else if (var.storage == StorageClassIncomingRayPayloadNV) + else if (var.storage == StorageClassIncomingRayPayloadKHR) { - return "rayPayloadInNV "; + return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV "; } - else if (var.storage == StorageClassHitAttributeNV) + else if (var.storage == StorageClassHitAttributeKHR) { - return "hitAttributeNV "; + return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV "; } - else if (var.storage == StorageClassCallableDataNV) + else if (var.storage == StorageClassCallableDataKHR) { - return "callableDataNV "; + return ray_tracing_is_khr ? 
"callableDataEXT " : "callableDataNV "; } - else if (var.storage == StorageClassIncomingCallableDataNV) + else if (var.storage == StorageClassIncomingCallableDataKHR) { - return "callableDataInNV "; + return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV "; } return ""; } +void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices) +{ + uint32_t member_type_id = type.self; + const SPIRType *member_type = &type; + const SPIRType *parent_type = nullptr; + auto flattened_name = basename; + for (auto &index : indices) + { + flattened_name += "_"; + flattened_name += to_member_name(*member_type, index); + parent_type = member_type; + member_type_id = member_type->member_types[index]; + member_type = &get(member_type_id); + } + + assert(member_type->basetype != SPIRType::Struct); + + // We're overriding struct member names, so ensure we do so on the primary type. + if (parent_type->type_alias) + parent_type = &get(parent_type->type_alias); + + // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row, + // which is not allowed. + ParsedIR::sanitize_underscores(flattened_name); + + uint32_t last_index = indices.back(); + + // Pass in the varying qualifier here so it will appear in the correct declaration order. + // Replace member name while emitting it so it encodes both struct name and member name. + auto backup_name = get_member_name(parent_type->self, last_index); + auto member_name = to_member_name(*parent_type, last_index); + set_member_name(parent_type->self, last_index, flattened_name); + emit_struct_member(*parent_type, member_type_id, last_index, qual); + // Restore member name. 
+ set_member_name(parent_type->self, last_index, member_name); +} + +void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices) +{ + auto sub_indices = indices; + sub_indices.push_back(0); + + const SPIRType *member_type = &type; + for (auto &index : indices) + member_type = &get(member_type->member_types[index]); + + assert(member_type->basetype == SPIRType::Struct); + + if (!member_type->array.empty()) + SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks."); + + for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++) + { + sub_indices.back() = i; + if (get(member_type->member_types[i]).basetype == SPIRType::Struct) + emit_flattened_io_block_struct(basename, type, qual, sub_indices); + else + emit_flattened_io_block_member(basename, type, qual, sub_indices); + } +} + void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual) { - auto &type = get(var.basetype); - if (!type.array.empty()) + auto &var_type = get(var.basetype); + if (!var_type.array.empty()) SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings."); + // Emit flattened types based on the type alias. Normally, we are never supposed to emit + // struct declarations for aliased types. + auto &type = var_type.type_alias ? get(var_type.type_alias) : var_type; + auto old_flags = ir.meta[type.self].decoration.decoration_flags; // Emit the members as if they are part of a block to get all qualifiers. 
ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock); type.member_name_cache.clear(); + SmallVector member_indices; + member_indices.push_back(0); + auto basename = to_name(var.self); + uint32_t i = 0; for (auto &member : type.member_types) { add_member_name(type, i); auto &membertype = get(member); + member_indices.back() = i; if (membertype.basetype == SPIRType::Struct) - SPIRV_CROSS_THROW("Cannot flatten struct inside structs in I/O variables."); - - // Pass in the varying qualifier here so it will appear in the correct declaration order. - // Replace member name while emitting it so it encodes both struct name and member name. - // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row, - // which is not allowed. - auto backup_name = get_member_name(type.self, i); - auto member_name = to_member_name(type, i); - set_member_name(type.self, i, sanitize_underscores(join(to_name(var.self), "_", member_name))); - emit_struct_member(type, member, i, qual); - // Restore member name. - set_member_name(type.self, i, member_name); + emit_flattened_io_block_struct(basename, type, qual, member_indices); + else + emit_flattened_io_block_member(basename, type, qual, member_indices); i++; } ir.meta[type.self].decoration.decoration_flags = old_flags; - // Treat this variable as flattened from now on. - flattened_structs.insert(var.self); + // Treat this variable as fully flattened from now on. + flattened_structs[var.self] = true; } void CompilerGLSL::emit_interface_block(const SPIRVariable &var) { auto &type = get(var.basetype); + if (var.storage == StorageClassInput && type.basetype == SPIRType::Double && + !options.es && options.version < 410) + { + require_extension_internal("GL_ARB_vertex_attrib_64bit"); + } + // Either make it plain in/out or in/out blocks depending on what shader is doing ... 
bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); const char *qual = to_storage_qualifiers_glsl(var); @@ -1939,7 +2632,8 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) // ESSL earlier than 310 and GLSL earlier than 150 did not support // I/O variables which are struct types. // To support this, flatten the struct into separate varyings instead. - if ((options.es && options.version < 310) || (!options.es && options.version < 150)) + if (options.force_flattened_io_blocks || (options.es && options.version < 310) || + (!options.es && options.version < 150)) { // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320. // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150). @@ -1954,6 +2648,9 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) require_extension_internal("GL_EXT_shader_io_blocks"); } + // Workaround to make sure we can emit "patch in/out" correctly. + fixup_io_block_patch_primitive_qualifiers(var); + // Block names should never alias. auto block_name = to_name(type.self, false); @@ -1975,7 +2672,15 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) // Instance names cannot alias block names. resource_names.insert(block_name); - statement(layout_for_variable(var), qual, block_name); + const char *block_qualifier; + if (has_decoration(var.self, DecorationPatch)) + block_qualifier = "patch "; + else if (has_decoration(var.self, DecorationPerPrimitiveEXT)) + block_qualifier = "perprimitiveEXT "; + else + block_qualifier = ""; + + statement(layout_for_variable(var), block_qualifier, qual, block_name); begin_scope(); type.member_name_cache.clear(); @@ -1999,22 +2704,38 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) // I/O variables which are struct types. // To support this, flatten the struct into separate varyings instead. 
if (type.basetype == SPIRType::Struct && - ((options.es && options.version < 310) || (!options.es && options.version < 150))) + (options.force_flattened_io_blocks || (options.es && options.version < 310) || + (!options.es && options.version < 150))) { emit_flattened_io_block(var, qual); } else { add_resource_name(var.self); + + // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays. + // Opt for unsized as it's the more "correct" variant to use. + bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() && + !has_decoration(var.self, DecorationPatch) && + (get_entry_point().model == ExecutionModelTessellationControl || + get_entry_point().model == ExecutionModelTessellationEvaluation); + + uint32_t old_array_size = 0; + bool old_array_size_literal = true; + + if (control_point_input_array) + { + swap(type.array.back(), old_array_size); + swap(type.array_size_literal.back(), old_array_size_literal); + } + statement(layout_for_variable(var), to_qualifiers_glsl(var.self), variable_decl(type, to_name(var.self), var.self), ";"); - // If a StorageClassOutput variable has an initializer, we need to initialize it in main(). 
- if (var.storage == StorageClassOutput && var.initializer) + if (control_point_input_array) { - auto &entry_func = this->get(ir.default_entry_point); - entry_func.fixup_hooks_in.push_back( - [&]() { statement(to_name(var.self), " = ", to_expression(var.initializer), ";"); }); + swap(type.array.back(), old_array_size); + swap(type.array_size_literal.back(), old_array_size_literal); } } } @@ -2023,7 +2744,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) void CompilerGLSL::emit_uniform(const SPIRVariable &var) { auto &type = get(var.basetype); - if (type.basetype == SPIRType::Image && type.image.sampled == 2) + if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData) { if (!options.es && options.version < 420) require_extension_internal("GL_ARB_shader_image_load_store"); @@ -2043,17 +2764,37 @@ string CompilerGLSL::constant_value_macro_name(uint32_t id) void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant) { auto &type = get(constant.basetype); + add_resource_name(constant.self); auto name = to_name(constant.self); statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";"); } +int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const +{ + auto &entry_point = get_entry_point(); + int index = -1; + + // Need to redirect specialization constants which are used as WorkGroupSize to the builtin, + // since the spec constant declarations are never explicitly declared. 
+ if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(ExecutionModeLocalSizeId)) + { + if (c.self == entry_point.workgroup_size.id_x) + index = 0; + else if (c.self == entry_point.workgroup_size.id_y) + index = 1; + else if (c.self == entry_point.workgroup_size.id_z) + index = 2; + } + + return index; +} + void CompilerGLSL::emit_constant(const SPIRConstant &constant) { auto &type = get(constant.constant_type); - auto name = to_name(constant.self); SpecializationConstant wg_x, wg_y, wg_z; - uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); // This specialization constant is implicitly declared by emitting layout() in; if (constant.self == workgroup_size_id) @@ -2062,7 +2803,8 @@ void CompilerGLSL::emit_constant(const SPIRConstant &constant) // These specialization constants are implicitly declared by emitting layout() in; // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration // later can use macro overrides for work group size. - bool is_workgroup_size_constant = constant.self == wg_x.id || constant.self == wg_y.id || constant.self == wg_z.id; + bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id || + ConstantID(constant.self) == wg_z.id; if (options.vulkan_semantics && is_workgroup_size_constant) { @@ -2076,6 +2818,9 @@ void CompilerGLSL::emit_constant(const SPIRConstant &constant) return; } + add_resource_name(constant.self); + auto name = to_name(constant.self); + // Only scalars have constant IDs. 
if (has_decoration(constant.self, DecorationSpecId)) { @@ -2106,6 +2851,46 @@ void CompilerGLSL::emit_entry_point_declarations() { } +void CompilerGLSL::replace_illegal_names(const unordered_set &keywords) +{ + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + if (is_hidden_variable(var)) + return; + + auto *meta = ir.find_meta(var.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + }); + + ir.for_each_typed_id([&](uint32_t, const SPIRFunction &func) { + auto *meta = ir.find_meta(func.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + }); + + ir.for_each_typed_id([&](uint32_t, const SPIRType &type) { + auto *meta = ir.find_meta(type.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + + for (auto &memb : meta->members) + if (keywords.find(memb.alias) != end(keywords)) + memb.alias = join("_", memb.alias); + }); +} + void CompilerGLSL::replace_illegal_names() { // clang-format off @@ -2160,14 +2945,7 @@ void CompilerGLSL::replace_illegal_names() }; // clang-format on - ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { - if (!is_hidden_variable(var)) - { - auto &m = ir.meta[var.self].decoration; - if (m.alias.compare(0, 3, "gl_") == 0 || keywords.find(m.alias) != end(keywords)) - m.alias = join("_", m.alias); - } - }); + replace_illegal_names(keywords); } void CompilerGLSL::replace_fragment_output(SPIRVariable &var) @@ -2274,19 +3052,124 @@ void CompilerGLSL::emit_pls() void CompilerGLSL::fixup_image_load_store_access() { + if (!options.enable_storage_image_qualifier_deduction) + return; + ir.for_each_typed_id([&](uint32_t var, const SPIRVariable &) { auto &vartype = expression_type(var); - if (vartype.basetype == SPIRType::Image) + if (vartype.basetype == SPIRType::Image 
&& vartype.image.sampled == 2) { - // Older glslangValidator does not emit required qualifiers here. + // Very old glslangValidator and HLSL compilers do not emit required qualifiers here. // Solve this by making the image access as restricted as possible and loosen up if we need to. // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing. - auto &flags = ir.meta[var].decoration.decoration_flags; - if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable)) + if (!has_decoration(var, DecorationNonWritable) && !has_decoration(var, DecorationNonReadable)) + { + set_decoration(var, DecorationNonWritable); + set_decoration(var, DecorationNonReadable); + } + } + }); +} + +static bool is_block_builtin(BuiltIn builtin) +{ + return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || + builtin == BuiltInCullDistance; +} + +bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) +{ + // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block. 
+ + if (storage != StorageClassOutput) + return false; + bool should_force = false; + + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (should_force) + return; + + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if (var.storage == storage && block && is_builtin_variable(var)) + { + uint32_t member_count = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationBuiltIn) && + is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) && + has_member_decoration(type.self, i, DecorationOffset)) + { + should_force = true; + } + } + } + else if (var.storage == storage && !block && is_builtin_variable(var)) + { + if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) && + has_decoration(var.self, DecorationOffset)) + { + should_force = true; + } + } + }); + + // If we're declaring clip/cull planes with control points we need to force block declaration. + if ((get_execution_model() == ExecutionModelTessellationControl || + get_execution_model() == ExecutionModelMeshEXT) && + (clip_distance_count || cull_distance_count)) + { + should_force = true; + } + + return should_force; +} + +void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model) +{ + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block && + is_builtin_variable(var)) + { + if (model != ExecutionModelMeshEXT) + { + // Make sure the array has a supported name in the code. 
+ if (var.storage == StorageClassOutput) + set_name(var.self, "gl_out"); + else if (var.storage == StorageClassInput) + set_name(var.self, "gl_in"); + } + else + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + { + set_name(var.self, "gl_MeshPrimitivesEXT"); + set_name(type.self, "gl_MeshPerPrimitiveEXT"); + } + else + { + set_name(var.self, "gl_MeshVerticesEXT"); + set_name(type.self, "gl_MeshPerVertexEXT"); + } + } + } + + if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block) + { + auto *m = ir.find_meta(var.self); + if (m && m->decoration.builtin) { - flags.set(DecorationNonWritable); - flags.set(DecorationNonReadable); + auto builtin_type = m->decoration.builtin_type; + if (builtin_type == BuiltInPrimitivePointIndicesEXT) + set_name(var.self, "gl_PrimitivePointIndicesEXT"); + else if (builtin_type == BuiltInPrimitiveLineIndicesEXT) + set_name(var.self, "gl_PrimitiveLineIndicesEXT"); + else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT) + set_name(var.self, "gl_PrimitiveTriangleIndicesEXT"); } } }); @@ -2298,13 +3181,23 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo Bitset global_builtins; const SPIRVariable *block_var = nullptr; bool emitted_block = false; - bool builtin_array = false; // Need to use declared size in the type. // These variables might have been declared, but not statically used, so we haven't deduced their size yet. 
uint32_t cull_distance_size = 0; uint32_t clip_distance_size = 0; + bool have_xfb_buffer_stride = false; + bool have_geom_stream = false; + bool have_any_xfb_offset = false; + uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; + std::unordered_map builtin_xfb_offsets; + + const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool { + return builtin == BuiltInPosition || builtin == BuiltInPointSize || + builtin == BuiltInClipDistance || builtin == BuiltInCullDistance; + }; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); bool block = has_decoration(type.self, DecorationBlock); @@ -2315,28 +3208,91 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo uint32_t index = 0; for (auto &m : ir.meta[type.self].members) { - if (m.builtin) + if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) { builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) - cull_distance_size = this->get(type.member_types[index]).array.front(); + cull_distance_size = to_array_size_literal(this->get(type.member_types[index])); else if (m.builtin_type == BuiltInClipDistance) - clip_distance_size = this->get(type.member_types[index]).array.front(); + clip_distance_size = to_array_size_literal(this->get(type.member_types[index])); + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset)) + { + have_any_xfb_offset = true; + builtin_xfb_offsets[m.builtin_type] = m.offset; + } + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) + { + uint32_t stream = m.stream; + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } } index++; } + + if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) && + has_decoration(var.self, DecorationXfbStride)) + { + uint32_t buffer_index = get_decoration(var.self, 
DecorationXfbBuffer); + uint32_t stride = get_decoration(var.self, DecorationXfbStride); + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + xfb_stride = stride; + } + + if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream)) + { + uint32_t stream = get_decoration(var.self, DecorationStream); + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } } else if (var.storage == storage && !block && is_builtin_variable(var)) { // While we're at it, collect all declared global builtins (HLSL mostly ...). auto &m = ir.meta[var.self].decoration; - if (m.builtin) + if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) { global_builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) - cull_distance_size = type.array.front(); + cull_distance_size = to_array_size_literal(type); else if (m.builtin_type == BuiltInClipDistance) - clip_distance_size = type.array.front(); + clip_distance_size = to_array_size_literal(type); + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) && + m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset)) + { + have_any_xfb_offset = true; + builtin_xfb_offsets[m.builtin_type] = m.offset; + uint32_t buffer_index = m.xfb_buffer; + uint32_t stride = m.xfb_stride; + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + xfb_stride = stride; + } + + if 
(is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) + { + uint32_t stream = get_decoration(var.self, DecorationStream); + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } } } @@ -2348,7 +3304,6 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo emitted_builtins = builtins; emitted_block = true; - builtin_array = !type.array.empty(); block_var = &var; }); @@ -2365,54 +3320,114 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo return; if (storage == StorageClassOutput) - statement("out gl_PerVertex"); + { + SmallVector attr; + if (have_xfb_buffer_stride && have_any_xfb_offset) + { + if (!options.es) + { + if (options.version < 440 && options.version >= 140) + require_extension_internal("GL_ARB_enhanced_layouts"); + else if (options.version < 140) + SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); + if (!options.es && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + } + else if (options.es) + SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer."); + attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride)); + } + + if (have_geom_stream) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", geom_stream)); + } + + if (model == ExecutionModelMeshEXT) + statement("out gl_MeshPerVertexEXT"); + else if (!attr.empty()) + statement("layout(", merge(attr), ") out gl_PerVertex"); + else + statement("out gl_PerVertex"); + } else - statement("in gl_PerVertex"); + { + 
// If we have passthrough, there is no way PerVertex cannot be passthrough. + if (get_entry_point().geometry_passthrough) + statement("layout(passthrough) in gl_PerVertex"); + else + statement("in gl_PerVertex"); + } begin_scope(); if (emitted_builtins.get(BuiltInPosition)) - statement("vec4 gl_Position;"); + { + auto itr = builtin_xfb_offsets.find(BuiltInPosition); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;"); + else + statement("vec4 gl_Position;"); + } + if (emitted_builtins.get(BuiltInPointSize)) - statement("float gl_PointSize;"); + { + auto itr = builtin_xfb_offsets.find(BuiltInPointSize); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;"); + else + statement("float gl_PointSize;"); + } + if (emitted_builtins.get(BuiltInClipDistance)) - statement("float gl_ClipDistance[", clip_distance_size, "];"); + { + auto itr = builtin_xfb_offsets.find(BuiltInClipDistance); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];"); + else + statement("float gl_ClipDistance[", clip_distance_size, "];"); + } + if (emitted_builtins.get(BuiltInCullDistance)) - statement("float gl_CullDistance[", cull_distance_size, "];"); + { + auto itr = builtin_xfb_offsets.find(BuiltInCullDistance); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];"); + else + statement("float gl_CullDistance[", cull_distance_size, "];"); + } + + bool builtin_array = model == ExecutionModelTessellationControl || + (model == ExecutionModelMeshEXT && storage == StorageClassOutput) || + (model == ExecutionModelGeometry && storage == StorageClassInput) || + (model == ExecutionModelTessellationEvaluation && storage == StorageClassInput); - bool tessellation = model == ExecutionModelTessellationEvaluation || model == 
ExecutionModelTessellationControl; if (builtin_array) { - // Make sure the array has a supported name in the code. - if (storage == StorageClassOutput) - set_name(block_var->self, "gl_out"); - else if (storage == StorageClassInput) - set_name(block_var->self, "gl_in"); + const char *instance_name; + if (model == ExecutionModelMeshEXT) + instance_name = "gl_MeshVerticesEXT"; // Per primitive is never synthesized. + else + instance_name = storage == StorageClassInput ? "gl_in" : "gl_out"; if (model == ExecutionModelTessellationControl && storage == StorageClassOutput) - end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]")); + end_scope_decl(join(instance_name, "[", get_entry_point().output_vertices, "]")); else - end_scope_decl(join(to_name(block_var->self), tessellation ? "[gl_MaxPatchVertices]" : "[]")); + end_scope_decl(join(instance_name, "[]")); } else end_scope_decl(); statement(""); } -void CompilerGLSL::declare_undefined_values() +bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const { - bool emitted = false; - ir.for_each_typed_id([&](uint32_t, const SPIRUndef &undef) { - statement(variable_decl(this->get(undef.basetype), to_name(undef.self), undef.self), ";"); - emitted = true; - }); - - if (emitted) - statement(""); -} - -bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const -{ - bool statically_assigned = var.statically_assigned && var.static_expression != 0 && var.remapped_variable; + bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable; if (statically_assigned) { @@ -2439,6 +3454,19 @@ void CompilerGLSL::emit_resources() if (!pls_inputs.empty() || !pls_outputs.empty()) emit_pls(); + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + case ExecutionModelMeshEXT: + fixup_implicit_builtin_block_names(execution.model); + break; + + default: + 
break; + } + // Emit custom gl_PerVertex for SSO compatibility. if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment) { @@ -2452,6 +3480,7 @@ void CompilerGLSL::emit_resources() break; case ExecutionModelVertex: + case ExecutionModelMeshEXT: emit_declared_builtin_block(StorageClassOutput, execution.model); break; @@ -2459,6 +3488,16 @@ void CompilerGLSL::emit_resources() break; } } + else if (should_force_emit_builtin_block(StorageClassOutput)) + { + emit_declared_builtin_block(StorageClassOutput, execution.model); + } + else if (execution.geometry_passthrough) + { + // Need to declare gl_in with Passthrough. + // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass. + emit_declared_builtin_block(StorageClassInput, execution.model); + } else { // Need to redeclare clip/cull distance with explicit size to use them. @@ -2484,44 +3523,76 @@ void CompilerGLSL::emit_resources() // emit specialization constants as actual floats, // spec op expressions will redirect to the constant name. 
// - for (auto &id_ : ir.ids_for_constant_or_type) { - auto &id = ir.ids[id_]; - - if (id.get_type() == TypeConstant) + auto loop_lock = ir.create_loop_hard_lock(); + for (auto &id_ : ir.ids_for_constant_undef_or_type) { - auto &c = id.get(); + auto &id = ir.ids[id_]; - bool needs_declaration = c.specialization || c.is_used_as_lut; - - if (needs_declaration) + if (id.get_type() == TypeConstant) { - if (!options.vulkan_semantics && c.specialization) + auto &c = id.get(); + + bool needs_declaration = c.specialization || c.is_used_as_lut; + + if (needs_declaration) { - c.specialization_constant_macro_name = - constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + if (!options.vulkan_semantics && c.specialization) + { + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + } + emit_constant(c); + emitted = true; } - emit_constant(c); + } + else if (id.get_type() == TypeConstantOp) + { + emit_specialization_constant_op(id.get()); emitted = true; } - } - else if (id.get_type() == TypeConstantOp) - { - emit_specialization_constant_op(id.get()); - emitted = true; - } - else if (id.get_type() == TypeType) - { - auto &type = id.get(); - if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && - (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) && - !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + else if (id.get_type() == TypeType) { - if (emitted) - statement(""); - emitted = false; + auto *type = &id.get(); - emit_struct(type); + bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer && + (!has_decoration(type->self, DecorationBlock) && + !has_decoration(type->self, DecorationBufferBlock)); + + // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs. 
+ if (type->basetype == SPIRType::Struct && type->pointer && + has_decoration(type->self, DecorationBlock) && + (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR || + type->storage == StorageClassHitAttributeKHR)) + { + type = &get(type->parent_type); + is_natural_struct = true; + } + + if (is_natural_struct) + { + if (emitted) + statement(""); + emitted = false; + + emit_struct(*type); + } + } + else if (id.get_type() == TypeUndef) + { + auto &undef = id.get(); + auto &type = this->get(undef.basetype); + // OpUndef can be void for some reason ... + if (type.basetype == SPIRType::Void) + return; + + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); + + // FIXME: If used in a constant, we must declare it as one. + statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); + emitted = true; } } } @@ -2533,12 +3604,12 @@ void CompilerGLSL::emit_resources() // If the work group size depends on a specialization constant, we need to declare the layout() block // after constants (and their macros) have been declared. 
if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics && - execution.workgroup_size.constant != 0) + (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))) { SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); - if ((wg_x.id != 0) || (wg_y.id != 0) || (wg_z.id != 0)) + if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0))) { SmallVector inputs; build_workgroup_size(inputs, wg_x, wg_y, wg_z); @@ -2553,28 +3624,28 @@ void CompilerGLSL::emit_resources() { for (auto type : physical_storage_non_block_pointer_types) { - emit_buffer_reference_block(get(type), false); + emit_buffer_reference_block(type, false); } // Output buffer reference blocks. // Do this in two stages, one with forward declaration, // and one without. Buffer reference blocks can reference themselves // to support things like linked lists. - ir.for_each_typed_id([&](uint32_t, SPIRType &type) { - bool has_block_flags = has_decoration(type.self, DecorationBlock); - if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) && + ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { + if (type.basetype == SPIRType::Struct && type.pointer && + type.pointer_depth == 1 && !type_is_array_of_pointers(type) && type.storage == StorageClassPhysicalStorageBufferEXT) { - emit_buffer_reference_block(type, true); + emit_buffer_reference_block(self, true); } }); - ir.for_each_typed_id([&](uint32_t, SPIRType &type) { - bool has_block_flags = has_decoration(type.self, DecorationBlock); - if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) && + ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { + if (type.basetype == SPIRType::Struct && + type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) && type.storage == StorageClassPhysicalStorageBufferEXT) { - 
emit_buffer_reference_block(type, false); + emit_buffer_reference_block(self, false); } }); } @@ -2584,7 +3655,7 @@ void CompilerGLSL::emit_resources() auto &type = this->get(var.basetype); bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform || - type.storage == StorageClassShaderRecordBufferNV; + type.storage == StorageClassShaderRecordBufferKHR; bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); @@ -2624,9 +3695,9 @@ void CompilerGLSL::emit_resources() if (var.storage != StorageClassFunction && type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter || - type.storage == StorageClassRayPayloadNV || type.storage == StorageClassIncomingRayPayloadNV || - type.storage == StorageClassCallableDataNV || type.storage == StorageClassIncomingCallableDataNV || - type.storage == StorageClassHitAttributeNV) && + type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR || + type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR || + type.storage == StorageClassHitAttributeKHR) && !is_hidden_variable(var)) { emit_uniform(var); @@ -2638,26 +3709,71 @@ void CompilerGLSL::emit_resources() statement(""); emitted = false; + bool emitted_base_instance = false; + // Output in/out interfaces. ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); + bool is_hidden = is_hidden_variable(var); + + // Unused output I/O variables might still be required to implement framebuffer fetch. 
+ if (var.storage == StorageClassOutput && !is_legacy() && + location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0) + { + is_hidden = false; + } + if (var.storage != StorageClassFunction && type.pointer && (var.storage == StorageClassInput || var.storage == StorageClassOutput) && - interface_variable_exists_in_entry_point(var.self) && !is_hidden_variable(var)) + interface_variable_exists_in_entry_point(var.self) && !is_hidden) { + if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput && + type.array.size() == 1) + { + SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader."); + } emit_interface_block(var); emitted = true; } else if (is_builtin_variable(var)) { + auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); // For gl_InstanceIndex emulation on GLES, the API user needs to // supply this uniform. - if (options.vertex.support_nonzero_base_instance && - ir.meta[var.self].decoration.builtin_type == BuiltInInstanceIndex && !options.vulkan_semantics) + + // The draw parameter extension is soft-enabled on GL with some fallbacks. + if (!options.vulkan_semantics) { - statement("uniform int SPIRV_Cross_BaseInstance;"); - emitted = true; + if (!emitted_base_instance && + ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) || + (builtin == BuiltInBaseInstance))) + { + statement("#ifdef GL_ARB_shader_draw_parameters"); + statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB"); + statement("#else"); + // A crude, but simple workaround which should be good enough for non-indirect draws. 
+ statement("uniform int SPIRV_Cross_BaseInstance;"); + statement("#endif"); + emitted = true; + emitted_base_instance = true; + } + else if (builtin == BuiltInBaseVertex) + { + statement("#ifdef GL_ARB_shader_draw_parameters"); + statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB"); + statement("#else"); + // A crude, but simple workaround which should be good enough for non-indirect draws. + statement("uniform int SPIRV_Cross_BaseVertex;"); + statement("#endif"); + } + else if (builtin == BuiltInDrawIndex) + { + statement("#ifndef GL_ARB_shader_draw_parameters"); + // Cannot really be worked around. + statement("#error GL_ARB_shader_draw_parameters is not supported."); + statement("#endif"); + } } } }); @@ -2666,9698 +3782,13776 @@ void CompilerGLSL::emit_resources() for (auto global : global_variables) { auto &var = get(global); + if (is_hidden_variable(var, true)) + continue; + if (var.storage != StorageClassOutput) { if (!variable_is_lut(var)) { add_resource_name(var.self); - statement(variable_decl(var), ";"); + + string initializer; + if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate && + !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var))) + { + initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var))); + } + + statement(variable_decl(var), initializer, ";"); emitted = true; } } + else if (var.initializer && maybe_get(var.initializer) != nullptr) + { + emit_output_variable_initializer(var); + } } if (emitted) statement(""); - - declare_undefined_values(); -} - -// Returns a string representation of the ID, usable as a function arg. -// Default is to simply return the expression representation fo the arg ID. -// Subclasses may override to modify the return value. -string CompilerGLSL::to_func_call_arg(uint32_t id) -{ - // Make sure that we use the name of the original variable, and not the parameter alias. 
- uint32_t name_id = id; - auto *var = maybe_get(id); - if (var && var->basevariable) - name_id = var->basevariable; - return to_expression(name_id); -} - -void CompilerGLSL::handle_invalid_expression(uint32_t id) -{ - // We tried to read an invalidated expression. - // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated. - forced_temporaries.insert(id); - force_recompile(); -} - -// Converts the format of the current expression from packed to unpacked, -// by wrapping the expression in a constructor of the appropriate type. -// GLSL does not support packed formats, so simply return the expression. -// Subclasses that do will override -string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t) -{ - return expr_str; } -// Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all. -void CompilerGLSL::strip_enclosed_expression(string &expr) +void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var) { - if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')') - return; + // If a StorageClassOutput variable has an initializer, we need to initialize it in main(). + auto &entry_func = this->get(ir.default_entry_point); + auto &type = get(var.basetype); + bool is_patch = has_decoration(var.self, DecorationPatch); + bool is_block = has_decoration(type.self, DecorationBlock); + bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch; - // Have to make sure that our first and last parens actually enclose everything inside it. - uint32_t paren_count = 0; - for (auto &c : expr) + if (is_block) { - if (c == '(') - paren_count++; - else if (c == ')') - { - paren_count--; - - // If we hit 0 and this is not the final char, our first and final parens actually don't - // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d). 
- if (paren_count == 0 && &c != &expr.back()) - return; - } - } - expr.erase(expr.size() - 1, 1); - expr.erase(begin(expr)); -} + uint32_t member_count = uint32_t(type.member_types.size()); + bool type_is_array = type.array.size() == 1; + uint32_t array_size = 1; + if (type_is_array) + array_size = to_array_size_literal(type); + uint32_t iteration_count = is_control_point ? 1 : array_size; -string CompilerGLSL::enclose_expression(const string &expr) -{ - bool need_parens = false; + // If the initializer is a block, we must initialize each block member one at a time. + for (uint32_t i = 0; i < member_count; i++) + { + // These outputs might not have been properly declared, so don't initialize them in that case. + if (has_member_decoration(type.self, i, DecorationBuiltIn)) + { + if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance && + !cull_distance_count) + continue; - // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back - // unary expressions. - if (!expr.empty()) - { - auto c = expr.front(); - if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*') - need_parens = true; - } + if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance && + !clip_distance_count) + continue; + } - if (!need_parens) - { - uint32_t paren_count = 0; - for (auto c : expr) - { - if (c == '(' || c == '[') - paren_count++; - else if (c == ')' || c == ']') + // We need to build a per-member array first, essentially transposing from AoS to SoA. + // This code path hits when we have an array of blocks. 
+ string lut_name; + if (type_is_array) { - assert(paren_count); - paren_count--; + lut_name = join("_", var.self, "_", i, "_init"); + uint32_t member_type_id = get(var.basetype).member_types[i]; + auto &member_type = get(member_type_id); + auto array_type = member_type; + array_type.parent_type = member_type_id; + array_type.array.push_back(array_size); + array_type.array_size_literal.push_back(true); + + SmallVector exprs; + exprs.reserve(array_size); + auto &c = get(var.initializer); + for (uint32_t j = 0; j < array_size; j++) + exprs.push_back(to_expression(get(c.subconstants[j]).subconstants[i])); + statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ", + type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");"); } - else if (c == ' ' && paren_count == 0) + + for (uint32_t j = 0; j < iteration_count; j++) { - need_parens = true; - break; + entry_func.fixup_hooks_in.push_back([=, &var]() { + AccessChainMeta meta; + auto &c = this->get(var.initializer); + + uint32_t invocation_id = 0; + uint32_t member_index_id = 0; + if (is_control_point) + { + uint32_t ids = ir.increase_bound_by(3); + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set(ids, uint_type); + set(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true); + set(ids + 2, ids, i, false); + invocation_id = ids + 1; + member_index_id = ids + 2; + } + + if (is_patch) + { + statement("if (gl_InvocationID == 0)"); + begin_scope(); + } + + if (type_is_array && !is_control_point) + { + uint32_t indices[2] = { j, i }; + auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); + statement(chain, " = ", lut_name, "[", j, "];"); + } + else if (is_control_point) + { + uint32_t indices[2] = { invocation_id, member_index_id }; + auto chain = access_chain_internal(var.self, indices, 2, 0, &meta); + statement(chain, " = ", lut_name, "[", 
builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];"); + } + else + { + auto chain = + access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); + statement(chain, " = ", to_expression(c.subconstants[i]), ";"); + } + + if (is_patch) + end_scope(); + }); } } - assert(paren_count == 0); } - - // If this expression contains any spaces which are not enclosed by parentheses, - // we need to enclose it so we can treat the whole string as an expression. - // This happens when two expressions have been part of a binary op earlier. - if (need_parens) - return join('(', expr, ')'); - else - return expr; -} - -string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr) -{ - // If this expression starts with an address-of operator ('&'), then - // just return the part after the operator. - // TODO: Strip parens if unnecessary? - if (expr.front() == '&') - return expr.substr(1); - else if (backend.native_pointers) - return join('*', expr); - else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct && - expr_type.pointer_depth == 1) + else if (is_control_point) { - return join(enclose_expression(expr), ".value"); + auto lut_name = join("_", var.self, "_init"); + statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type), + " = ", to_expression(var.initializer), ";"); + entry_func.fixup_hooks_in.push_back([&, lut_name]() { + statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];"); + }); } - else - return expr; -} - -string CompilerGLSL::address_of_expression(const std::string &expr) -{ - if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')') + else if (has_decoration(var.self, DecorationBuiltIn) && + BuiltIn(get_decoration(var.self, DecorationBuiltIn)) == BuiltInSampleMask) { - // If we have an expression which looks like (*foo), taking the address of it is the same as stripping - 
// the first two and last characters. We might have to enclose the expression. - // This doesn't work for cases like (*foo + 10), - // but this is an r-value expression which we cannot take the address of anyways. - return enclose_expression(expr.substr(2, expr.size() - 3)); + // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_< + entry_func.fixup_hooks_in.push_back([&] { + auto &c = this->get(var.initializer); + uint32_t num_constants = uint32_t(c.subconstants.size()); + for (uint32_t i = 0; i < num_constants; i++) + { + // Don't use to_expression on constant since it might be uint, just fish out the raw int. + statement(to_expression(var.self), "[", i, "] = ", + convert_to_string(this->get(c.subconstants[i]).scalar_i32()), ";"); + } + }); } - else if (expr.front() == '*') + else { - // If this expression starts with a dereference operator ('*'), then - // just return the part after the operator. - return expr.substr(1); + auto lut_name = join("_", var.self, "_init"); + statement("const ", type_to_glsl(type), " ", lut_name, + type_to_array_glsl(type), " = ", to_expression(var.initializer), ";"); + entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() { + if (is_patch) + { + statement("if (gl_InvocationID == 0)"); + begin_scope(); + } + statement(to_expression(var.self), " = ", lut_name, ";"); + if (is_patch) + end_scope(); + }); } - else - return join('&', enclose_expression(expr)); -} - -// Just like to_expression except that we enclose the expression inside parentheses if needed. -string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read) -{ - return enclose_expression(to_expression(id, register_expression_read)); } -string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read) +void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) { - // If we need to transpose, it will also take care of unpacking rules. 
- auto *e = maybe_get(id); - bool need_transpose = e && e->need_transpose; - if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked)) - return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id), - get_extended_decoration(id, SPIRVCrossDecorationPackedType)); - else - return to_expression(id, register_expression_read); -} + static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4", + "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" }; -string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read) -{ - // If we need to transpose, it will also take care of unpacking rules. - auto *e = maybe_get(id); - bool need_transpose = e && e->need_transpose; - if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked)) - return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id), - get_extended_decoration(id, SPIRVCrossDecorationPackedType)); - else - return to_enclosed_expression(id, register_expression_read); -} + if (!options.vulkan_semantics) + { + using Supp = ShaderSubgroupSupportHelper; + auto result = shader_subgroup_supporter.resolve(); -string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read) -{ - auto &type = expression_type(id); - if (type.pointer && should_dereference(id)) - return dereference_expression(type, to_enclosed_expression(id, register_expression_read)); - else - return to_expression(id, register_expression_read); -} + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result); -string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read) -{ - auto &type = expression_type(id); - if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) - return 
address_of_expression(to_enclosed_expression(id, register_expression_read)); - else - return to_unpacked_expression(id, register_expression_read); -} + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); -string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read) -{ - auto &type = expression_type(id); - if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) - return address_of_expression(to_enclosed_expression(id, register_expression_read)); - else - return to_enclosed_unpacked_expression(id, register_expression_read); -} + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } -string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index) -{ - auto expr = to_enclosed_expression(id); - if (has_extended_decoration(id, SPIRVCrossDecorationPacked)) - return join(expr, "[", index, "]"); - else - return join(expr, ".", 
index_to_swizzle(index)); -} + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result); -string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type) -{ - uint32_t size = to_array_size_literal(type); - auto &parent = get(type.parent_type); - string expr = "{ "; + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); - for (uint32_t i = 0; i < size; i++) - { - auto subexpr = join(base_expr, "[", convert_to_string(i), "]"); - if (parent.array.empty()) - expr += subexpr; - else - expr += to_rerolled_array_expression(subexpr, parent); + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupSize gl_WarpSizeNV"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupSize gl_SubGroupSizeARB"); + break; + case Supp::AMD_gcn_shader: + statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } - if (i + 1 < size) - expr += ", "; - } + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result); - expr += " }"; - return expr; -} + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); -string CompilerGLSL::to_composite_constructor_expression(uint32_t id) -{ - auto &type = expression_type(id); - if (!backend.array_is_value_type && !type.array.empty()) - { - // For this case, we need to "re-roll" an array initializer from a temporary. - // We cannot simply pass the array directly, since it decays to a pointer and it cannot - // participate in a struct initializer. E.g. 
- // float arr[2] = { 1.0, 2.0 }; - // Foo foo = { arr }; must be transformed to - // Foo foo = { { arr[0], arr[1] } }; - // The array sizes cannot be deduced from specialization constants since we cannot use any loops. + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } - // We're only triggering one read of the array expression, but this is fine since arrays have to be declared - // as temporaries anyways. - return to_rerolled_array_expression(to_enclosed_expression(id), type); - } - else - return to_expression(id); -} + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result); -string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) -{ - auto itr = invalid_expressions.find(id); - if (itr != end(invalid_expressions)) - handle_invalid_expression(id); + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); - if (ir.ids[id].get_type() == TypeExpression) - { - // We might have a more complex chain of dependencies. - // A possible scenario is that we - // - // %1 = OpLoad - // %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1. - // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that. - // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions. - // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before. 
- // - // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store, - // and see that we should not forward reads of the original variable. - auto &expr = get(id); - for (uint32_t dep : expr.expression_dependencies) - if (invalid_expressions.find(dep) != end(invalid_expressions)) - handle_invalid_expression(dep); - } + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupID gl_WarpIDNV"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } - if (register_expression_read) - track_expression_read(id); + if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups)) + { + auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result); - switch (ir.ids[id].get_type()) - { - case TypeExpression: - { - auto &e = get(id); - if (e.base_expression) - return to_enclosed_expression(e.base_expression) + e.expression; - else if (e.need_transpose) + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_NumSubgroups gl_WarpsPerSMNV"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First)) { - bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPacked); - return convert_row_major_matrix(e.expression, get(e.expression_type), is_packed); + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? 
"#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_shuffle: + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcastFirst(", t, + " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }"); + } + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcast(", t, + " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }"); + } + break; + case Supp::ARB_shader_ballot: + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcastFirst(", t, + " value) { return readFirstInvocationARB(value); }"); + } + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcast(", t, + " value, uint id) { return readInvocationARB(value, id); }"); + } + break; + default: + break; + } + } + statement("#endif"); + statement(""); } - else + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB)) { - if (is_forcing_recompilation()) + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result); + + for (auto &e : exts) { - // During first compilation phase, certain expression patterns can trigger exponential growth of memory. - // Avoid this by returning dummy expressions during this phase. - // Do not use empty expressions here, because those are sentinels for other cases. - return "_"; + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }"); + statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }"); + break; + default: + break; + } } - else - return e.expression; + statement("#else"); + statement("uint subgroupBallotFindLSB(uvec4 value)"); + begin_scope(); + statement("int firstLive = findLSB(value.x);"); + statement("return uint(firstLive != -1 ? 
firstLive : (findLSB(value.y) + 32));"); + end_scope(); + statement("uint subgroupBallotFindMSB(uvec4 value)"); + begin_scope(); + statement("int firstLive = findMSB(value.y);"); + statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));"); + end_scope(); + statement("#endif"); + statement(""); } - } - case TypeConstant: - { - auto &c = get(id); - auto &type = get(c.constant_type); + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result); - // WorkGroupSize may be a constant. - auto &dec = ir.meta[c.self].decoration; - if (dec.builtin) - return builtin_to_glsl(dec.builtin_type, StorageClassGeneric); - else if (c.specialization) - return to_name(id); - else if (c.is_used_as_lut) - return to_name(id); - else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) - return to_name(id); - else if (!type.array.empty() && !backend.can_declare_arrays_inline) - return to_name(id); - else - return constant_expression(c); - } + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? 
"#if" : "#elif", " defined(", name, ")"); - case TypeConstantOp: - return to_name(id); + switch (e) + { + case Supp::NV_gpu_shader_5: + statement("bool subgroupAll(bool value) { return allThreadsNV(value); }"); + statement("bool subgroupAny(bool value) { return anyThreadNV(value); }"); + statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }"); + break; + case Supp::ARB_shader_group_vote: + statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }"); + statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }"); + statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }"); + break; + case Supp::AMD_gcn_shader: + statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }"); + statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }"); + statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || " + "b == ballotAMD(true); }"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } - case TypeVariable: - { - auto &var = get(id); - // If we try to use a loop variable before the loop header, we have to redirect it to the static expression, - // the variable has not been declared yet. 
- if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable)) - return to_expression(var.static_expression); - else if (var.deferred_declaration) + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT)) { - var.deferred_declaration = false; - return variable_decl(var); + statement("#ifndef GL_KHR_shader_subgroup_vote"); + statement( + "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return " + "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }"); + for (const char *t : workaround_types) + statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")"); + statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND"); + statement("#endif"); + statement(""); } - else if (flattened_structs.count(id)) + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot)) { - return load_flattened_struct(var); + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? 
"#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }"); + break; + case Supp::ARB_shader_ballot: + statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); } - else + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect)) { - auto &dec = ir.meta[var.self].decoration; - if (dec.builtin) - return builtin_to_glsl(dec.builtin_type, var.storage); - else - return to_name(id); + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("bool subgroupElect()"); + begin_scope(); + statement("uvec4 activeMask = subgroupBallot(true);"); + statement("uint firstLive = subgroupBallotFindLSB(activeMask);"); + statement("return gl_SubgroupInvocationID == firstLive;"); + end_scope(); + statement("#endif"); + statement(""); } - } - case TypeCombinedImageSampler: - // This type should never be taken the expression of directly. - // The intention is that texture sampling functions will extract the image and samplers - // separately and take their expressions as needed. - // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler - // expression ala sampler2D(texture, sampler). - SPIRV_CROSS_THROW("Combined image samplers have no default expression representation."); - - case TypeAccessChain: - // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad. 
- SPIRV_CROSS_THROW("Access chains have no default expression representation."); - - default: - return to_name(id); - } -} - -string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) -{ - auto &type = get(cop.basetype); - bool binary = false; - bool unary = false; - string op; - - if (is_legacy() && is_unsigned_opcode(cop.opcode)) - SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); - - // TODO: Find a clean way to reuse emit_instruction. - switch (cop.opcode) - { - case OpSConvert: - case OpUConvert: - case OpFConvert: - op = type_to_glsl_constructor(type); - break; + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier)) + { + // Extensions we're using in place of GL_KHR_shader_subgroup_basic state + // that subgroup execute in lockstep so this barrier is implicit. + // However the GL 4.6 spec also states that `barrier` implies a shared memory barrier, + // and a specific test of optimizing scans by leveraging lock-step invocation execution, + // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`. 
+ // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19 + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupBarrier() { memoryBarrierShared(); }"); + statement("#endif"); + statement(""); + } -#define GLSL_BOP(opname, x) \ - case Op##opname: \ - binary = true; \ - op = x; \ - break + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier)) + { + if (model == spv::ExecutionModelGLCompute) + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }"); + statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }"); + statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }"); + statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }"); + statement("#endif"); + } + else + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupMemoryBarrier() { memoryBarrier(); }"); + statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }"); + statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }"); + statement("#endif"); + } + statement(""); + } -#define GLSL_UOP(opname, x) \ - case Op##opname: \ - unary = true; \ - op = x; \ - break + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout)) + { + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("bool subgroupInverseBallot(uvec4 value)"); + begin_scope(); + statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));"); + end_scope(); - GLSL_UOP(SNegate, "-"); - GLSL_UOP(Not, "~"); - GLSL_BOP(IAdd, "+"); - GLSL_BOP(ISub, "-"); - GLSL_BOP(IMul, "*"); - GLSL_BOP(SDiv, "/"); - GLSL_BOP(UDiv, "/"); - GLSL_BOP(UMod, "%"); - GLSL_BOP(SMod, "%"); - GLSL_BOP(ShiftRightLogical, ">>"); - GLSL_BOP(ShiftRightArithmetic, ">>"); - 
GLSL_BOP(ShiftLeftLogical, "<<"); - GLSL_BOP(BitwiseOr, "|"); - GLSL_BOP(BitwiseXor, "^"); - GLSL_BOP(BitwiseAnd, "&"); - GLSL_BOP(LogicalOr, "||"); - GLSL_BOP(LogicalAnd, "&&"); - GLSL_UOP(LogicalNot, "!"); - GLSL_BOP(LogicalEqual, "=="); - GLSL_BOP(LogicalNotEqual, "!="); - GLSL_BOP(IEqual, "=="); - GLSL_BOP(INotEqual, "!="); - GLSL_BOP(ULessThan, "<"); - GLSL_BOP(SLessThan, "<"); - GLSL_BOP(ULessThanEqual, "<="); - GLSL_BOP(SLessThanEqual, "<="); - GLSL_BOP(UGreaterThan, ">"); - GLSL_BOP(SGreaterThan, ">"); - GLSL_BOP(UGreaterThanEqual, ">="); - GLSL_BOP(SGreaterThanEqual, ">="); + statement("uint subgroupBallotInclusiveBitCount(uvec4 value)"); + begin_scope(); + statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;"); + statement("ivec2 c = bitCount(v);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); - case OpSelect: - { - if (cop.arguments.size() < 3) - SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + statement("uint subgroupBallotExclusiveBitCount(uvec4 value)"); + begin_scope(); + statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;"); + statement("ivec2 c = bitCount(v);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + statement("#endif"); + statement(""); + } - // This one is pretty annoying. It's triggered from - // uint(bool), int(bool) from spec constants. - // In order to preserve its compile-time constness in Vulkan GLSL, - // we need to reduce the OpSelect expression back to this simplified model. - // If we cannot, fail. 
- if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0])) + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount)) { - // Implement as a simple cast down below. + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("uint subgroupBallotBitCount(uvec4 value)"); + begin_scope(); + statement("ivec2 c = bitCount(value.xy);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + statement("#endif"); + statement(""); } - else + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract)) { - // Implement a ternary and pray the compiler understands it :) - return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]); + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("bool subgroupBallotBitExtract(uvec4 value, uint index)"); + begin_scope(); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("uint shifted = value.x >> index;"); + statement_no_indent("#else"); + statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);"); + statement_no_indent("#endif"); + statement("return (shifted & 1u) != 0u;"); + end_scope(); + statement("#endif"); + statement(""); } - break; } - case OpVectorShuffle: + if (!workaround_ubo_load_overload_types.empty()) { - string expr = type_to_glsl_constructor(type); - expr += "("; - - uint32_t left_components = expression_type(cop.arguments[0]).vecsize; - string left_arg = to_enclosed_expression(cop.arguments[0]); - string right_arg = to_enclosed_expression(cop.arguments[1]); - - for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++) + for (auto &type_id : workaround_ubo_load_overload_types) { - uint32_t index = cop.arguments[i]; - if (index >= left_components) - expr += right_arg + "." 
+ "xyzw"[index - left_components]; - else - expr += left_arg + "." + "xyzw"[index]; + auto &type = get(type_id); - if (i + 1 < uint32_t(cop.arguments.size())) - expr += ", "; + if (options.es && is_matrix(type)) + { + // Need both variants. + // GLSL cannot overload on precision, so need to dispatch appropriately. + statement("highp ", type_to_glsl(type), " spvWorkaroundRowMajor(highp ", type_to_glsl(type), " wrap) { return wrap; }"); + statement("mediump ", type_to_glsl(type), " spvWorkaroundRowMajorMP(mediump ", type_to_glsl(type), " wrap) { return wrap; }"); + } + else + { + statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type), " wrap) { return wrap; }"); + } } - - expr += ")"; - return expr; + statement(""); } - case OpCompositeExtract: + if (requires_transpose_2x2) { - auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); - return expr; + statement("mat2 spvTranspose(mat2 m)"); + begin_scope(); + statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);"); + end_scope(); + statement(""); } - case OpCompositeInsert: - SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported."); + if (requires_transpose_3x3) + { + statement("mat3 spvTranspose(mat3 m)"); + begin_scope(); + statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);"); + end_scope(); + statement(""); + } - default: - // Some opcodes are unimplemented here, these are currently not possible to test from glslang. 
- SPIRV_CROSS_THROW("Unimplemented spec constant op."); + if (requires_transpose_4x4) + { + statement("mat4 spvTranspose(mat4 m)"); + begin_scope(); + statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], " + "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);"); + end_scope(); + statement(""); } +} - uint32_t bit_width = 0; - if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert) - bit_width = expression_type(cop.arguments[0]).width; +// Returns a string representation of the ID, usable as a function arg. +// Default is to simply return the expression representation of the arg ID. +// Subclasses may override to modify the return value. +string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id) +{ + // Make sure that we use the name of the original variable, and not the parameter alias. + uint32_t name_id = id; + auto *var = maybe_get(id); + if (var && var->basevariable) + name_id = var->basevariable; + return to_expression(name_id); +} - SPIRType::BaseType input_type; - bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode); +void CompilerGLSL::force_temporary_and_recompile(uint32_t id) +{ + auto res = forced_temporaries.insert(id); - switch (cop.opcode) - { - case OpIEqual: - case OpINotEqual: - input_type = to_signed_basetype(bit_width); - break; + // Forcing new temporaries guarantees forward progress. + if (res.second) + force_recompile_guarantee_forward_progress(); + else + force_recompile(); +} - case OpSLessThan: - case OpSLessThanEqual: - case OpSGreaterThan: - case OpSGreaterThanEqual: - case OpSMod: - case OpSDiv: - case OpShiftRightArithmetic: - case OpSConvert: - case OpSNegate: - input_type = to_signed_basetype(bit_width); - break; +uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision) +{ + // Constants do not have innate precision. 
+ auto handle_type = ir.ids[id].get_type(); + if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef) + return id; - case OpULessThan: - case OpULessThanEqual: - case OpUGreaterThan: - case OpUGreaterThanEqual: - case OpUMod: - case OpUDiv: - case OpShiftRightLogical: - case OpUConvert: - input_type = to_unsigned_basetype(bit_width); - break; + // Ignore anything that isn't 32-bit values. + auto &type = get(type_id); + if (type.pointer) + return id; + if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int) + return id; - default: - input_type = type.basetype; - break; + if (precision == Options::DontCare) + { + // If precision is consumed as don't care (operations only consisting of constants), + // we need to bind the expression to a temporary, + // otherwise we have no way of controlling the precision later. + auto itr = forced_temporaries.insert(id); + if (itr.second) + force_recompile_guarantee_forward_progress(); + return id; } -#undef GLSL_BOP -#undef GLSL_UOP - if (binary) - { - if (cop.arguments.size() < 2) - SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + auto current_precision = has_decoration(id, DecorationRelaxedPrecision) ? 
Options::Mediump : Options::Highp; + if (current_precision == precision) + return id; - string cast_op0; - string cast_op1; - auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0], - cop.arguments[1], skip_cast_if_equal_type); + auto itr = temporary_to_mirror_precision_alias.find(id); + if (itr == temporary_to_mirror_precision_alias.end()) + { + uint32_t alias_id = ir.increase_bound_by(1); + auto &m = ir.meta[alias_id]; + if (auto *input_m = ir.find_meta(id)) + m = *input_m; - if (type.basetype != input_type && type.basetype != SPIRType::Boolean) + const char *prefix; + if (precision == Options::Mediump) { - expected_type.basetype = input_type; - auto expr = bitcast_glsl_op(type, expected_type); - expr += '('; - expr += join(cast_op0, " ", op, " ", cast_op1); - expr += ')'; - return expr; + set_decoration(alias_id, DecorationRelaxedPrecision); + prefix = "mp_copy_"; } else - return join("(", cast_op0, " ", op, " ", cast_op1, ")"); - } - else if (unary) - { - if (cop.arguments.size() < 1) - SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + { + unset_decoration(alias_id, DecorationRelaxedPrecision); + prefix = "hp_copy_"; + } - // Auto-bitcast to result type as needed. - // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants. 
- return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")"); + auto alias_name = join(prefix, to_name(id)); + ParsedIR::sanitize_underscores(alias_name); + set_name(alias_id, alias_name); + + emit_op(type_id, alias_id, to_expression(id), true); + temporary_to_mirror_precision_alias[id] = alias_id; + forced_temporaries.insert(id); + forced_temporaries.insert(alias_id); + force_recompile_guarantee_forward_progress(); + id = alias_id; } - else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert) + else { - if (cop.arguments.size() < 1) - SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + id = itr->second; + } - auto &arg_type = expression_type(cop.arguments[0]); - if (arg_type.width < type.width && input_type != arg_type.basetype) + return id; +} + +void CompilerGLSL::handle_invalid_expression(uint32_t id) +{ + // We tried to read an invalidated expression. + // This means we need another pass at compilation, but next time, + // force temporary variables so that they cannot be invalidated. + force_temporary_and_recompile(id); + + // If the invalid expression happened as a result of a CompositeInsert + // overwrite, we must block this from happening next iteration. + if (composite_insert_overwritten.count(id)) + block_composite_insert_overwrite.insert(id); +} + +// Converts the format of the current expression from packed to unpacked, +// by wrapping the expression in a constructor of the appropriate type. +// GLSL does not support packed formats, so simply return the expression. +// Subclasses that do will override. +string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool) +{ + return expr_str; +} + +// Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all. 
+void CompilerGLSL::strip_enclosed_expression(string &expr) +{ + if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')') + return; + + // Have to make sure that our first and last parens actually enclose everything inside it. + uint32_t paren_count = 0; + for (auto &c : expr) + { + if (c == '(') + paren_count++; + else if (c == ')') { - auto expected = arg_type; - expected.basetype = input_type; - return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")"); + paren_count--; + + // If we hit 0 and this is not the final char, our first and final parens actually don't + // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d). + if (paren_count == 0 && &c != &expr.back()) + return; } - else - return join(op, "(", to_expression(cop.arguments[0]), ")"); - } - else - { - if (cop.arguments.size() < 1) - SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); - return join(op, "(", to_expression(cop.arguments[0]), ")"); } + expr.erase(expr.size() - 1, 1); + expr.erase(begin(expr)); } -string CompilerGLSL::constant_expression(const SPIRConstant &c) +string CompilerGLSL::enclose_expression(const string &expr) { - auto &type = get(c.constant_type); + bool need_parens = false; - if (type.pointer) + // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back + // unary expressions. + if (!expr.empty()) { - return backend.null_pointer_literal; + auto c = expr.front(); + if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*') + need_parens = true; } - else if (!c.subconstants.empty()) + + if (!need_parens) { - // Handles Arrays and structures. 
- string res; - if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct && - type.array.empty()) - { - res = type_to_glsl_constructor(type) + "{ "; - } - else if (backend.use_initializer_list) - { - res = "{ "; - } - else + uint32_t paren_count = 0; + for (auto c : expr) { - res = type_to_glsl_constructor(type) + "("; + if (c == '(' || c == '[') + paren_count++; + else if (c == ')' || c == ']') + { + assert(paren_count); + paren_count--; + } + else if (c == ' ' && paren_count == 0) + { + need_parens = true; + break; + } } + assert(paren_count == 0); + } - for (auto &elem : c.subconstants) - { - auto &subc = get(elem); - if (subc.specialization) - res += to_name(elem); - else - res += constant_expression(subc); + // If this expression contains any spaces which are not enclosed by parentheses, + // we need to enclose it so we can treat the whole string as an expression. + // This happens when two expressions have been part of a binary op earlier. + if (need_parens) + return join('(', expr, ')'); + else + return expr; +} - if (&elem != &c.subconstants.back()) - res += ", "; - } +string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr) +{ + // If this expression starts with an address-of operator ('&'), then + // just return the part after the operator. + // TODO: Strip parens if unnecessary? + if (expr.front() == '&') + return expr.substr(1); + else if (backend.native_pointers) + return join('*', expr); + else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct && + expr_type.pointer_depth == 1) + { + return join(enclose_expression(expr), ".value"); + } + else + return expr; +} - res += backend.use_initializer_list ? 
" }" : ")"; - return res; +string CompilerGLSL::address_of_expression(const std::string &expr) +{ + if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')') + { + // If we have an expression which looks like (*foo), taking the address of it is the same as stripping + // the first two and last characters. We might have to enclose the expression. + // This doesn't work for cases like (*foo + 10), + // but this is an r-value expression which we cannot take the address of anyways. + return enclose_expression(expr.substr(2, expr.size() - 3)); } - else if (c.columns() == 1) + else if (expr.front() == '*') { - return constant_expression_vector(c, 0); + // If this expression starts with a dereference operator ('*'), then + // just return the part after the operator. + return expr.substr(1); } else - { - string res = type_to_glsl(get(c.constant_type)) + "("; - for (uint32_t col = 0; col < c.columns(); col++) - { - if (c.specialization_constant_id(col) != 0) - res += to_name(c.specialization_constant_id(col)); - else - res += constant_expression_vector(c, col); + return join('&', enclose_expression(expr)); +} - if (col + 1 < c.columns()) - res += ", "; - } - res += ")"; - return res; - } +// Just like to_expression except that we enclose the expression inside parentheses if needed. +string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read) +{ + return enclose_expression(to_expression(id, register_expression_read)); } -#ifdef _MSC_VER -// sprintf warning. -// We cannot rely on snprintf existing because, ..., MSVC. -#pragma warning(push) -#pragma warning(disable : 4996) -#endif +// Used explicitly when we want to read a row-major expression, but without any transpose shenanigans. +// need_transpose must be forced to false. 
+string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id) +{ + return unpack_expression_type(to_expression(id), expression_type(id), + get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), + has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true); +} -string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read) { - string res; - float float_value = c.scalar_f16(col, row); + // If we need to transpose, it will also take care of unpacking rules. + auto *e = maybe_get(id); + bool need_transpose = e && e->need_transpose; + bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); - // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots - // of complicated workarounds, just value-cast to the half type always. 
- if (std::isnan(float_value) || std::isinf(float_value)) + if (!need_transpose && (is_remapped || is_packed)) { - SPIRType type; - type.basetype = SPIRType::Half; - type.vecsize = 1; - type.columns = 1; - - if (float_value == numeric_limits::infinity()) - res = join(type_to_glsl(type), "(1.0 / 0.0)"); - else if (float_value == -numeric_limits::infinity()) - res = join(type_to_glsl(type), "(-1.0 / 0.0)"); - else if (std::isnan(float_value)) - res = join(type_to_glsl(type), "(0.0 / 0.0)"); - else - SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + return unpack_expression_type(to_expression(id, register_expression_read), + get_pointee_type(expression_type_id(id)), + get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), + has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false); } else - { - SPIRType type; - type.basetype = SPIRType::Half; - type.vecsize = 1; - type.columns = 1; - res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")"); - } + return to_expression(id, register_expression_read); +} - return res; +string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read) +{ + return enclose_expression(to_unpacked_expression(id, register_expression_read)); } -string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read) { - string res; - float float_value = c.scalar_f32(col, row); + auto &type = expression_type(id); + if (type.pointer && should_dereference(id)) + return dereference_expression(type, to_enclosed_expression(id, register_expression_read)); + else + return to_expression(id, register_expression_read); +} - if (std::isnan(float_value) || std::isinf(float_value)) +string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + if 
(type.pointer && expression_is_lvalue(id) && !should_dereference(id)) + return address_of_expression(to_enclosed_expression(id, register_expression_read)); + else + return to_unpacked_expression(id, register_expression_read); +} + +string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) + return address_of_expression(to_enclosed_expression(id, register_expression_read)); + else + return to_enclosed_unpacked_expression(id, register_expression_read); +} + +string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index) +{ + auto expr = to_enclosed_expression(id); + if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked)) + return join(expr, "[", index, "]"); + else + return join(expr, ".", index_to_swizzle(index)); +} + +string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c, + const uint32_t *chain, uint32_t length) +{ + // It is kinda silly if application actually enter this path since they know the constant up front. + // It is useful here to extract the plain constant directly. + SPIRConstant tmp; + tmp.constant_type = result_type; + auto &composite_type = get(c.constant_type); + assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty()); + assert(!c.specialization); + + if (is_matrix(composite_type)) { - // Use special representation. 
- if (!is_legacy()) + if (length == 2) { - SPIRType out_type; - SPIRType in_type; - out_type.basetype = SPIRType::Float; - in_type.basetype = SPIRType::UInt; - out_type.vecsize = 1; - in_type.vecsize = 1; - out_type.width = 32; - in_type.width = 32; - - char print_buffer[32]; - sprintf(print_buffer, "0x%xu", c.scalar(col, row)); - res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")"); + tmp.m.c[0].vecsize = 1; + tmp.m.columns = 1; + tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]]; } else { - if (float_value == numeric_limits::infinity()) - { - if (backend.float_literal_suffix) - res = "(1.0f / 0.0f)"; - else - res = "(1.0 / 0.0)"; - } - else if (float_value == -numeric_limits::infinity()) - { - if (backend.float_literal_suffix) - res = "(-1.0f / 0.0f)"; - else - res = "(-1.0 / 0.0)"; - } - else if (std::isnan(float_value)) - { - if (backend.float_literal_suffix) - res = "(0.0f / 0.0f)"; - else - res = "(0.0 / 0.0)"; - } - else - SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + assert(length == 1); + tmp.m.c[0].vecsize = composite_type.vecsize; + tmp.m.columns = 1; + tmp.m.c[0] = c.m.c[chain[0]]; } } else { - res = convert_to_string(float_value, current_locale_radix_character); - if (backend.float_literal_suffix) - res += "f"; + assert(length == 1); + tmp.m.c[0].vecsize = 1; + tmp.m.columns = 1; + tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]]; } - return res; + return constant_expression(tmp); } -std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type) { - string res; - double double_value = c.scalar_f64(col, row); + uint32_t size = to_array_size_literal(type); + auto &parent = get(type.parent_type); + string expr = "{ "; - if (std::isnan(double_value) || std::isinf(double_value)) + for (uint32_t i = 0; i < size; i++) { - // Use special representation. 
- if (!is_legacy()) - { - SPIRType out_type; - SPIRType in_type; - out_type.basetype = SPIRType::Double; - in_type.basetype = SPIRType::UInt64; - out_type.vecsize = 1; - in_type.vecsize = 1; - out_type.width = 64; - in_type.width = 64; + auto subexpr = join(base_expr, "[", convert_to_string(i), "]"); + if (parent.array.empty()) + expr += subexpr; + else + expr += to_rerolled_array_expression(subexpr, parent); - uint64_t u64_value = c.scalar_u64(col, row); + if (i + 1 < size) + expr += ", "; + } - if (options.es) - SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile."); - require_extension_internal("GL_ARB_gpu_shader_int64"); + expr += " }"; + return expr; +} - char print_buffer[64]; - sprintf(print_buffer, "0x%llx%s", static_cast(u64_value), - backend.long_long_literal_suffix ? "ull" : "ul"); - res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")"); - } - else - { - if (options.es) - SPIRV_CROSS_THROW("FP64 not supported in ES profile."); - if (options.version < 400) - require_extension_internal("GL_ARB_gpu_shader_fp64"); +string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool block_like_type) +{ + auto &type = expression_type(id); - if (double_value == numeric_limits::infinity()) - { - if (backend.double_literal_suffix) - res = "(1.0lf / 0.0lf)"; - else - res = "(1.0 / 0.0)"; - } - else if (double_value == -numeric_limits::infinity()) - { - if (backend.double_literal_suffix) - res = "(-1.0lf / 0.0lf)"; - else - res = "(-1.0 / 0.0)"; - } - else if (std::isnan(double_value)) - { - if (backend.double_literal_suffix) - res = "(0.0lf / 0.0lf)"; - else - res = "(0.0 / 0.0)"; - } - else - SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); - } - } - else + bool reroll_array = !type.array.empty() && + (!backend.array_is_value_type || + (block_like_type && !backend.array_is_value_type_in_buffer_blocks)); + + if (reroll_array) { - res = convert_to_string(double_value, 
current_locale_radix_character); - if (backend.double_literal_suffix) - res += "lf"; - } + // For this case, we need to "re-roll" an array initializer from a temporary. + // We cannot simply pass the array directly, since it decays to a pointer and it cannot + // participate in a struct initializer. E.g. + // float arr[2] = { 1.0, 2.0 }; + // Foo foo = { arr }; must be transformed to + // Foo foo = { { arr[0], arr[1] } }; + // The array sizes cannot be deduced from specialization constants since we cannot use any loops. - return res; + // We're only triggering one read of the array expression, but this is fine since arrays have to be declared + // as temporaries anyways. + return to_rerolled_array_expression(to_enclosed_expression(id), type); + } + else + return to_unpacked_expression(id); } -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) +string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id) { - auto type = get(c.constant_type); - type.columns = 1; + string expr = to_expression(id); - auto scalar_type = type; - scalar_type.vecsize = 1; + if (has_decoration(id, DecorationNonUniform)) + convert_non_uniform_expression(expr, id); - string res; - bool splat = backend.use_constructor_splatting && c.vector_size() > 1; - bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1; + return expr; +} - if (!type_is_floating_point(type)) +string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) +{ + auto itr = invalid_expressions.find(id); + if (itr != end(invalid_expressions)) + handle_invalid_expression(id); + + if (ir.ids[id].get_type() == TypeExpression) { - // Cannot swizzle literal integers as a special case. - swizzle_splat = false; + // We might have a more complex chain of dependencies. + // A possible scenario is that we + // + // %1 = OpLoad + // %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1. 
+ // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that. + // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions. + // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before. + // + // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store, + // and see that we should not forward reads of the original variable. + auto &expr = get(id); + for (uint32_t dep : expr.expression_dependencies) + if (invalid_expressions.find(dep) != end(invalid_expressions)) + handle_invalid_expression(dep); } - if (splat || swizzle_splat) + if (register_expression_read) + track_expression_read(id); + + switch (ir.ids[id].get_type()) { - // Cannot use constant splatting if we have specialization constants somewhere in the vector. - for (uint32_t i = 0; i < c.vector_size(); i++) + case TypeExpression: + { + auto &e = get(id); + if (e.base_expression) + return to_enclosed_expression(e.base_expression) + e.expression; + else if (e.need_transpose) { - if (c.specialization_constant_id(vector, i) != 0) + // This should not be reached for access chains, since we always deal explicitly with transpose state + // when consuming an access chain expression. 
+ uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + return convert_row_major_matrix(e.expression, get(e.expression_type), physical_type_id, + is_packed); + } + else if (flattened_structs.count(id)) + { + return load_flattened_struct(e.expression, get(e.expression_type)); + } + else + { + if (is_forcing_recompilation()) { - splat = false; - swizzle_splat = false; - break; + // During first compilation phase, certain expression patterns can trigger exponential growth of memory. + // Avoid this by returning dummy expressions during this phase. + // Do not use empty expressions here, because those are sentinels for other cases. + return "_"; } + else + return e.expression; } } - if (splat || swizzle_splat) + case TypeConstant: { - if (type.width == 64) + auto &c = get(id); + auto &type = get(c.constant_type); + + // WorkGroupSize may be a constant. + if (has_decoration(c.self, DecorationBuiltIn)) + return builtin_to_glsl(BuiltIn(get_decoration(c.self, DecorationBuiltIn)), StorageClassGeneric); + else if (c.specialization) { - uint64_t ident = c.scalar_u64(vector, 0); - for (uint32_t i = 1; i < c.vector_size(); i++) + if (backend.workgroup_size_is_hidden) { - if (ident != c.scalar_u64(vector, i)) + int wg_index = get_constant_mapping_to_workgroup_component(c); + if (wg_index >= 0) { - splat = false; - swizzle_splat = false; - break; + auto wg_size = join(builtin_to_glsl(BuiltInWorkgroupSize, StorageClassInput), vector_swizzle(1, wg_index)); + if (type.basetype != SPIRType::UInt) + wg_size = bitcast_expression(type, SPIRType::UInt, wg_size); + return wg_size; } } + + if (expression_is_forwarded(id)) + return constant_expression(c); + + return to_name(id); } + else if (c.is_used_as_lut) + return to_name(id); + else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) + return to_name(id); + else if 
(!type.array.empty() && !backend.can_declare_arrays_inline) + return to_name(id); else - { - uint32_t ident = c.scalar(vector, 0); - for (uint32_t i = 1; i < c.vector_size(); i++) - { - if (ident != c.scalar(vector, i)) - { - splat = false; - swizzle_splat = false; - } - } - } + return constant_expression(c); } - if (c.vector_size() > 1 && !swizzle_splat) - res += type_to_glsl(type) + "("; + case TypeConstantOp: + return to_name(id); - switch (type.basetype) + case TypeVariable: { - case SPIRType::Half: - if (splat || swizzle_splat) - { - res += convert_half_to_string(c, vector, 0); - if (swizzle_splat) - res = remap_swizzle(get(c.constant_type), 1, res); - } - else + auto &var = get(id); + // If we try to use a loop variable before the loop header, we have to redirect it to the static expression, + // the variable has not been declared yet. + if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable)) { - for (uint32_t i = 0; i < c.vector_size(); i++) + // We might try to load from a loop variable before it has been initialized. + // Prefer static expression and fallback to initializer. + if (var.static_expression) + return to_expression(var.static_expression); + else if (var.initializer) + return to_expression(var.initializer); + else { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - res += convert_half_to_string(c, vector, i); - - if (i + 1 < c.vector_size()) - res += ", "; + // We cannot declare the variable yet, so have to fake it. 
+ uint32_t undef_id = ir.increase_bound_by(1); + return emit_uninitialized_temporary_expression(get_variable_data_type_id(var), undef_id).expression; } } - break; - - case SPIRType::Float: - if (splat || swizzle_splat) + else if (var.deferred_declaration) { - res += convert_float_to_string(c, vector, 0); - if (swizzle_splat) - res = remap_swizzle(get(c.constant_type), 1, res); + var.deferred_declaration = false; + return variable_decl(var); + } + else if (flattened_structs.count(id)) + { + return load_flattened_struct(to_name(id), get(var.basetype)); } else { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - res += convert_float_to_string(c, vector, i); - - if (i + 1 < c.vector_size()) - res += ", "; - } + auto &dec = ir.meta[var.self].decoration; + if (dec.builtin) + return builtin_to_glsl(dec.builtin_type, var.storage); + else + return to_name(id); } - break; + } - case SPIRType::Double: - if (splat || swizzle_splat) - { - res += convert_double_to_string(c, vector, 0); - if (swizzle_splat) - res = remap_swizzle(get(c.constant_type), 1, res); - } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - res += convert_double_to_string(c, vector, i); + case TypeCombinedImageSampler: + // This type should never be taken the expression of directly. + // The intention is that texture sampling functions will extract the image and samplers + // separately and take their expressions as needed. + // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler + // expression ala sampler2D(texture, sampler). 
+ SPIRV_CROSS_THROW("Combined image samplers have no default expression representation."); - if (i + 1 < c.vector_size()) - res += ", "; - } - } - break; + case TypeAccessChain: + // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad. + SPIRV_CROSS_THROW("Access chains have no default expression representation."); - case SPIRType::Int64: - if (splat) + default: + return to_name(id); + } +} + +SmallVector CompilerGLSL::get_composite_constant_ids(ConstantID const_id) +{ + if (auto *constant = maybe_get(const_id)) + { + const auto &type = get(constant->constant_type); + if (is_array(type) || type.basetype == SPIRType::Struct) + return constant->subconstants; + if (is_matrix(type)) + return SmallVector(constant->m.id); + if (is_vector(type)) + return SmallVector(constant->m.c[0].id); + SPIRV_CROSS_THROW("Unexpected scalar constant!"); + } + if (!const_composite_insert_ids.count(const_id)) + SPIRV_CROSS_THROW("Unimplemented for this OpSpecConstantOp!"); + return const_composite_insert_ids[const_id]; +} + +void CompilerGLSL::fill_composite_constant(SPIRConstant &constant, TypeID type_id, + const SmallVector &initializers) +{ + auto &type = get(type_id); + constant.specialization = true; + if (is_array(type) || type.basetype == SPIRType::Struct) + { + constant.subconstants = initializers; + } + else if (is_matrix(type)) + { + constant.m.columns = type.columns; + for (uint32_t i = 0; i < type.columns; ++i) { - res += convert_to_string(c.scalar_i64(vector, 0)); - if (backend.long_long_literal_suffix) - res += "ll"; - else - res += "l"; + constant.m.id[i] = initializers[i]; + constant.m.c[i].vecsize = type.vecsize; } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += convert_to_string(c.scalar_i64(vector, i)); - if (backend.long_long_literal_suffix) - res += "ll"; - 
else - res += "l"; - } + } + else if (is_vector(type)) + { + constant.m.c[0].vecsize = type.vecsize; + for (uint32_t i = 0; i < type.vecsize; ++i) + constant.m.c[0].id[i] = initializers[i]; + } + else + SPIRV_CROSS_THROW("Unexpected scalar in SpecConstantOp CompositeInsert!"); +} - if (i + 1 < c.vector_size()) - res += ", "; - } - } +void CompilerGLSL::set_composite_constant(ConstantID const_id, TypeID type_id, + const SmallVector &initializers) +{ + if (maybe_get(const_id)) + { + const_composite_insert_ids[const_id] = initializers; + return; + } + + auto &constant = set(const_id, type_id); + fill_composite_constant(constant, type_id, initializers); + forwarded_temporaries.insert(const_id); +} + +TypeID CompilerGLSL::get_composite_member_type(TypeID type_id, uint32_t member_idx) +{ + auto &type = get(type_id); + if (is_array(type)) + return type.parent_type; + if (type.basetype == SPIRType::Struct) + return type.member_types[member_idx]; + if (is_matrix(type)) + return type.parent_type; + if (is_vector(type)) + return type.parent_type; + SPIRV_CROSS_THROW("Shouldn't reach lower than vector handling OpSpecConstantOp CompositeInsert!"); +} + +string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) +{ + auto &type = get(cop.basetype); + bool binary = false; + bool unary = false; + string op; + + if (is_legacy() && is_unsigned_opcode(cop.opcode)) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); + + // TODO: Find a clean way to reuse emit_instruction. 
+ switch (cop.opcode) + { + case OpSConvert: + case OpUConvert: + case OpFConvert: + op = type_to_glsl_constructor(type); break; - case SPIRType::UInt64: - if (splat) +#define GLSL_BOP(opname, x) \ + case Op##opname: \ + binary = true; \ + op = x; \ + break + +#define GLSL_UOP(opname, x) \ + case Op##opname: \ + unary = true; \ + op = x; \ + break + + GLSL_UOP(SNegate, "-"); + GLSL_UOP(Not, "~"); + GLSL_BOP(IAdd, "+"); + GLSL_BOP(ISub, "-"); + GLSL_BOP(IMul, "*"); + GLSL_BOP(SDiv, "/"); + GLSL_BOP(UDiv, "/"); + GLSL_BOP(UMod, "%"); + GLSL_BOP(SMod, "%"); + GLSL_BOP(ShiftRightLogical, ">>"); + GLSL_BOP(ShiftRightArithmetic, ">>"); + GLSL_BOP(ShiftLeftLogical, "<<"); + GLSL_BOP(BitwiseOr, "|"); + GLSL_BOP(BitwiseXor, "^"); + GLSL_BOP(BitwiseAnd, "&"); + GLSL_BOP(LogicalOr, "||"); + GLSL_BOP(LogicalAnd, "&&"); + GLSL_UOP(LogicalNot, "!"); + GLSL_BOP(LogicalEqual, "=="); + GLSL_BOP(LogicalNotEqual, "!="); + GLSL_BOP(IEqual, "=="); + GLSL_BOP(INotEqual, "!="); + GLSL_BOP(ULessThan, "<"); + GLSL_BOP(SLessThan, "<"); + GLSL_BOP(ULessThanEqual, "<="); + GLSL_BOP(SLessThanEqual, "<="); + GLSL_BOP(UGreaterThan, ">"); + GLSL_BOP(SGreaterThan, ">"); + GLSL_BOP(UGreaterThanEqual, ">="); + GLSL_BOP(SGreaterThanEqual, ">="); + + case OpSRem: + { + uint32_t op0 = cop.arguments[0]; + uint32_t op1 = cop.arguments[1]; + return join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + } + + case OpSelect: + { + if (cop.arguments.size() < 3) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + // This one is pretty annoying. It's triggered from + // uint(bool), int(bool) from spec constants. + // In order to preserve its compile-time constness in Vulkan GLSL, + // we need to reduce the OpSelect expression back to this simplified model. + // If we cannot, fail. 
+ if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0])) { - res += convert_to_string(c.scalar_u64(vector, 0)); - if (backend.long_long_literal_suffix) - res += "ull"; - else - res += "ul"; + // Implement as a simple cast down below. } else { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += convert_to_string(c.scalar_u64(vector, i)); - if (backend.long_long_literal_suffix) - res += "ull"; - else - res += "ul"; - } - - if (i + 1 < c.vector_size()) - res += ", "; - } + // Implement a ternary and pray the compiler understands it :) + return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]); } break; + } - case SPIRType::UInt: - if (splat) + case OpVectorShuffle: + { + string expr = type_to_glsl_constructor(type); + expr += "("; + + uint32_t left_components = expression_type(cop.arguments[0]).vecsize; + string left_arg = to_enclosed_expression(cop.arguments[0]); + string right_arg = to_enclosed_expression(cop.arguments[1]); + + for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++) { - res += convert_to_string(c.scalar(vector, 0)); - if (is_legacy()) + uint32_t index = cop.arguments[i]; + if (index == 0xFFFFFFFF) { - // Fake unsigned constant literals with signed ones if possible. - // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. 
- if (c.scalar_i32(vector, 0) < 0) - SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); + SPIRConstant c; + c.constant_type = type.parent_type; + assert(type.parent_type != ID(0)); + expr += constant_expression(c); } - else if (backend.uint32_t_literal_suffix) - res += "u"; - } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) + else if (index >= left_components) { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += convert_to_string(c.scalar(vector, i)); - if (is_legacy()) - { - // Fake unsigned constant literals with signed ones if possible. - // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. - if (c.scalar_i32(vector, i) < 0) - SPIRV_CROSS_THROW( - "Tried to convert uint literal into int, but this made the literal negative."); - } - else if (backend.uint32_t_literal_suffix) - res += "u"; - } - - if (i + 1 < c.vector_size()) - res += ", "; + expr += right_arg + "." + "xyzw"[index - left_components]; } - } - break; - - case SPIRType::Int: - if (splat) - res += convert_to_string(c.scalar_i32(vector, 0)); - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) + else { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - res += convert_to_string(c.scalar_i32(vector, i)); - if (i + 1 < c.vector_size()) - res += ", "; + expr += left_arg + "." + "xyzw"[index]; } + + if (i + 1 < uint32_t(cop.arguments.size())) + expr += ", "; } - break; - case SPIRType::UShort: - if (splat) - { - res += convert_to_string(c.scalar(vector, 0)); - if (is_legacy()) - { - // Fake unsigned constant literals with signed ones if possible. - // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. 
- if (c.scalar_i16(vector, 0) < 0) - SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); - } - else - res += backend.uint16_t_literal_suffix; - } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += convert_to_string(c.scalar(vector, i)); - if (is_legacy()) - { - // Fake unsigned constant literals with signed ones if possible. - // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. - if (c.scalar_i16(vector, i) < 0) - SPIRV_CROSS_THROW( - "Tried to convert uint literal into int, but this made the literal negative."); - } - else - res += backend.uint16_t_literal_suffix; - } + expr += ")"; + return expr; + } - if (i + 1 < c.vector_size()) - res += ", "; - } - } - break; + case OpCompositeExtract: + { + auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + return expr; + } - case SPIRType::Short: - if (splat) - { - res += convert_to_string(c.scalar_i16(vector, 0)); - res += backend.int16_t_literal_suffix; - } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += convert_to_string(c.scalar_i16(vector, i)); - res += backend.int16_t_literal_suffix; - } - if (i + 1 < c.vector_size()) - res += ", "; - } - } - break; + case OpCompositeInsert: + { + SmallVector new_init = get_composite_constant_ids(cop.arguments[1]); + uint32_t idx; + uint32_t target_id = cop.self; + uint32_t target_type_id = cop.basetype; + // We have to drill down to the part we want to modify, and create new + // constants for each containing part. 
+ for (idx = 2; idx < cop.arguments.size() - 1; ++idx) + { + uint32_t new_const = ir.increase_bound_by(1); + uint32_t old_const = new_init[cop.arguments[idx]]; + new_init[cop.arguments[idx]] = new_const; + set_composite_constant(target_id, target_type_id, new_init); + new_init = get_composite_constant_ids(old_const); + target_id = new_const; + target_type_id = get_composite_member_type(target_type_id, cop.arguments[idx]); + } + // Now replace the initializer with the one from this instruction. + new_init[cop.arguments[idx]] = cop.arguments[0]; + set_composite_constant(target_id, target_type_id, new_init); + SPIRConstant tmp_const(cop.basetype); + fill_composite_constant(tmp_const, cop.basetype, const_composite_insert_ids[cop.self]); + return constant_expression(tmp_const); + } - case SPIRType::UByte: - if (splat) - { - res += convert_to_string(c.scalar_u8(vector, 0)); - } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += type_to_glsl(scalar_type); - res += "("; - res += convert_to_string(c.scalar_u8(vector, i)); - res += ")"; - } + default: + // Some opcodes are unimplemented here, these are currently not possible to test from glslang. 
+ SPIRV_CROSS_THROW("Unimplemented spec constant op."); + } - if (i + 1 < c.vector_size()) - res += ", "; - } - } - break; + uint32_t bit_width = 0; + if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert) + bit_width = expression_type(cop.arguments[0]).width; - case SPIRType::SByte: - if (splat) - { - res += convert_to_string(c.scalar_i8(vector, 0)); - } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += type_to_glsl(scalar_type); - res += "("; - res += convert_to_string(c.scalar_i8(vector, i)); - res += ")"; - } + SPIRType::BaseType input_type; + bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode); - if (i + 1 < c.vector_size()) - res += ", "; - } - } + switch (cop.opcode) + { + case OpIEqual: + case OpINotEqual: + input_type = to_signed_basetype(bit_width); break; - case SPIRType::Boolean: - if (splat) - res += c.scalar(vector, 0) ? "true" : "false"; - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - res += c.scalar(vector, i) ? 
"true" : "false"; + case OpSLessThan: + case OpSLessThanEqual: + case OpSGreaterThan: + case OpSGreaterThanEqual: + case OpSMod: + case OpSDiv: + case OpShiftRightArithmetic: + case OpSConvert: + case OpSNegate: + input_type = to_signed_basetype(bit_width); + break; - if (i + 1 < c.vector_size()) - res += ", "; - } - } + case OpULessThan: + case OpULessThanEqual: + case OpUGreaterThan: + case OpUGreaterThanEqual: + case OpUMod: + case OpUDiv: + case OpShiftRightLogical: + case OpUConvert: + input_type = to_unsigned_basetype(bit_width); break; default: - SPIRV_CROSS_THROW("Invalid constant expression basetype."); + input_type = type.basetype; + break; } - if (c.vector_size() > 1 && !swizzle_splat) - res += ")"; - - return res; -} +#undef GLSL_BOP +#undef GLSL_UOP + if (binary) + { + if (cop.arguments.size() < 2) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); -SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id) -{ - forced_temporaries.insert(id); - emit_uninitialized_temporary(type, id); - return set(id, to_name(id), type, true); -} + string cast_op0; + string cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0], + cop.arguments[1], skip_cast_if_equal_type); -void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id) -{ - // If we're declaring temporaries inside continue blocks, - // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. 
- if (current_continue_block && !hoisted_temporaries.count(result_id)) - { - auto &header = get(current_continue_block->loop_dominator); - if (find_if(begin(header.declare_temporary), end(header.declare_temporary), - [result_type, result_id](const pair &tmp) { - return tmp.first == result_type && tmp.second == result_id; - }) == end(header.declare_temporary)) + if (type.basetype != input_type && type.basetype != SPIRType::Boolean) { - header.declare_temporary.emplace_back(result_type, result_id); - hoisted_temporaries.insert(result_id); - force_recompile(); + expected_type.basetype = input_type; + auto expr = bitcast_glsl_op(type, expected_type); + expr += '('; + expr += join(cast_op0, " ", op, " ", cast_op1); + expr += ')'; + return expr; } + else + return join("(", cast_op0, " ", op, " ", cast_op1, ")"); } - else if (hoisted_temporaries.count(result_id) == 0) + else if (unary) { - auto &type = get(result_type); - auto &flags = ir.meta[result_id].decoration.decoration_flags; + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); - // The result_id has not been made into an expression yet, so use flags interface. - add_local_variable_name(result_id); - statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), ";"); + // Auto-bitcast to result type as needed. + // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants. + return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")"); } -} - -string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) -{ - auto &type = get(result_type); - auto &flags = ir.meta[result_id].decoration.decoration_flags; - - // If we're declaring temporaries inside continue blocks, - // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. 
- if (current_continue_block && !hoisted_temporaries.count(result_id)) + else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert) { - auto &header = get(current_continue_block->loop_dominator); - if (find_if(begin(header.declare_temporary), end(header.declare_temporary), - [result_type, result_id](const pair &tmp) { - return tmp.first == result_type && tmp.second == result_id; - }) == end(header.declare_temporary)) + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + auto &arg_type = expression_type(cop.arguments[0]); + if (arg_type.width < type.width && input_type != arg_type.basetype) { - header.declare_temporary.emplace_back(result_type, result_id); - hoisted_temporaries.insert(result_id); - force_recompile(); + auto expected = arg_type; + expected.basetype = input_type; + return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")"); } - - return join(to_name(result_id), " = "); - } - else if (hoisted_temporaries.count(result_id)) - { - // The temporary has already been declared earlier, so just "declare" the temporary by writing to it. - return join(to_name(result_id), " = "); + else + return join(op, "(", to_expression(cop.arguments[0]), ")"); } else { - // The result_id has not been made into an expression yet, so use flags interface. 
- add_local_variable_name(result_id); - return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = "); + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + return join(op, "(", to_expression(cop.arguments[0]), ")"); } } -bool CompilerGLSL::expression_is_forwarded(uint32_t id) +string CompilerGLSL::constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope) { - return forwarded_temporaries.find(id) != end(forwarded_temporaries); -} + auto &type = get(c.constant_type); -SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, - bool suppress_usage_tracking) -{ - if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries))) + if (type.pointer) { - // Just forward it without temporary. - // If the forward is trivial, we do not force flushing to temporary for this expression. - if (!suppress_usage_tracking) - forwarded_temporaries.insert(result_id); - - return set(result_id, rhs, result_type, true); + return backend.null_pointer_literal; } - else + else if (!c.subconstants.empty()) { - // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are). - statement(declare_temporary(result_type, result_id), rhs, ";"); - return set(result_id, to_name(result_id), result_type, true); - } -} + // Handles Arrays and structures. + string res; -void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) -{ - bool forward = should_forward(op0); - emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward); - inherit_expression_dependencies(result_id, op0); -} + // Only consider the decay if we are inside a struct scope where we are emitting a member with Offset decoration. + // Outside a block-like struct declaration, we can always bind to a constant array with templated type. 
+ // Should look at ArrayStride here as well, but it's possible to declare a constant struct + // with Offset = 0, using no ArrayStride on the enclosed array type. + // A particular CTS test hits this scenario. + bool array_type_decays = inside_block_like_struct_scope && + !type.array.empty() && !backend.array_is_value_type_in_buffer_blocks; -void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) -{ - bool forward = should_forward(op0) && should_forward(op1); - emit_op(result_type, result_id, - join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward); + // Allow Metal to use the array template to make arrays a value type + bool needs_trailing_tracket = false; + if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct && + type.array.empty()) + { + res = type_to_glsl_constructor(type) + "{ "; + } + else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type && + !type.array.empty() && !array_type_decays) + { + res = type_to_glsl_constructor(type) + "({ "; + needs_trailing_tracket = true; + } + else if (backend.use_initializer_list) + { + res = "{ "; + } + else + { + res = type_to_glsl_constructor(type) + "("; + } - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); -} + uint32_t subconstant_index = 0; + for (auto &elem : c.subconstants) + { + if (auto *op = maybe_get(elem)) + { + res += constant_op_expression(*op); + } + else if (maybe_get(elem) != nullptr) + { + res += to_name(elem); + } + else + { + auto &subc = get(elem); + if (subc.specialization && !expression_is_forwarded(elem)) + res += to_name(elem); + else + { + if (type.array.empty() && type.basetype == SPIRType::Struct) + { + // When we get down to emitting struct members, override the block-like information. 
+ // For constants, we can freely mix and match block-like state. + inside_block_like_struct_scope = + has_member_decoration(type.self, subconstant_index, DecorationOffset); + } -void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op) -{ - auto &type = get(result_type); - auto expr = type_to_glsl_constructor(type); - expr += '('; - for (uint32_t i = 0; i < type.vecsize; i++) - { - // Make sure to call to_expression multiple times to ensure - // that these expressions are properly flushed to temporaries if needed. - expr += op; - expr += to_extract_component_expression(operand, i); + res += constant_expression(subc, inside_block_like_struct_scope); + } + } - if (i + 1 < type.vecsize) - expr += ", "; - } - expr += ')'; - emit_op(result_type, result_id, expr, should_forward(operand)); + if (&elem != &c.subconstants.back()) + res += ", "; - inherit_expression_dependencies(result_id, operand); -} + subconstant_index++; + } -void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op) -{ - auto &type = get(result_type); - auto expr = type_to_glsl_constructor(type); - expr += '('; - for (uint32_t i = 0; i < type.vecsize; i++) - { - // Make sure to call to_expression multiple times to ensure - // that these expressions are properly flushed to temporaries if needed. - expr += to_extract_component_expression(op0, i); - expr += ' '; - expr += op; - expr += ' '; - expr += to_extract_component_expression(op1, i); + res += backend.use_initializer_list ? " }" : ")"; + if (needs_trailing_tracket) + res += ")"; - if (i + 1 < type.vecsize) - expr += ", "; + return res; } - expr += ')'; - emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0) + { + // Metal tessellation likes empty structs which are then constant expressions. 
+ if (backend.supports_empty_struct) + return "{ }"; + else if (backend.use_typed_initializer_list) + return join(type_to_glsl(get(c.constant_type)), "{ 0 }"); + else if (backend.use_initializer_list) + return "{ 0 }"; + else + return join(type_to_glsl(get(c.constant_type)), "(0)"); + } + else if (c.columns() == 1) + { + return constant_expression_vector(c, 0); + } + else + { + string res = type_to_glsl(get(c.constant_type)) + "("; + for (uint32_t col = 0; col < c.columns(); col++) + { + if (c.specialization_constant_id(col) != 0) + res += to_name(c.specialization_constant_id(col)); + else + res += constant_expression_vector(c, col); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + if (col + 1 < c.columns()) + res += ", "; + } + res += ")"; + return res; + } } -SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type, - uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type) -{ - auto &type0 = expression_type(op0); - auto &type1 = expression_type(op1); - - // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs. - // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected - // since equality test is exactly the same. - bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type); +#ifdef _MSC_VER +// snprintf does not exist or is buggy on older MSVC versions, some of them +// being used by MinGW. Use sprintf instead and disable corresponding warning. +#pragma warning(push) +#pragma warning(disable : 4996) +#endif - // Create a fake type so we can bitcast to it. - // We only deal with regular arithmetic types here like int, uints and so on. 
- SPIRType expected_type; - expected_type.basetype = input_type; - expected_type.vecsize = type0.vecsize; - expected_type.columns = type0.columns; - expected_type.width = type0.width; +string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + float float_value = c.scalar_f16(col, row); - if (cast) + // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots + // of complicated workarounds, just value-cast to the half type always. + if (std::isnan(float_value) || std::isinf(float_value)) { - cast_op0 = bitcast_glsl(expected_type, op0); - cast_op1 = bitcast_glsl(expected_type, op1); + SPIRType type; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + + if (float_value == numeric_limits::infinity()) + res = join(type_to_glsl(type), "(1.0 / 0.0)"); + else if (float_value == -numeric_limits::infinity()) + res = join(type_to_glsl(type), "(-1.0 / 0.0)"); + else if (std::isnan(float_value)) + res = join(type_to_glsl(type), "(0.0 / 0.0)"); + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); } else { - // If we don't cast, our actual input type is that of the first (or second) argument. 
- cast_op0 = to_enclosed_unpacked_expression(op0); - cast_op1 = to_enclosed_unpacked_expression(op1); - input_type = type0.basetype; + SPIRType type; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")"); } - return expected_type; + return res; } -void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) +string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { - string cast_op0, cast_op1; - auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); - auto &out_type = get(result_type); + string res; + float float_value = c.scalar_f32(col, row); - // We might have casted away from the result type, so bitcast again. - // For example, arithmetic right shift with uint inputs. - // Special case boolean outputs since relational opcodes output booleans instead of int/uint. - string expr; - if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) + if (std::isnan(float_value) || std::isinf(float_value)) { - expected_type.basetype = input_type; - expr = bitcast_glsl_op(out_type, expected_type); - expr += '('; - expr += join(cast_op0, " ", op, " ", cast_op1); - expr += ')'; + // Use special representation. 
+ if (!is_legacy()) + { + SPIRType out_type; + SPIRType in_type; + out_type.basetype = SPIRType::Float; + in_type.basetype = SPIRType::UInt; + out_type.vecsize = 1; + in_type.vecsize = 1; + out_type.width = 32; + in_type.width = 32; + + char print_buffer[32]; +#ifdef _WIN32 + sprintf(print_buffer, "0x%xu", c.scalar(col, row)); +#else + snprintf(print_buffer, sizeof(print_buffer), "0x%xu", c.scalar(col, row)); +#endif + + const char *comment = "inf"; + if (float_value == -numeric_limits::infinity()) + comment = "-inf"; + else if (std::isnan(float_value)) + comment = "nan"; + res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)"); + } + else + { + if (float_value == numeric_limits::infinity()) + { + if (backend.float_literal_suffix) + res = "(1.0f / 0.0f)"; + else + res = "(1.0 / 0.0)"; + } + else if (float_value == -numeric_limits::infinity()) + { + if (backend.float_literal_suffix) + res = "(-1.0f / 0.0f)"; + else + res = "(-1.0 / 0.0)"; + } + else if (std::isnan(float_value)) + { + if (backend.float_literal_suffix) + res = "(0.0f / 0.0f)"; + else + res = "(0.0 / 0.0)"; + } + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } } else - expr += join(cast_op0, " ", op, " ", cast_op1); + { + res = convert_to_string(float_value, current_locale_radix_character); + if (backend.float_literal_suffix) + res += "f"; + } - emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + return res; } -void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) +std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { - bool forward = should_forward(op0); - emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward); - inherit_expression_dependencies(result_id, 
op0); -} + string res; + double double_value = c.scalar_f64(col, row); -void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op) -{ - bool forward = should_forward(op0) && should_forward(op1); - emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"), - forward); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); -} - -void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, - SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type) -{ - auto &out_type = get(result_type); - auto &expr_type = expression_type(op0); - auto expected_type = out_type; - - // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends. - expected_type.basetype = input_type; - expected_type.width = expr_type.width; - string cast_op = expr_type.basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); - - string expr; - if (out_type.basetype != expected_result_type) - { - expected_type.basetype = expected_result_type; - expected_type.width = out_type.width; - expr = bitcast_glsl_op(out_type, expected_type); - expr += '('; - expr += join(op, "(", cast_op, ")"); - expr += ')'; - } - else + if (std::isnan(double_value) || std::isinf(double_value)) { - expr += join(op, "(", cast_op, ")"); - } - - emit_op(result_type, result_id, expr, should_forward(op0)); - inherit_expression_dependencies(result_id, op0); -} + // Use special representation. 
+ if (!is_legacy()) + { + SPIRType out_type; + SPIRType in_type; + out_type.basetype = SPIRType::Double; + in_type.basetype = SPIRType::UInt64; + out_type.vecsize = 1; + in_type.vecsize = 1; + out_type.width = 64; + in_type.width = 64; -void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - uint32_t op2, const char *op, SPIRType::BaseType input_type) -{ - auto &out_type = get(result_type); - auto expected_type = out_type; - expected_type.basetype = input_type; - string cast_op0 = - expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); - string cast_op1 = - expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1); - string cast_op2 = - expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2); + uint64_t u64_value = c.scalar_u64(col, row); - string expr; - if (out_type.basetype != input_type) - { - expr = bitcast_glsl_op(out_type, expected_type); - expr += '('; - expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); - expr += ')'; - } - else - { - expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); - } + if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310. + SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310."); + require_extension_internal("GL_ARB_gpu_shader_int64"); - emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); - inherit_expression_dependencies(result_id, op2); -} + char print_buffer[64]; +#ifdef _WIN32 + sprintf(print_buffer, "0x%llx%s", static_cast(u64_value), + backend.long_long_literal_suffix ? 
"ull" : "ul"); +#else + snprintf(print_buffer, sizeof(print_buffer), "0x%llx%s", static_cast(u64_value), + backend.long_long_literal_suffix ? "ull" : "ul"); +#endif -void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) -{ - string cast_op0, cast_op1; - auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); - auto &out_type = get(result_type); + const char *comment = "inf"; + if (double_value == -numeric_limits::infinity()) + comment = "-inf"; + else if (std::isnan(double_value)) + comment = "nan"; + res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)"); + } + else + { + if (options.es) + SPIRV_CROSS_THROW("FP64 not supported in ES profile."); + if (options.version < 400) + require_extension_internal("GL_ARB_gpu_shader_fp64"); - // Special case boolean outputs since relational opcodes output booleans instead of int/uint. 
- string expr; - if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) - { - expected_type.basetype = input_type; - expr = bitcast_glsl_op(out_type, expected_type); - expr += '('; - expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); - expr += ')'; + if (double_value == numeric_limits::infinity()) + { + if (backend.double_literal_suffix) + res = "(1.0lf / 0.0lf)"; + else + res = "(1.0 / 0.0)"; + } + else if (double_value == -numeric_limits::infinity()) + { + if (backend.double_literal_suffix) + res = "(-1.0lf / 0.0lf)"; + else + res = "(-1.0 / 0.0)"; + } + else if (std::isnan(double_value)) + { + if (backend.double_literal_suffix) + res = "(0.0lf / 0.0lf)"; + else + res = "(0.0 / 0.0)"; + } + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } } else { - expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); + res = convert_to_string(double_value, current_locale_radix_character); + if (backend.double_literal_suffix) + res += "lf"; } - emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + return res; } -void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - uint32_t op2, const char *op) +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) { - bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2); - emit_op(result_type, result_id, - join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", - to_unpacked_expression(op2), ")"), - forward); + auto type = get(c.constant_type); + type.columns = 1; - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); - inherit_expression_dependencies(result_id, op2); -} + auto scalar_type = type; + 
scalar_type.vecsize = 1; -void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - uint32_t op2, uint32_t op3, const char *op) -{ - bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); - emit_op(result_type, result_id, - join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", - to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"), - forward); + string res; + bool splat = backend.use_constructor_splatting && c.vector_size() > 1; + bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1; - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); - inherit_expression_dependencies(result_id, op2); - inherit_expression_dependencies(result_id, op3); -} + if (!type_is_floating_point(type)) + { + // Cannot swizzle literal integers as a special case. + swizzle_splat = false; + } -// EXT_shader_texture_lod only concerns fragment shaders so lod tex functions -// are not allowed in ES 2 vertex shaders. But SPIR-V only supports lod tex -// functions in vertex shaders so we revert those back to plain calls when -// the lod is a constant value of zero. -bool CompilerGLSL::check_explicit_lod_allowed(uint32_t lod) -{ - auto &execution = get_entry_point(); - bool allowed = !is_legacy_es() || execution.model == ExecutionModelFragment; - if (!allowed && lod != 0) + if (splat || swizzle_splat) { - auto *lod_constant = maybe_get(lod); - if (!lod_constant || lod_constant->scalar_f32() != 0.0f) + // Cannot use constant splatting if we have specialization constants somewhere in the vector. 
+ for (uint32_t i = 0; i < c.vector_size(); i++) { - SPIRV_CROSS_THROW("Explicit lod not allowed in legacy ES non-fragment shaders."); + if (c.specialization_constant_id(vector, i) != 0) + { + splat = false; + swizzle_splat = false; + break; + } } } - return allowed; -} -string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod, uint32_t tex) -{ - const char *type; - switch (imgtype.image.dim) + if (splat || swizzle_splat) { - case spv::Dim1D: - type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D"; - break; - case spv::Dim2D: - type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; - break; - case spv::Dim3D: - type = "3D"; - break; - case spv::DimCube: - type = "Cube"; - break; - case spv::DimRect: - type = "2DRect"; - break; - case spv::DimBuffer: - type = "Buffer"; - break; - case spv::DimSubpassData: - type = "2D"; - break; - default: - type = ""; - break; + if (type.width == 64) + { + uint64_t ident = c.scalar_u64(vector, 0); + for (uint32_t i = 1; i < c.vector_size(); i++) + { + if (ident != c.scalar_u64(vector, i)) + { + splat = false; + swizzle_splat = false; + break; + } + } + } + else + { + uint32_t ident = c.scalar(vector, 0); + for (uint32_t i = 1; i < c.vector_size(); i++) + { + if (ident != c.scalar(vector, i)) + { + splat = false; + swizzle_splat = false; + } + } + } } - bool use_explicit_lod = check_explicit_lod_allowed(lod); + if (c.vector_size() > 1 && !swizzle_splat) + res += type_to_glsl(type) + "("; - if (op == "textureLod" || op == "textureProjLod" || op == "textureGrad" || op == "textureProjGrad") + switch (type.basetype) { - if (is_legacy_es()) + case SPIRType::Half: + if (splat || swizzle_splat) { - if (use_explicit_lod) - require_extension_internal("GL_EXT_shader_texture_lod"); + res += convert_half_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get(c.constant_type), 1, res); } - else if (is_legacy()) - require_extension_internal("GL_ARB_shader_texture_lod"); 
- } - - if (op == "textureLodOffset" || op == "textureProjLodOffset") - { - if (is_legacy_es()) - SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES")); + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_half_to_string(c, vector, i); - require_extension_internal("GL_EXT_gpu_shader4"); - } + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; - // GLES has very limited support for shadow samplers. - // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers, - // everything else can just throw - if (image_is_comparison(imgtype, tex) && is_legacy_es()) - { - if (op == "texture" || op == "textureProj") - require_extension_internal("GL_EXT_shadow_samplers"); + case SPIRType::Float: + if (splat || swizzle_splat) + { + res += convert_float_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get(c.constant_type), 1, res); + } else - SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES")); - } + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_float_to_string(c, vector, i); - bool is_es_and_depth = is_legacy_es() && image_is_comparison(imgtype, tex); - std::string type_prefix = image_is_comparison(imgtype, tex) ? "shadow" : "texture"; + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; - if (op == "texture") - return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type); - else if (op == "textureLod") - { - if (use_explicit_lod) - return join(type_prefix, type, is_legacy_es() ? "LodEXT" : "Lod"); - else - return join(type_prefix, type); - } - else if (op == "textureProj") - return join(type_prefix, type, is_es_and_depth ? 
"ProjEXT" : "Proj"); - else if (op == "textureGrad") - return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad"); - else if (op == "textureProjLod") - { - if (use_explicit_lod) - return join(type_prefix, type, is_legacy_es() ? "ProjLodEXT" : "ProjLod"); - else - return join(type_prefix, type, "Proj"); - } - else if (op == "textureLodOffset") - { - if (use_explicit_lod) - return join(type_prefix, type, "LodOffset"); + case SPIRType::Double: + if (splat || swizzle_splat) + { + res += convert_double_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get(c.constant_type), 1, res); + } else - return join(type_prefix, type); - } - else if (op == "textureProjGrad") - return join(type_prefix, type, - is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad"); - else if (op == "textureProjLodOffset") + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_double_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Int64: { - if (use_explicit_lod) - return join(type_prefix, type, "ProjLodOffset"); + auto tmp = type; + tmp.vecsize = 1; + tmp.columns = 1; + auto int64_type = type_to_glsl(tmp); + + if (splat) + { + res += convert_to_string(c.scalar_i64(vector, 0), int64_type, backend.long_long_literal_suffix); + } else - return join(type_prefix, type, "ProjOffset"); - } - else - { - SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op)); - } -} + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_to_string(c.scalar_i64(vector, i), int64_type, backend.long_long_literal_suffix); -bool 
CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp) -{ - auto *cleft = maybe_get(left); - auto *cright = maybe_get(right); - auto &lerptype = expression_type(lerp); + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + } - // If our targets aren't constants, we cannot use construction. - if (!cleft || !cright) - return false; + case SPIRType::UInt64: + if (splat) + { + res += convert_to_string(c.scalar_u64(vector, 0)); + if (backend.long_long_literal_suffix) + res += "ull"; + else + res += "ul"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar_u64(vector, i)); + if (backend.long_long_literal_suffix) + res += "ull"; + else + res += "ul"; + } - // If our targets are spec constants, we cannot use construction. - if (cleft->specialization || cright->specialization) - return false; + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; - // We can only use trivial construction if we have a scalar - // (should be possible to do it for vectors as well, but that is overkill for now). - if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1) - return false; + case SPIRType::UInt: + if (splat) + { + res += convert_to_string(c.scalar(vector, 0)); + if (is_legacy()) + { + // Fake unsigned constant literals with signed ones if possible. + // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. 
+ if (c.scalar_i32(vector, 0) < 0) + SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); + } + else if (backend.uint32_t_literal_suffix) + res += "u"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar(vector, i)); + if (is_legacy()) + { + // Fake unsigned constant literals with signed ones if possible. + // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. + if (c.scalar_i32(vector, i) < 0) + SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made " + "the literal negative."); + } + else if (backend.uint32_t_literal_suffix) + res += "u"; + } - // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor. - bool ret = false; - switch (type.basetype) - { - case SPIRType::Short: - case SPIRType::UShort: - ret = cleft->scalar_u16() == 0 && cright->scalar_u16() == 1; + if (i + 1 < c.vector_size()) + res += ", "; + } + } break; case SPIRType::Int: - case SPIRType::UInt: - ret = cleft->scalar() == 0 && cright->scalar() == 1; + if (splat) + res += convert_to_string(c.scalar_i32(vector, 0)); + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_to_string(c.scalar_i32(vector, i)); + if (i + 1 < c.vector_size()) + res += ", "; + } + } break; - case SPIRType::Half: - ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f; - break; + case SPIRType::UShort: + if (splat) + { + res += convert_to_string(c.scalar(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && 
c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + if (*backend.uint16_t_literal_suffix) + { + res += convert_to_string(c.scalar_u16(vector, i)); + res += backend.uint16_t_literal_suffix; + } + else + { + // If backend doesn't have a literal suffix, we need to value cast. + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_u16(vector, i)); + res += ")"; + } + } - case SPIRType::Float: - ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f; + if (i + 1 < c.vector_size()) + res += ", "; + } + } break; - case SPIRType::Double: - ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0; - break; + case SPIRType::Short: + if (splat) + { + res += convert_to_string(c.scalar_i16(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + if (*backend.int16_t_literal_suffix) + { + res += convert_to_string(c.scalar_i16(vector, i)); + res += backend.int16_t_literal_suffix; + } + else + { + // If backend doesn't have a literal suffix, we need to value cast. 
+ res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_i16(vector, i)); + res += ")"; + } + } - case SPIRType::Int64: - case SPIRType::UInt64: - ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1; + if (i + 1 < c.vector_size()) + res += ", "; + } + } break; - default: - break; - } + case SPIRType::UByte: + if (splat) + { + res += convert_to_string(c.scalar_u8(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_u8(vector, i)); + res += ")"; + } - if (ret) - op = type_to_glsl_constructor(type); - return ret; -} + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; -string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value, - uint32_t false_value) -{ - string expr; - auto &lerptype = expression_type(select); + case SPIRType::SByte: + if (splat) + { + res += convert_to_string(c.scalar_i8(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_i8(vector, i)); + res += ")"; + } - if (lerptype.vecsize == 1) - expr = join(to_enclosed_expression(select), " ? 
", to_enclosed_pointer_expression(true_value), " : ", - to_enclosed_pointer_expression(false_value)); - else - { - auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); }; + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; - expr = type_to_glsl_constructor(restype); - expr += "("; - for (uint32_t i = 0; i < restype.vecsize; i++) + case SPIRType::Boolean: + if (splat) + res += c.scalar(vector, 0) ? "true" : "false"; + else { - expr += swiz(select, i); - expr += " ? "; - expr += swiz(true_value, i); - expr += " : "; - expr += swiz(false_value, i); - if (i + 1 < restype.vecsize) - expr += ", "; + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += c.scalar(vector, i) ? "true" : "false"; + + if (i + 1 < c.vector_size()) + res += ", "; + } } - expr += ")"; + break; + + default: + SPIRV_CROSS_THROW("Invalid constant expression basetype."); } - return expr; + if (c.vector_size() > 1 && !swizzle_splat) + res += ")"; + + return res; } -void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp) +SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id) { - auto &lerptype = expression_type(lerp); - auto &restype = get(result_type); + forced_temporaries.insert(id); + emit_uninitialized_temporary(type, id); + return set(id, to_name(id), type, true); +} - // If this results in a variable pointer, assume it may be written through. - if (restype.pointer) +void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id) +{ + // If we're declaring temporaries inside continue blocks, + // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. 
+ if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id)) { - register_write(left); - register_write(right); + auto &header = get(current_continue_block->loop_dominator); + if (find_if(begin(header.declare_temporary), end(header.declare_temporary), + [result_type, result_id](const pair &tmp) { + return tmp.first == result_type && tmp.second == result_id; + }) == end(header.declare_temporary)) + { + header.declare_temporary.emplace_back(result_type, result_id); + hoisted_temporaries.insert(result_id); + force_recompile(); + } } + else if (hoisted_temporaries.count(result_id) == 0) + { + auto &type = get(result_type); + auto &flags = get_decoration_bitset(result_id); - string mix_op; - bool has_boolean_mix = backend.boolean_mix_support && - ((options.es && options.version >= 310) || (!options.es && options.version >= 450)); - bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp); + // The result_id has not been made into an expression yet, so use flags interface. + add_local_variable_name(result_id); - // Cannot use boolean mix when the lerp argument is just one boolean, - // fall back to regular trinary statements. - if (lerptype.vecsize == 1) - has_boolean_mix = false; + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(result_type)); - // If we can reduce the mix to a simple cast, do so. - // This helps for cases like int(bool), uint(bool) which is implemented with - // OpSelect bool 1 0. - if (trivial_mix) - { - emit_unary_func_op(result_type, id, lerp, mix_op.c_str()); - } - else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean) - { - // Boolean mix not supported on desktop without extension. - // Was added in OpenGL 4.5 with ES 3.1 compat. - // - // Could use GL_EXT_shader_integer_mix on desktop at least, - // but Apple doesn't support it. 
:( - // Just implement it as ternary expressions. - auto expr = to_ternary_expression(get(result_type), lerp, right, left); - emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp)); - inherit_expression_dependencies(id, left); - inherit_expression_dependencies(id, right); - inherit_expression_dependencies(id, lerp); + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";"); } - else - emit_trinary_func_op(result_type, id, left, right, lerp, "mix"); } -string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id) +string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) { - // Keep track of the array indices we have used to load the image. - // We'll need to use the same array index into the combined image sampler array. - auto image_expr = to_expression(image_id); - string array_expr; - auto array_index = image_expr.find_first_of('['); - if (array_index != string::npos) - array_expr = image_expr.substr(array_index, string::npos); - - auto &args = current_function->arguments; - - // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect - // all possible combinations into new sampler2D uniforms. 
- auto *image = maybe_get_backing_variable(image_id); - auto *samp = maybe_get_backing_variable(samp_id); - if (image) - image_id = image->self; - if (samp) - samp_id = samp->self; - - auto image_itr = find_if(begin(args), end(args), - [image_id](const SPIRFunction::Parameter ¶m) { return param.id == image_id; }); - - auto sampler_itr = find_if(begin(args), end(args), - [samp_id](const SPIRFunction::Parameter ¶m) { return param.id == samp_id; }); + auto &type = get(result_type); - if (image_itr != end(args) || sampler_itr != end(args)) + // If we're declaring temporaries inside continue blocks, + // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. + if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id)) { - // If any parameter originates from a parameter, we will find it in our argument list. - bool global_image = image_itr == end(args); - bool global_sampler = sampler_itr == end(args); - uint32_t iid = global_image ? image_id : uint32_t(image_itr - begin(args)); - uint32_t sid = global_sampler ? 
samp_id : uint32_t(sampler_itr - begin(args)); - - auto &combined = current_function->combined_parameters; - auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { - return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid && - p.sampler_id == sid; - }); - - if (itr != end(combined)) - return to_expression(itr->id) + array_expr; - else + auto &header = get(current_continue_block->loop_dominator); + if (find_if(begin(header.declare_temporary), end(header.declare_temporary), + [result_type, result_id](const pair &tmp) { + return tmp.first == result_type && tmp.second == result_id; + }) == end(header.declare_temporary)) { - SPIRV_CROSS_THROW( - "Cannot find mapping for combined sampler parameter, was build_combined_image_samplers() used " - "before compile() was called?"); + header.declare_temporary.emplace_back(result_type, result_id); + hoisted_temporaries.insert(result_id); + force_recompile_guarantee_forward_progress(); } + + return join(to_name(result_id), " = "); + } + else if (hoisted_temporaries.count(result_id)) + { + // The temporary has already been declared earlier, so just "declare" the temporary by writing to it. + return join(to_name(result_id), " = "); } else { - // For global sampler2D, look directly at the global remapping table. - auto &mapping = combined_image_samplers; - auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) { - return combined.image_id == image_id && combined.sampler_id == samp_id; - }); - - if (itr != end(combined_image_samplers)) - return to_expression(itr->combined_id) + array_expr; - else - { - SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used " - "before compile() was called?"); - } + // The result_id has not been made into an expression yet, so use flags interface. 
+ add_local_variable_name(result_id); + auto &flags = get_decoration_bitset(result_id); + return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = "); } } -void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) +bool CompilerGLSL::expression_is_forwarded(uint32_t id) const { - if (options.vulkan_semantics && combined_image_samplers.empty()) + return forwarded_temporaries.count(id) != 0; +} + +bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const +{ + return suppressed_usage_tracking.count(id) != 0; +} + +bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const +{ + auto *expr = maybe_get(id); + if (!expr) + return false; + + // If we're emitting code at a deeper loop level than when we emitted the expression, + // we're probably reading the same expression over and over. + return current_loop_level > expr->emitted_loop_level; +} + +SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, + bool suppress_usage_tracking) +{ + if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries))) { - emit_binary_func_op(result_type, result_id, image_id, samp_id, - type_to_glsl(get(result_type), result_id).c_str()); + // Just forward it without temporary. + // If the forward is trivial, we do not force flushing to temporary for this expression. + forwarded_temporaries.insert(result_id); + if (suppress_usage_tracking) + suppressed_usage_tracking.insert(result_id); - // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. - forwarded_temporaries.erase(result_id); + return set(result_id, rhs, result_type, true); } else { - // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. 
- emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); + // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are). + statement(declare_temporary(result_type, result_id), rhs, ";"); + return set(result_id, to_name(result_id), result_type, true); } } -static inline bool image_opcode_is_sample_no_dref(Op op) +void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { - switch (op) - { - case OpImageSampleExplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjExplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageFetch: - case OpImageRead: - case OpImageSparseSampleExplicitLod: - case OpImageSparseSampleImplicitLod: - case OpImageSparseSampleProjExplicitLod: - case OpImageSparseSampleProjImplicitLod: - case OpImageSparseFetch: - case OpImageSparseRead: - return true; - - default: - return false; - } + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward); + inherit_expression_dependencies(result_id, op0); } -void CompilerGLSL::emit_texture_op(const Instruction &i) +void CompilerGLSL::emit_unary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { - auto *ops = stream(i); - auto op = static_cast(i.op); - uint32_t length = i.length; + auto &type = get(result_type); + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(type_to_glsl(type), "(", op, to_enclosed_unpacked_expression(op0), ")"), forward); + inherit_expression_dependencies(result_id, op0); +} - SmallVector inherited_expressions; +void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) +{ + // Various FP arithmetic opcodes such as add, sub, mul will hit this. 
+ bool force_temporary_precise = backend.support_precise_qualifier && + has_decoration(result_id, DecorationNoContraction) && + type_is_floating_point(get(result_type)); + bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise; - uint32_t result_type_id = ops[0]; - uint32_t id = ops[1]; - uint32_t img = ops[2]; - uint32_t coord = ops[3]; - uint32_t dref = 0; - uint32_t comp = 0; - bool gather = false; - bool proj = false; - bool fetch = false; - const uint32_t *opt = nullptr; + emit_op(result_type, result_id, + join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward); - auto &result_type = get(result_type_id); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} - inherited_expressions.push_back(coord); +void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op) +{ + auto &type = get(result_type); + auto expr = type_to_glsl_constructor(type); + expr += '('; + for (uint32_t i = 0; i < type.vecsize; i++) + { + // Make sure to call to_expression multiple times to ensure + // that these expressions are properly flushed to temporaries if needed. + expr += op; + expr += to_extract_component_expression(operand, i); - // Make sure non-uniform decoration is back-propagated to where it needs to be. 
- if (has_decoration(img, DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(img); + if (i + 1 < type.vecsize) + expr += ", "; + } + expr += ')'; + emit_op(result_type, result_id, expr, should_forward(operand)); - switch (op) - { - case OpImageSampleDrefImplicitLod: - case OpImageSampleDrefExplicitLod: - dref = ops[4]; - opt = &ops[5]; - length -= 5; - break; + inherit_expression_dependencies(result_id, operand); +} - case OpImageSampleProjDrefImplicitLod: - case OpImageSampleProjDrefExplicitLod: - dref = ops[4]; - opt = &ops[5]; - length -= 5; - proj = true; - break; +void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, bool negate, SPIRType::BaseType expected_type) +{ + auto &type0 = expression_type(op0); + auto &type1 = expression_type(op1); - case OpImageDrefGather: - dref = ops[4]; - opt = &ops[5]; - length -= 5; - gather = true; - break; + SPIRType target_type0 = type0; + SPIRType target_type1 = type1; + target_type0.basetype = expected_type; + target_type1.basetype = expected_type; + target_type0.vecsize = 1; + target_type1.vecsize = 1; - case OpImageGather: - comp = ops[4]; - opt = &ops[5]; - length -= 5; - gather = true; - break; + auto &type = get(result_type); + auto expr = type_to_glsl_constructor(type); + expr += '('; + for (uint32_t i = 0; i < type.vecsize; i++) + { + // Make sure to call to_expression multiple times to ensure + // that these expressions are properly flushed to temporaries if needed. 
+ if (negate) + expr += "!("; - case OpImageFetch: - case OpImageRead: // Reads == fetches in Metal (other langs will not get here) - opt = &ops[4]; - length -= 4; - fetch = true; - break; + if (expected_type != SPIRType::Unknown && type0.basetype != expected_type) + expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i)); + else + expr += to_extract_component_expression(op0, i); - case OpImageSampleProjImplicitLod: - case OpImageSampleProjExplicitLod: - opt = &ops[4]; - length -= 4; - proj = true; - break; + expr += ' '; + expr += op; + expr += ' '; - default: - opt = &ops[4]; - length -= 4; - break; - } + if (expected_type != SPIRType::Unknown && type1.basetype != expected_type) + expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i)); + else + expr += to_extract_component_expression(op1, i); - // Bypass pointers because we need the real image struct - auto &type = expression_type(img); - auto &imgtype = get(type.self); + if (negate) + expr += ")"; - uint32_t coord_components = 0; - switch (imgtype.image.dim) - { - case spv::Dim1D: - coord_components = 1; - break; - case spv::Dim2D: - coord_components = 2; - break; - case spv::Dim3D: - coord_components = 3; - break; - case spv::DimCube: - coord_components = 3; - break; - case spv::DimBuffer: - coord_components = 1; - break; - default: - coord_components = 2; - break; + if (i + 1 < type.vecsize) + expr += ", "; } + expr += ')'; + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); - if (dref) - inherited_expressions.push_back(dref); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} - if (proj) - coord_components++; - if (imgtype.image.arrayed) - coord_components++; +SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type, + uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type) +{ + 
auto &type0 = expression_type(op0); + auto &type1 = expression_type(op1); - uint32_t bias = 0; - uint32_t lod = 0; - uint32_t grad_x = 0; - uint32_t grad_y = 0; - uint32_t coffset = 0; - uint32_t offset = 0; - uint32_t coffsets = 0; - uint32_t sample = 0; - uint32_t minlod = 0; - uint32_t flags = 0; + // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs. + // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected + // since equality test is exactly the same. + bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type); - if (length) + // Create a fake type so we can bitcast to it. + // We only deal with regular arithmetic types here like int, uints and so on. + SPIRType expected_type; + expected_type.basetype = input_type; + expected_type.vecsize = type0.vecsize; + expected_type.columns = type0.columns; + expected_type.width = type0.width; + + if (cast) { - flags = *opt++; - length--; + cast_op0 = bitcast_glsl(expected_type, op0); + cast_op1 = bitcast_glsl(expected_type, op1); + } + else + { + // If we don't cast, our actual input type is that of the first (or second) argument. 
+ cast_op0 = to_enclosed_unpacked_expression(op0); + cast_op1 = to_enclosed_unpacked_expression(op1); + input_type = type0.basetype; } - auto test = [&](uint32_t &v, uint32_t flag) { - if (length && (flags & flag)) - { - v = *opt++; - inherited_expressions.push_back(v); - length--; - } - }; + return expected_type; +} - test(bias, ImageOperandsBiasMask); - test(lod, ImageOperandsLodMask); - test(grad_x, ImageOperandsGradMask); - test(grad_y, ImageOperandsGradMask); - test(coffset, ImageOperandsConstOffsetMask); - test(offset, ImageOperandsOffsetMask); - test(coffsets, ImageOperandsConstOffsetsMask); - test(sample, ImageOperandsSampleMask); - test(minlod, ImageOperandsMinLodMask); +bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) +{ + // Some bitcasts may require complex casting sequences, and are implemented here. + // Otherwise a simply unary function will do with bitcast_glsl_op. + auto &output_type = get(result_type); + auto &input_type = expression_type(op0); string expr; - bool forward = false; - expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset), - (!!grad_x || !!grad_y), !!dref, lod, minlod); - expr += "("; - expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod, - coffset, offset, bias, comp, sample, minlod, &forward); - expr += ")"; - // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here. 
- if (is_legacy() && image_is_comparison(imgtype, img)) - expr += ".r"; + if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1) + expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))"); + else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half && + input_type.vecsize == 2) + expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))"); + else + return false; - // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. - // Remap back to 4 components as sampling opcodes expect. - if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op)) - { - bool image_is_depth = false; - const auto *combined = maybe_get(img); - uint32_t image_id = combined ? combined->image : img; + emit_op(result_type, id, expr, should_forward(op0)); + return true; +} - if (combined && image_is_comparison(imgtype, combined->image)) - image_is_depth = true; - else if (image_is_comparison(imgtype, img)) - image_is_depth = true; +void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type, + bool skip_cast_if_equal_type, + bool implicit_integer_promotion) +{ + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); + auto &out_type = get(result_type); - // We must also check the backing variable for the image. - // We might have loaded an OpImage, and used that handle for two different purposes. - // Once with comparison, once without. - auto *image_variable = maybe_get_backing_variable(image_id); - if (image_variable && image_is_comparison(get(image_variable->basetype), image_variable->self)) - image_is_depth = true; + // We might have casted away from the result type, so bitcast again. 
+ // For example, arithmetic right shift with uint inputs. + // Special case boolean outputs since relational opcodes output booleans instead of int/uint. + auto bitop = join(cast_op0, " ", op, " ", cast_op1); + string expr; - if (image_is_depth) - expr = remap_swizzle(result_type, 1, expr); + if (implicit_integer_promotion) + { + // Simple value cast. + expr = join(type_to_glsl(out_type), '(', bitop, ')'); } - - if (!backend.support_small_type_sampling_result && result_type.width < 32) + else if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) { - // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically. - // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision. - expr = join(type_to_glsl_constructor(result_type), "(", expr, ")"); + expected_type.basetype = input_type; + expr = join(bitcast_glsl_op(out_type, expected_type), '(', bitop, ')'); } - - // Deals with reads from MSL. We might need to downconvert to fewer components. 
- if (op == OpImageRead) - expr = remap_swizzle(result_type, 4, expr); - - emit_op(result_type_id, id, expr, forward); - for (auto &inherit : inherited_expressions) - inherit_expression_dependencies(id, inherit); - - switch (op) + else { - case OpImageSampleDrefImplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageSampleProjDrefImplicitLod: - register_control_dependent_expression(id); - break; - - default: - break; + expr = std::move(bitop); } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); } -bool CompilerGLSL::expression_is_constant_null(uint32_t id) const +void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { - auto *c = maybe_get(id); - if (!c) - return false; - return c->constant_is_null(); + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward); + inherit_expression_dependencies(result_id, op0); } -// Returns the function name for a texture sampling function for the specified image and sampling characteristics. -// For some subclasses, the function is a method on the specified image. -string CompilerGLSL::to_function_name(uint32_t tex, const SPIRType &imgtype, bool is_fetch, bool is_gather, - bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool, - uint32_t lod, uint32_t minlod) +void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) { - if (minlod != 0) - SPIRV_CROSS_THROW("Sparse texturing not yet supported."); - - string fname; - - // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. - // To emulate this, we will have to use textureGrad with a constant gradient of 0. 
- // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. - // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. - bool workaround_lod_array_shadow_as_grad = false; - if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && - image_is_comparison(imgtype, tex) && lod) - { - if (!expression_is_constant_null(lod)) - { - SPIRV_CROSS_THROW( - "textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be expressed in GLSL."); - } - workaround_lod_array_shadow_as_grad = true; - } - - if (is_fetch) - fname += "texelFetch"; - else - { - fname += "texture"; - - if (is_gather) - fname += "Gather"; - if (has_array_offsets) - fname += "Offsets"; - if (is_proj) - fname += "Proj"; - if (has_grad || workaround_lod_array_shadow_as_grad) - fname += "Grad"; - if (!!lod && !workaround_lod_array_shadow_as_grad) - fname += "Lod"; - } - - if (has_offset) - fname += "Offset"; - - return is_legacy() ? legacy_tex_op(fname, imgtype, lod, tex) : fname; + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"), + forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); } -std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) +void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) { - auto *var = maybe_get_backing_variable(id); - - // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL. - // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions. 
- if (var) + auto &type = get(result_type); + if (type_is_floating_point(type)) { - auto &type = get(var->basetype); - if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) - { - if (options.vulkan_semantics) - { - // Newer glslang supports this extension to deal with texture2D as argument to texture functions. - if (dummy_sampler_id) - SPIRV_CROSS_THROW("Vulkan GLSL should not have a dummy sampler for combining."); - require_extension_internal("GL_EXT_samplerless_texture_functions"); - } - else - { - if (!dummy_sampler_id) - SPIRV_CROSS_THROW( - "Cannot find dummy sampler ID. Was build_dummy_sampler_for_combined_images() called?"); - - return to_combined_image_sampler(id, dummy_sampler_id); - } - } + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Floating point atomics requires Vulkan semantics."); + if (options.es) + SPIRV_CROSS_THROW("Floating point atomics requires desktop GLSL."); + require_extension_internal("GL_EXT_shader_atomic_float"); } - return to_expression(id); + forced_temporaries.insert(result_id); + emit_op(result_type, result_id, + join(op, "(", to_non_uniform_aware_expression(op0), ", ", + to_unpacked_expression(op1), ")"), false); + flush_all_atomic_capable_variables(); } -// Returns the function args for a texture sampling function for the specified image and sampling characteristics. 
-string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, - bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, - uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, - uint32_t bias, uint32_t comp, uint32_t sample, uint32_t /*minlod*/, - bool *p_forward) +void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, + uint32_t op0, uint32_t op1, uint32_t op2, + const char *op) { - string farg_str; - if (is_fetch) - farg_str = convert_separate_image_to_expression(img); - else - farg_str = to_expression(img); - - bool swizz_func = backend.swizzle_is_function; - auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { - if (comps == in_comps) - return ""; - - switch (comps) - { - case 1: - return ".x"; - case 2: - return swizz_func ? ".xy()" : ".xy"; - case 3: - return swizz_func ? ".xyz()" : ".xyz"; - default: - return ""; - } - }; + forced_temporaries.insert(result_id); + emit_op(result_type, result_id, + join(op, "(", to_non_uniform_aware_expression(op0), ", ", + to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false); + flush_all_atomic_capable_variables(); +} - bool forward = should_forward(coord); +void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, + SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type) +{ + auto &out_type = get(result_type); + auto &expr_type = expression_type(op0); + auto expected_type = out_type; - // The IR can give us more components than we need, so chop them off as needed. - auto swizzle_expr = swizzle(coord_components, expression_type(coord).vecsize); - // Only enclose the UV expression if needed. - auto coord_expr = (*swizzle_expr == '\0') ? 
to_expression(coord) : (to_enclosed_expression(coord) + swizzle_expr); + // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends. + expected_type.basetype = input_type; + expected_type.width = expr_type.width; - // texelFetch only takes int, not uint. - auto &coord_type = expression_type(coord); - if (coord_type.basetype == SPIRType::UInt) + string cast_op; + if (expr_type.basetype != input_type) { - auto expected_type = coord_type; - expected_type.vecsize = coord_components; - expected_type.basetype = SPIRType::Int; - coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr); + if (expr_type.basetype == SPIRType::Boolean) + cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")"); + else + cast_op = bitcast_glsl(expected_type, op0); } + else + cast_op = to_unpacked_expression(op0); - // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. - // To emulate this, we will have to use textureGrad with a constant gradient of 0. - // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. - // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. - bool workaround_lod_array_shadow_as_grad = - ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && - image_is_comparison(imgtype, img) && lod; - - if (dref) + string expr; + if (out_type.basetype != expected_result_type) { - forward = forward && should_forward(dref); - - // SPIR-V splits dref and coordinate. - if (is_gather || coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather. - { - farg_str += ", "; - farg_str += to_expression(coord); - farg_str += ", "; - farg_str += to_expression(dref); - } - else if (is_proj) - { - // Have to reshuffle so we get vec4(coord, dref, proj), special case. 
- // Other shading languages splits up the arguments for coord and compare value like SPIR-V. - // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow. - farg_str += ", vec4("; - - if (imgtype.image.dim == Dim1D) - { - // Could reuse coord_expr, but we will mess up the temporary usage checking. - farg_str += to_enclosed_expression(coord) + ".x"; - farg_str += ", "; - farg_str += "0.0, "; - farg_str += to_expression(dref); - farg_str += ", "; - farg_str += to_enclosed_expression(coord) + ".y)"; - } - else if (imgtype.image.dim == Dim2D) - { - // Could reuse coord_expr, but we will mess up the temporary usage checking. - farg_str += to_enclosed_expression(coord) + (swizz_func ? ".xy()" : ".xy"); - farg_str += ", "; - farg_str += to_expression(dref); - farg_str += ", "; - farg_str += to_enclosed_expression(coord) + ".z)"; - } - else - SPIRV_CROSS_THROW("Invalid type for textureProj with shadow."); - } + expected_type.basetype = expected_result_type; + expected_type.width = out_type.width; + if (out_type.basetype == SPIRType::Boolean) + expr = type_to_glsl(out_type); else - { - // Create a composite which merges coord/dref into a single vector. 
- auto type = expression_type(coord); - type.vecsize = coord_components + 1; - farg_str += ", "; - farg_str += type_to_glsl_constructor(type); - farg_str += "("; - farg_str += coord_expr; - farg_str += ", "; - farg_str += to_expression(dref); - farg_str += ")"; - } + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op, ")"); + expr += ')'; } else { - farg_str += ", "; - farg_str += coord_expr; + expr += join(op, "(", cast_op, ")"); } - if (grad_x || grad_y) - { - forward = forward && should_forward(grad_x); - forward = forward && should_forward(grad_y); - farg_str += ", "; - farg_str += to_expression(grad_x); - farg_str += ", "; - farg_str += to_expression(grad_y); - } + emit_op(result_type, result_id, expr, should_forward(op0)); + inherit_expression_dependencies(result_id, op0); +} - if (lod) - { - if (workaround_lod_array_shadow_as_grad) - { - // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0. - // Implementing this as plain texture() is not safe on some implementations. - if (imgtype.image.dim == Dim2D) - farg_str += ", vec2(0.0), vec2(0.0)"; - else if (imgtype.image.dim == DimCube) - farg_str += ", vec3(0.0), vec3(0.0)"; - } - else - { - if (check_explicit_lod_allowed(lod)) - { - forward = forward && should_forward(lod); - farg_str += ", "; +// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs +// and different vector sizes all at once. Need a special purpose method here. 
+void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, + SPIRType::BaseType expected_result_type, + SPIRType::BaseType input_type0, SPIRType::BaseType input_type1, + SPIRType::BaseType input_type2) +{ + auto &out_type = get(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type0; - auto &lod_expr_type = expression_type(lod); + string cast_op0 = + expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); - // Lod expression for TexelFetch in GLSL must be int, and only int. - if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms && - lod_expr_type.basetype != SPIRType::Int) - { - farg_str += join("int(", to_expression(lod), ")"); - } - else - { - farg_str += to_expression(lod); - } - } - } - } - else if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) - { - // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. - farg_str += ", 0"; - } + auto op1_expr = to_unpacked_expression(op1); + auto op2_expr = to_unpacked_expression(op2); - if (coffset) - { - forward = forward && should_forward(coffset); - farg_str += ", "; - farg_str += to_expression(coffset); - } - else if (offset) - { - forward = forward && should_forward(offset); - farg_str += ", "; - farg_str += to_expression(offset); - } + // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit. + expected_type.basetype = input_type1; + expected_type.vecsize = 1; + string cast_op1 = expression_type(op1).basetype != input_type1 ? 
+ join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") : + op1_expr; - if (bias) - { - forward = forward && should_forward(bias); - farg_str += ", "; - farg_str += to_expression(bias); - } + expected_type.basetype = input_type2; + expected_type.vecsize = 1; + string cast_op2 = expression_type(op2).basetype != input_type2 ? + join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") : + op2_expr; - if (comp) + string expr; + if (out_type.basetype != expected_result_type) { - forward = forward && should_forward(comp); - farg_str += ", "; - farg_str += to_expression(comp); + expected_type.vecsize = out_type.vecsize; + expected_type.basetype = expected_result_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + expr += ')'; } - - if (sample) + else { - farg_str += ", "; - farg_str += to_expression(sample); + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); } - *p_forward = forward; - - return farg_str; -} + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} -void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length) +void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, SPIRType::BaseType input_type) { - auto op = static_cast(eop); + auto &out_type = get(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type; + string cast_op0 = + expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); + string cast_op1 = + expression_type(op1).basetype != input_type ? 
bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1); + string cast_op2 = + expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2); - if (is_legacy() && is_unsigned_glsl_opcode(op)) - SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets."); + string expr; + if (out_type.basetype != input_type) + { + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + } - // If we need to do implicit bitcasts, make sure we do it with the correct type. - uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length); - auto int_type = to_signed_basetype(integer_width); - auto uint_type = to_unsigned_basetype(integer_width); + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} - switch (op) - { - // FP fiddling - case GLSLstd450Round: - emit_unary_func_op(result_type, id, args[0], "round"); - break; +void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0, + uint32_t op1, const char *op, SPIRType::BaseType input_type) +{ + // Special purpose method for implementing clustered subgroup opcodes. + // Main difference is that op1 does not participate in any casting, it needs to be a literal. + auto &out_type = get(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type; + string cast_op0 = + expression_type(op0).basetype != input_type ? 
bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); - case GLSLstd450RoundEven: - if ((options.es && options.version >= 300) || (!options.es && options.version >= 130)) - emit_unary_func_op(result_type, id, args[0], "roundEven"); - else - SPIRV_CROSS_THROW("roundEven supported only in ESSL 300 and GLSL 130 and up."); - break; + string expr; + if (out_type.basetype != input_type) + { + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")"); + } - case GLSLstd450Trunc: - emit_unary_func_op(result_type, id, args[0], "trunc"); - break; - case GLSLstd450SAbs: - emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type); - break; - case GLSLstd450FAbs: - emit_unary_func_op(result_type, id, args[0], "abs"); - break; - case GLSLstd450SSign: - emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type); - break; - case GLSLstd450FSign: - emit_unary_func_op(result_type, id, args[0], "sign"); - break; - case GLSLstd450Floor: - emit_unary_func_op(result_type, id, args[0], "floor"); - break; - case GLSLstd450Ceil: - emit_unary_func_op(result_type, id, args[0], "ceil"); - break; - case GLSLstd450Fract: - emit_unary_func_op(result_type, id, args[0], "fract"); - break; - case GLSLstd450Radians: - emit_unary_func_op(result_type, id, args[0], "radians"); - break; - case GLSLstd450Degrees: - emit_unary_func_op(result_type, id, args[0], "degrees"); - break; - case GLSLstd450Fma: - if ((!options.es && options.version < 400) || (options.es && options.version < 320)) - { - auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ", - to_enclosed_expression(args[2])); + emit_op(result_type, result_id, expr, should_forward(op0)); + inherit_expression_dependencies(result_id, op0); +} - emit_op(result_type, id, expr, - 
should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2])); - for (uint32_t i = 0; i < 3; i++) - inherit_expression_dependencies(id, args[i]); - } - else - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma"); - break; - case GLSLstd450Modf: - register_call_out_argument(args[1]); - forced_temporaries.insert(id); - emit_binary_func_op(result_type, id, args[0], args[1], "modf"); - break; +void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) +{ + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); + auto &out_type = get(result_type); - case GLSLstd450ModfStruct: + // Special case boolean outputs since relational opcodes output booleans instead of int/uint. + string expr; + if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) { - forced_temporaries.insert(id); - auto &type = get(result_type); - emit_uninitialized_temporary_expression(result_type, id); - statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ", - to_expression(id), ".", to_member_name(type, 1), ");"); - break; + expected_type.basetype = input_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); } - // Minmax - case GLSLstd450UMin: - emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false); - break; + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} - case GLSLstd450SMin: - emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", 
int_type, false); - break; +void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2); + emit_op(result_type, result_id, + join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", + to_unpacked_expression(op2), ")"), + forward); - case GLSLstd450FMin: - emit_binary_func_op(result_type, id, args[0], args[1], "min"); - break; + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} - case GLSLstd450FMax: - emit_binary_func_op(result_type, id, args[0], args[1], "max"); - break; +void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, uint32_t op3, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); + emit_op(result_type, result_id, + join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", + to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"), + forward); - case GLSLstd450UMax: - emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false); - break; + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); + inherit_expression_dependencies(result_id, op3); +} - case GLSLstd450SMax: - emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false); - break; +void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, uint32_t op3, const char *op, + SPIRType::BaseType offset_count_type) +{ + // Only need to cast offset/count arguments. 
Types of base/insert must be same as result type, + // and bitfieldInsert is sign invariant. + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); - case GLSLstd450FClamp: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); - break; + auto op0_expr = to_unpacked_expression(op0); + auto op1_expr = to_unpacked_expression(op1); + auto op2_expr = to_unpacked_expression(op2); + auto op3_expr = to_unpacked_expression(op3); - case GLSLstd450UClamp: - emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type); - break; + SPIRType target_type; + target_type.vecsize = 1; + target_type.basetype = offset_count_type; - case GLSLstd450SClamp: - emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type); - break; + if (expression_type(op2).basetype != offset_count_type) + { + // Value-cast here. Input might be 16-bit. GLSL requires int. + op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")"); + } - // Trig - case GLSLstd450Sin: - emit_unary_func_op(result_type, id, args[0], "sin"); - break; - case GLSLstd450Cos: - emit_unary_func_op(result_type, id, args[0], "cos"); + if (expression_type(op3).basetype != offset_count_type) + { + // Value-cast here. Input might be 16-bit. GLSL requires int. + op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")"); + } + + emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); + inherit_expression_dependencies(result_id, op3); +} + +string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex) +{ + const char *type; + switch (imgtype.image.dim) + { + case spv::Dim1D: + // Force 2D path for ES. 
+ if (options.es) + type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; + else + type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D"; break; - case GLSLstd450Tan: - emit_unary_func_op(result_type, id, args[0], "tan"); + case spv::Dim2D: + type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; break; - case GLSLstd450Asin: - emit_unary_func_op(result_type, id, args[0], "asin"); + case spv::Dim3D: + type = "3D"; break; - case GLSLstd450Acos: - emit_unary_func_op(result_type, id, args[0], "acos"); + case spv::DimCube: + type = "Cube"; break; - case GLSLstd450Atan: - emit_unary_func_op(result_type, id, args[0], "atan"); + case spv::DimRect: + type = "2DRect"; break; - case GLSLstd450Sinh: - emit_unary_func_op(result_type, id, args[0], "sinh"); + case spv::DimBuffer: + type = "Buffer"; break; - case GLSLstd450Cosh: - emit_unary_func_op(result_type, id, args[0], "cosh"); + case spv::DimSubpassData: + type = "2D"; break; - case GLSLstd450Tanh: - emit_unary_func_op(result_type, id, args[0], "tanh"); - break; - case GLSLstd450Asinh: - emit_unary_func_op(result_type, id, args[0], "asinh"); - break; - case GLSLstd450Acosh: - emit_unary_func_op(result_type, id, args[0], "acosh"); - break; - case GLSLstd450Atanh: - emit_unary_func_op(result_type, id, args[0], "atanh"); - break; - case GLSLstd450Atan2: - emit_binary_func_op(result_type, id, args[0], args[1], "atan"); - break; - - // Exponentials - case GLSLstd450Pow: - emit_binary_func_op(result_type, id, args[0], args[1], "pow"); - break; - case GLSLstd450Exp: - emit_unary_func_op(result_type, id, args[0], "exp"); - break; - case GLSLstd450Log: - emit_unary_func_op(result_type, id, args[0], "log"); - break; - case GLSLstd450Exp2: - emit_unary_func_op(result_type, id, args[0], "exp2"); - break; - case GLSLstd450Log2: - emit_unary_func_op(result_type, id, args[0], "log2"); - break; - case GLSLstd450Sqrt: - emit_unary_func_op(result_type, id, args[0], "sqrt"); - break; - case 
GLSLstd450InverseSqrt: - emit_unary_func_op(result_type, id, args[0], "inversesqrt"); + default: + type = ""; break; + } - // Matrix math - case GLSLstd450Determinant: - emit_unary_func_op(result_type, id, args[0], "determinant"); - break; - case GLSLstd450MatrixInverse: - emit_unary_func_op(result_type, id, args[0], "inverse"); - break; + // In legacy GLSL, an extension is required for textureLod in the fragment + // shader or textureGrad anywhere. + bool legacy_lod_ext = false; + auto &execution = get_entry_point(); + if (op == "textureGrad" || op == "textureProjGrad" || + ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex)) + { + if (is_legacy_es()) + { + legacy_lod_ext = true; + require_extension_internal("GL_EXT_shader_texture_lod"); + } + else if (is_legacy_desktop()) + require_extension_internal("GL_ARB_shader_texture_lod"); + } - // Lerping - case GLSLstd450FMix: - case GLSLstd450IMix: + if (op == "textureLodOffset" || op == "textureProjLodOffset") { - emit_mix_op(result_type, id, args[0], args[1], args[2]); - break; + if (is_legacy_es()) + SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES")); + + require_extension_internal("GL_EXT_gpu_shader4"); } - case GLSLstd450Step: - emit_binary_func_op(result_type, id, args[0], args[1], "step"); - break; - case GLSLstd450SmoothStep: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep"); - break; - // Packing - case GLSLstd450Frexp: - register_call_out_argument(args[1]); - forced_temporaries.insert(id); - emit_binary_func_op(result_type, id, args[0], args[1], "frexp"); - break; + // GLES has very limited support for shadow samplers. 
+ // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers, + // everything else can just throw + bool is_comparison = is_depth_image(imgtype, tex); + if (is_comparison && is_legacy_es()) + { + if (op == "texture" || op == "textureProj") + require_extension_internal("GL_EXT_shadow_samplers"); + else + SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES")); + } - case GLSLstd450FrexpStruct: + if (op == "textureSize") { - forced_temporaries.insert(id); - auto &type = get(result_type); - emit_uninitialized_temporary_expression(result_type, id); - statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ", - to_expression(id), ".", to_member_name(type, 1), ");"); - break; + if (is_legacy_es()) + SPIRV_CROSS_THROW("textureSize not supported in legacy ES"); + if (is_comparison) + SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL"); + require_extension_internal("GL_EXT_gpu_shader4"); } - case GLSLstd450Ldexp: - emit_binary_func_op(result_type, id, args[0], args[1], "ldexp"); - break; - case GLSLstd450PackSnorm4x8: - emit_unary_func_op(result_type, id, args[0], "packSnorm4x8"); - break; - case GLSLstd450PackUnorm4x8: - emit_unary_func_op(result_type, id, args[0], "packUnorm4x8"); - break; - case GLSLstd450PackSnorm2x16: - emit_unary_func_op(result_type, id, args[0], "packSnorm2x16"); - break; - case GLSLstd450PackUnorm2x16: - emit_unary_func_op(result_type, id, args[0], "packUnorm2x16"); - break; - case GLSLstd450PackHalf2x16: - emit_unary_func_op(result_type, id, args[0], "packHalf2x16"); - break; - case GLSLstd450UnpackSnorm4x8: - emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8"); - break; - case GLSLstd450UnpackUnorm4x8: - emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8"); - break; - case GLSLstd450UnpackSnorm2x16: - emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16"); - break; - case GLSLstd450UnpackUnorm2x16: - 
emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16"); - break; - case GLSLstd450UnpackHalf2x16: - emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16"); - break; + if (op == "texelFetch" && is_legacy_es()) + SPIRV_CROSS_THROW("texelFetch not supported in legacy ES"); - case GLSLstd450PackDouble2x32: - emit_unary_func_op(result_type, id, args[0], "packDouble2x32"); - break; - case GLSLstd450UnpackDouble2x32: - emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32"); - break; + bool is_es_and_depth = is_legacy_es() && is_comparison; + std::string type_prefix = is_comparison ? "shadow" : "texture"; - // Vector math - case GLSLstd450Length: - emit_unary_func_op(result_type, id, args[0], "length"); - break; - case GLSLstd450Distance: - emit_binary_func_op(result_type, id, args[0], args[1], "distance"); - break; - case GLSLstd450Cross: - emit_binary_func_op(result_type, id, args[0], args[1], "cross"); - break; - case GLSLstd450Normalize: - emit_unary_func_op(result_type, id, args[0], "normalize"); - break; - case GLSLstd450FaceForward: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward"); - break; - case GLSLstd450Reflect: - emit_binary_func_op(result_type, id, args[0], args[1], "reflect"); - break; - case GLSLstd450Refract: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract"); - break; + if (op == "texture") + return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type); + else if (op == "textureLod") + return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod"); + else if (op == "textureProj") + return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj"); + else if (op == "textureGrad") + return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad"); + else if (op == "textureProjLod") + return join(type_prefix, type, legacy_lod_ext ? 
"ProjLodEXT" : "ProjLod"); + else if (op == "textureLodOffset") + return join(type_prefix, type, "LodOffset"); + else if (op == "textureProjGrad") + return join(type_prefix, type, + is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad"); + else if (op == "textureProjLodOffset") + return join(type_prefix, type, "ProjLodOffset"); + else if (op == "textureSize") + return join("textureSize", type); + else if (op == "texelFetch") + return join("texelFetch", type); + else + { + SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op)); + } +} - // Bit-fiddling - case GLSLstd450FindILsb: - emit_unary_func_op(result_type, id, args[0], "findLSB"); - break; +bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp) +{ + auto *cleft = maybe_get(left); + auto *cright = maybe_get(right); + auto &lerptype = expression_type(lerp); - case GLSLstd450FindSMsb: - emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type); - break; + // If our targets aren't constants, we cannot use construction. + if (!cleft || !cright) + return false; - case GLSLstd450FindUMsb: - emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type, - int_type); // findMSB always returns int. - break; + // If our targets are spec constants, we cannot use construction. 
+ if (cleft->specialization || cright->specialization) + return false; - // Multisampled varying - case GLSLstd450InterpolateAtCentroid: - emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid"); - break; - case GLSLstd450InterpolateAtSample: - emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample"); - break; - case GLSLstd450InterpolateAtOffset: - emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset"); - break; + auto &value_type = get(cleft->constant_type); - case GLSLstd450NMin: - case GLSLstd450NMax: - { - emit_nminmax_op(result_type, id, args[0], args[1], op); - break; - } + if (lerptype.basetype != SPIRType::Boolean) + return false; + if (value_type.basetype == SPIRType::Struct || is_array(value_type)) + return false; + if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize) + return false; - case GLSLstd450NClamp: + // Only valid way in SPIR-V 1.4 to use matrices in select is a scalar select. + // matrix(scalar) constructor fills in diagnonals, so gets messy very quickly. + // Just avoid this case. + if (value_type.columns > 1) + return false; + + // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor. + bool ret = true; + for (uint32_t row = 0; ret && row < value_type.vecsize; row++) { - // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. - // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. - uint32_t &max_id = extra_sub_expressions[id | 0x80000000u]; - if (!max_id) - max_id = ir.increase_bound_by(1); + switch (type.basetype) + { + case SPIRType::Short: + case SPIRType::UShort: + ret = cleft->scalar_u16(0, row) == 0 && cright->scalar_u16(0, row) == 1; + break; - // Inherit precision qualifiers. 
- ir.meta[max_id] = ir.meta[id]; + case SPIRType::Int: + case SPIRType::UInt: + ret = cleft->scalar(0, row) == 0 && cright->scalar(0, row) == 1; + break; - emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax); - emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin); - break; - } + case SPIRType::Half: + ret = cleft->scalar_f16(0, row) == 0.0f && cright->scalar_f16(0, row) == 1.0f; + break; - default: - statement("// unimplemented GLSL op ", eop); - break; + case SPIRType::Float: + ret = cleft->scalar_f32(0, row) == 0.0f && cright->scalar_f32(0, row) == 1.0f; + break; + + case SPIRType::Double: + ret = cleft->scalar_f64(0, row) == 0.0 && cright->scalar_f64(0, row) == 1.0; + break; + + case SPIRType::Int64: + case SPIRType::UInt64: + ret = cleft->scalar_u64(0, row) == 0 && cright->scalar_u64(0, row) == 1; + break; + + default: + ret = false; + break; + } } + + if (ret) + op = type_to_glsl_constructor(type); + return ret; } -void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op) +string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value, + uint32_t false_value) { - // Need to emulate this call. - uint32_t &ids = extra_sub_expressions[id]; - if (!ids) - { - ids = ir.increase_bound_by(5); - auto btype = get(result_type); - btype.basetype = SPIRType::Boolean; - set(ids, btype); - } + string expr; + auto &lerptype = expression_type(select); - uint32_t btype_id = ids + 0; - uint32_t left_nan_id = ids + 1; - uint32_t right_nan_id = ids + 2; - uint32_t tmp_id = ids + 3; - uint32_t mixed_first_id = ids + 4; + if (lerptype.vecsize == 1) + expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ", + to_enclosed_pointer_expression(false_value)); + else + { + auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); }; - // Inherit precision qualifiers. 
- ir.meta[tmp_id] = ir.meta[id]; - ir.meta[mixed_first_id] = ir.meta[id]; + expr = type_to_glsl_constructor(restype); + expr += "("; + for (uint32_t i = 0; i < restype.vecsize; i++) + { + expr += swiz(select, i); + expr += " ? "; + expr += swiz(true_value, i); + expr += " : "; + expr += swiz(false_value, i); + if (i + 1 < restype.vecsize) + expr += ", "; + } + expr += ")"; + } - emit_unary_func_op(btype_id, left_nan_id, op0, "isnan"); - emit_unary_func_op(btype_id, right_nan_id, op1, "isnan"); - emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max"); - emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id); - emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id); + return expr; } -void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, - uint32_t) +void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp) { - require_extension_internal("GL_AMD_shader_ballot"); + auto &lerptype = expression_type(lerp); + auto &restype = get(result_type); - enum AMDShaderBallot + // If this results in a variable pointer, assume it may be written through. + if (restype.pointer) { - SwizzleInvocationsAMD = 1, - SwizzleInvocationsMaskedAMD = 2, - WriteInvocationAMD = 3, - MbcntAMD = 4 - }; + register_write(left); + register_write(right); + } - auto op = static_cast(eop); + string mix_op; + bool has_boolean_mix = *backend.boolean_mix_function && + ((options.es && options.version >= 310) || (!options.es && options.version >= 450)); + bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp); - switch (op) + // Cannot use boolean mix when the lerp argument is just one boolean, + // fall back to regular trinary statements. + if (lerptype.vecsize == 1) + has_boolean_mix = false; + + // If we can reduce the mix to a simple cast, do so. 
+ // This helps for cases like int(bool), uint(bool) which is implemented with + // OpSelect bool 1 0. + if (trivial_mix) { - case SwizzleInvocationsAMD: - emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD"); - register_control_dependent_expression(id); - break; + emit_unary_func_op(result_type, id, lerp, mix_op.c_str()); + } + else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean) + { + // Boolean mix not supported on desktop without extension. + // Was added in OpenGL 4.5 with ES 3.1 compat. + // + // Could use GL_EXT_shader_integer_mix on desktop at least, + // but Apple doesn't support it. :( + // Just implement it as ternary expressions. + auto expr = to_ternary_expression(get(result_type), lerp, right, left); + emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp)); + inherit_expression_dependencies(id, left); + inherit_expression_dependencies(id, right); + inherit_expression_dependencies(id, lerp); + } + else if (lerptype.basetype == SPIRType::Boolean) + emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function); + else + emit_trinary_func_op(result_type, id, left, right, lerp, "mix"); +} - case SwizzleInvocationsMaskedAMD: - emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD"); - register_control_dependent_expression(id); - break; +string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id) +{ + // Keep track of the array indices we have used to load the image. + // We'll need to use the same array index into the combined image sampler array. 
+ auto image_expr = to_non_uniform_aware_expression(image_id); + string array_expr; + auto array_index = image_expr.find_first_of('['); + if (array_index != string::npos) + array_expr = image_expr.substr(array_index, string::npos); - case WriteInvocationAMD: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD"); - register_control_dependent_expression(id); - break; + auto &args = current_function->arguments; - case MbcntAMD: - emit_unary_func_op(result_type, id, args[0], "mbcntAMD"); - register_control_dependent_expression(id); - break; + // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect + // all possible combinations into new sampler2D uniforms. + auto *image = maybe_get_backing_variable(image_id); + auto *samp = maybe_get_backing_variable(samp_id); + if (image) + image_id = image->self; + if (samp) + samp_id = samp->self; - default: - statement("// unimplemented SPV AMD shader ballot op ", eop); - break; - } -} + auto image_itr = find_if(begin(args), end(args), + [image_id](const SPIRFunction::Parameter ¶m) { return image_id == param.id; }); -void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop, - const uint32_t *args, uint32_t) -{ - require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); + auto sampler_itr = find_if(begin(args), end(args), + [samp_id](const SPIRFunction::Parameter ¶m) { return samp_id == param.id; }); - enum AMDShaderExplicitVertexParameter + if (image_itr != end(args) || sampler_itr != end(args)) { - InterpolateAtVertexAMD = 1 - }; + // If any parameter originates from a parameter, we will find it in our argument list. + bool global_image = image_itr == end(args); + bool global_sampler = sampler_itr == end(args); + VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args))); + VariableID sid = global_sampler ? 
samp_id : VariableID(uint32_t(sampler_itr - begin(args))); - auto op = static_cast(eop); + auto &combined = current_function->combined_parameters; + auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { + return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid && + p.sampler_id == sid; + }); - switch (op) + if (itr != end(combined)) + return to_expression(itr->id) + array_expr; + else + { + SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was " + "build_combined_image_samplers() used " + "before compile() was called?"); + } + } + else { - case InterpolateAtVertexAMD: - emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD"); - break; + // For global sampler2D, look directly at the global remapping table. + auto &mapping = combined_image_samplers; + auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) { + return combined.image_id == image_id && combined.sampler_id == samp_id; + }); - default: - statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop); - break; + if (itr != end(combined_image_samplers)) + return to_expression(itr->combined_id) + array_expr; + else + { + SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used " + "before compile() was called?"); + } } } -void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, - const uint32_t *args, uint32_t) +bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op) { - require_extension_internal("GL_AMD_shader_trinary_minmax"); - - enum AMDShaderTrinaryMinMax - { - FMin3AMD = 1, - UMin3AMD = 2, - SMin3AMD = 3, - FMax3AMD = 4, - UMax3AMD = 5, - SMax3AMD = 6, - FMid3AMD = 7, - UMid3AMD = 8, - SMid3AMD = 9 - }; - - auto op = static_cast(eop); - switch (op) { - case FMin3AMD: - case UMin3AMD: - case 
SMin3AMD: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3"); - break; - - case FMax3AMD: - case UMax3AMD: - case SMax3AMD: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3"); - break; - - case FMid3AMD: - case UMid3AMD: - case SMid3AMD: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3"); - break; - + case OpGroupNonUniformElect: + case OpGroupNonUniformBallot: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + case OpControlBarrier: + case OpMemoryBarrier: + case OpGroupNonUniformBallotBitCount: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformInverseBallot: + return true; default: - statement("// unimplemented SPV AMD shader trinary minmax op ", eop); - break; + return false; } } -void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, - uint32_t) +void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) { - require_extension_internal("GL_AMD_gcn_shader"); - - enum AMDGCNShader + if (options.vulkan_semantics && combined_image_samplers.empty()) { - CubeFaceIndexAMD = 1, - CubeFaceCoordAMD = 2, - TimeAMD = 3 - }; + emit_binary_func_op(result_type, result_id, image_id, samp_id, + type_to_glsl(get(result_type), result_id).c_str()); + } + else + { + // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. + emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); + } - auto op = static_cast(eop); + // Make sure to suppress usage tracking and any expression invalidation. + // It is illegal to create temporaries of opaque types. 
+ forwarded_temporaries.erase(result_id); +} +static inline bool image_opcode_is_sample_no_dref(Op op) +{ switch (op) { - case CubeFaceIndexAMD: - emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD"); - break; - case CubeFaceCoordAMD: - emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD"); - break; - case TimeAMD: - { - string expr = "timeAMD()"; - emit_op(result_type, id, expr, true); - register_control_dependent_expression(id); - break; - } + case OpImageSampleExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageFetch: + case OpImageRead: + case OpImageSparseSampleExplicitLod: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseFetch: + case OpImageSparseRead: + return true; default: - statement("// unimplemented SPV AMD gcn shader op ", eop); - break; + return false; } } -void CompilerGLSL::emit_subgroup_op(const Instruction &i) +void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id, + uint32_t &texel_id) { - const uint32_t *ops = stream(i); - auto op = static_cast(i.op); + // Need to allocate two temporaries. 
+ if (options.es) + SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL."); + require_extension_internal("GL_ARB_sparse_texture2"); - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics."); + auto &temps = extra_sub_expressions[id]; + if (temps == 0) + temps = ir.increase_bound_by(2); - switch (op) - { - case OpGroupNonUniformElect: - require_extension_internal("GL_KHR_shader_subgroup_basic"); - break; + feedback_id = temps + 0; + texel_id = temps + 1; - case OpGroupNonUniformBroadcast: - case OpGroupNonUniformBroadcastFirst: - case OpGroupNonUniformBallot: - case OpGroupNonUniformInverseBallot: - case OpGroupNonUniformBallotBitExtract: - case OpGroupNonUniformBallotBitCount: - case OpGroupNonUniformBallotFindLSB: - case OpGroupNonUniformBallotFindMSB: - require_extension_internal("GL_KHR_shader_subgroup_ballot"); - break; + auto &return_type = get(result_type_id); + if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2) + SPIRV_CROSS_THROW("Invalid return type for sparse feedback."); + emit_uninitialized_temporary(return_type.member_types[0], feedback_id); + emit_uninitialized_temporary(return_type.member_types[1], texel_id); +} - case OpGroupNonUniformShuffle: - case OpGroupNonUniformShuffleXor: - require_extension_internal("GL_KHR_shader_subgroup_shuffle"); - break; +uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const +{ + auto itr = extra_sub_expressions.find(id); + if (itr == extra_sub_expressions.end()) + return 0; + else + return itr->second + 1; +} - case OpGroupNonUniformShuffleUp: - case OpGroupNonUniformShuffleDown: - require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative"); - break; +void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse) +{ + auto *ops = stream(i); + auto op = static_cast(i.op); - case OpGroupNonUniformAll: - case OpGroupNonUniformAny: - case OpGroupNonUniformAllEqual: - 
require_extension_internal("GL_KHR_shader_subgroup_vote"); - break; + SmallVector inherited_expressions; - case OpGroupNonUniformFAdd: - case OpGroupNonUniformFMul: - case OpGroupNonUniformFMin: - case OpGroupNonUniformFMax: - case OpGroupNonUniformIAdd: - case OpGroupNonUniformIMul: - case OpGroupNonUniformSMin: - case OpGroupNonUniformSMax: - case OpGroupNonUniformUMin: - case OpGroupNonUniformUMax: - case OpGroupNonUniformBitwiseAnd: - case OpGroupNonUniformBitwiseOr: - case OpGroupNonUniformBitwiseXor: + uint32_t result_type_id = ops[0]; + uint32_t id = ops[1]; + auto &return_type = get(result_type_id); + + uint32_t sparse_code_id = 0; + uint32_t sparse_texel_id = 0; + if (sparse) + emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id); + + bool forward = false; + string expr = to_texture_op(i, sparse, &forward, inherited_expressions); + + if (sparse) { - auto operation = static_cast(ops[3]); - if (operation == GroupOperationClusteredReduce) - { - require_extension_internal("GL_KHR_shader_subgroup_clustered"); - } - else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan || - operation == GroupOperationReduce) - { - require_extension_internal("GL_KHR_shader_subgroup_arithmetic"); - } - else - SPIRV_CROSS_THROW("Invalid group operation."); - break; + statement(to_expression(sparse_code_id), " = ", expr, ";"); + expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id), + ")"); + forward = true; + inherited_expressions.clear(); } - case OpGroupNonUniformQuadSwap: - case OpGroupNonUniformQuadBroadcast: - require_extension_internal("GL_KHR_shader_subgroup_quad"); + emit_op(result_type_id, id, expr, forward); + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(id, inherit); + + // Do not register sparse ops as control dependent as they are always lowered to a temporary. 
+ switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(id); break; default: - SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + break; } +} - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; +std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward, + SmallVector &inherited_expressions) +{ + auto *ops = stream(i); + auto op = static_cast(i.op); + uint32_t length = i.length; - auto scope = static_cast(get(ops[2]).scalar()); - if (scope != ScopeSubgroup) - SPIRV_CROSS_THROW("Only subgroup scope is supported."); + uint32_t result_type_id = ops[0]; + VariableID img = ops[2]; + uint32_t coord = ops[3]; + uint32_t dref = 0; + uint32_t comp = 0; + bool gather = false; + bool proj = false; + bool fetch = false; + bool nonuniform_expression = false; + const uint32_t *opt = nullptr; + + auto &result_type = get(result_type_id); + + inherited_expressions.push_back(coord); + if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img)) + nonuniform_expression = true; switch (op) { - case OpGroupNonUniformElect: - emit_op(result_type, id, "subgroupElect()", true); + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; break; - case OpGroupNonUniformBroadcast: - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast"); + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + proj = true; break; - case OpGroupNonUniformBroadcastFirst: - emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst"); + case OpImageDrefGather: + 
case OpImageSparseDrefGather: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + gather = true; + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("textureGather requires ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400."); break; - case OpGroupNonUniformBallot: - emit_unary_func_op(result_type, id, ops[3], "subgroupBallot"); + case OpImageGather: + case OpImageSparseGather: + comp = ops[4]; + opt = &ops[5]; + length -= 5; + gather = true; + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("textureGather requires ESSL 310."); + else if (!options.es && options.version < 400) + { + if (!expression_is_constant_null(comp)) + SPIRV_CROSS_THROW("textureGather with component requires GLSL 400."); + require_extension_internal("GL_ARB_texture_gather"); + } break; - case OpGroupNonUniformInverseBallot: - emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot"); + case OpImageFetch: + case OpImageSparseFetch: + case OpImageRead: // Reads == fetches in Metal (other langs will not get here) + opt = &ops[4]; + length -= 4; + fetch = true; break; - case OpGroupNonUniformBallotBitExtract: - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract"); + case OpImageSampleProjImplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleProjExplicitLod: + opt = &ops[4]; + length -= 4; + proj = true; break; - case OpGroupNonUniformBallotFindLSB: - emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB"); + default: + opt = &ops[4]; + length -= 4; break; + } - case OpGroupNonUniformBallotFindMSB: - emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB"); - break; + // Bypass pointers because we need the real image struct + auto &type = expression_type(img); + auto &imgtype = get(type.self); - case OpGroupNonUniformBallotBitCount: + uint32_t coord_components = 0; + 
switch (imgtype.image.dim) { - auto operation = static_cast(ops[3]); - if (operation == GroupOperationReduce) - emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount"); - else if (operation == GroupOperationInclusiveScan) - emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount"); - else if (operation == GroupOperationExclusiveScan) - emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount"); - else - SPIRV_CROSS_THROW("Invalid BitCount operation."); + case spv::Dim1D: + coord_components = 1; break; - } - - case OpGroupNonUniformShuffle: - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle"); + case spv::Dim2D: + coord_components = 2; break; - - case OpGroupNonUniformShuffleXor: - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor"); + case spv::Dim3D: + coord_components = 3; break; - - case OpGroupNonUniformShuffleUp: - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp"); + case spv::DimCube: + coord_components = 3; break; - - case OpGroupNonUniformShuffleDown: - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown"); + case spv::DimBuffer: + coord_components = 1; break; - - case OpGroupNonUniformAll: - emit_unary_func_op(result_type, id, ops[3], "subgroupAll"); + default: + coord_components = 2; break; + } - case OpGroupNonUniformAny: - emit_unary_func_op(result_type, id, ops[3], "subgroupAny"); - break; + if (dref) + inherited_expressions.push_back(dref); - case OpGroupNonUniformAllEqual: - emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual"); - break; + if (proj) + coord_components++; + if (imgtype.image.arrayed) + coord_components++; - // clang-format off -#define GLSL_GROUP_OP(op, glsl_op) \ -case OpGroupNonUniform##op: \ - { \ - auto operation = static_cast(ops[3]); \ - if (operation == GroupOperationReduce) \ - emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \ - else if (operation == 
GroupOperationInclusiveScan) \ - emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \ - else if (operation == GroupOperationExclusiveScan) \ - emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \ - else if (operation == GroupOperationClusteredReduce) \ - emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \ - else \ - SPIRV_CROSS_THROW("Invalid group operation."); \ - break; \ - } - GLSL_GROUP_OP(FAdd, Add) - GLSL_GROUP_OP(FMul, Mul) - GLSL_GROUP_OP(FMin, Min) - GLSL_GROUP_OP(FMax, Max) - GLSL_GROUP_OP(IAdd, Add) - GLSL_GROUP_OP(IMul, Mul) - GLSL_GROUP_OP(SMin, Min) - GLSL_GROUP_OP(SMax, Max) - GLSL_GROUP_OP(UMin, Min) - GLSL_GROUP_OP(UMax, Max) - GLSL_GROUP_OP(BitwiseAnd, And) - GLSL_GROUP_OP(BitwiseOr, Or) - GLSL_GROUP_OP(BitwiseXor, Xor) -#undef GLSL_GROUP_OP - // clang-format on + uint32_t bias = 0; + uint32_t lod = 0; + uint32_t grad_x = 0; + uint32_t grad_y = 0; + uint32_t coffset = 0; + uint32_t offset = 0; + uint32_t coffsets = 0; + uint32_t sample = 0; + uint32_t minlod = 0; + uint32_t flags = 0; - case OpGroupNonUniformQuadSwap: + if (length) { - uint32_t direction = get(ops[4]).scalar(); - if (direction == 0) - emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal"); - else if (direction == 1) - emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical"); - else if (direction == 2) - emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal"); - else - SPIRV_CROSS_THROW("Invalid quad swap direction."); - break; + flags = *opt++; + length--; } - case OpGroupNonUniformQuadBroadcast: - { - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast"); - break; - } + auto test = [&](uint32_t &v, uint32_t flag) { + if (length && (flags & flag)) + { + v = *opt++; + inherited_expressions.push_back(v); + length--; + } + }; - default: - SPIRV_CROSS_THROW("Invalid opcode for subgroup."); - } + test(bias, ImageOperandsBiasMask); 
+ test(lod, ImageOperandsLodMask); + test(grad_x, ImageOperandsGradMask); + test(grad_y, ImageOperandsGradMask); + test(coffset, ImageOperandsConstOffsetMask); + test(offset, ImageOperandsOffsetMask); + test(coffsets, ImageOperandsConstOffsetsMask); + test(sample, ImageOperandsSampleMask); + test(minlod, ImageOperandsMinLodMask); - register_control_dependent_expression(id); -} + TextureFunctionBaseArguments base_args = {}; + base_args.img = img; + base_args.imgtype = &imgtype; + base_args.is_fetch = fetch != 0; + base_args.is_gather = gather != 0; + base_args.is_proj = proj != 0; -string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) -{ - // OpBitcast can deal with pointers. - if (out_type.pointer || in_type.pointer) - return type_to_glsl(out_type); + string expr; + TextureFunctionNameArguments name_args = {}; + + name_args.base = base_args; + name_args.has_array_offsets = coffsets != 0; + name_args.has_offset = coffset != 0 || offset != 0; + name_args.has_grad = grad_x != 0 || grad_y != 0; + name_args.has_dref = dref != 0; + name_args.is_sparse_feedback = sparse; + name_args.has_min_lod = minlod != 0; + name_args.lod = lod; + expr += to_function_name(name_args); + expr += "("; - if (out_type.basetype == in_type.basetype) - return ""; + uint32_t sparse_texel_id = 0; + if (sparse) + sparse_texel_id = get_sparse_feedback_texel_id(ops[1]); + + TextureFunctionArguments args = {}; + args.base = base_args; + args.coord = coord; + args.coord_components = coord_components; + args.dref = dref; + args.grad_x = grad_x; + args.grad_y = grad_y; + args.lod = lod; + + if (coffsets) + args.offset = coffsets; + else if (coffset) + args.offset = coffset; + else + args.offset = offset; + + args.bias = bias; + args.component = comp; + args.sample = sample; + args.sparse_texel = sparse_texel_id; + args.min_lod = minlod; + args.nonuniform_expression = nonuniform_expression; + expr += to_function_args(args, forward); + expr += ")"; - 
assert(out_type.basetype != SPIRType::Boolean); - assert(in_type.basetype != SPIRType::Boolean); + // texture(samplerXShadow) returns float. shadowX() returns vec4, but only in desktop GLSL. Swizzle here. + if (is_legacy() && !options.es && is_depth_image(imgtype, img)) + expr += ".r"; - bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); - bool same_size_cast = out_type.width == in_type.width; + // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. + // Remap back to 4 components as sampling opcodes expect. + if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op)) + { + bool image_is_depth = false; + const auto *combined = maybe_get(img); + VariableID image_id = combined ? combined->image : img; - // Trivial bitcast case, casts between integers. - if (integral_cast && same_size_cast) - return type_to_glsl(out_type); + if (combined && is_depth_image(imgtype, combined->image)) + image_is_depth = true; + else if (is_depth_image(imgtype, img)) + image_is_depth = true; - // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types). - if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1) - return "unpack8"; - else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1) - return "pack16"; - else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1) - return "pack32"; + // We must also check the backing variable for the image. + // We might have loaded an OpImage, and used that handle for two different purposes. + // Once with comparison, once without. + auto *image_variable = maybe_get_backing_variable(image_id); + if (image_variable && is_depth_image(get(image_variable->basetype), image_variable->self)) + image_is_depth = true; - // Floating <-> Integer special casts. Just have to enumerate all cases. :( - // 16-bit, 32-bit and 64-bit floats. 
- if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) - { - if (is_legacy_es()) - SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL."); - else if (!options.es && options.version < 330) - require_extension_internal("GL_ARB_shader_bit_encoding"); - return "floatBitsToUint"; - } - else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) - { - if (is_legacy_es()) - SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL."); - else if (!options.es && options.version < 330) - require_extension_internal("GL_ARB_shader_bit_encoding"); - return "floatBitsToInt"; - } - else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) - { - if (is_legacy_es()) - SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL."); - else if (!options.es && options.version < 330) - require_extension_internal("GL_ARB_shader_bit_encoding"); - return "uintBitsToFloat"; + if (image_is_depth) + expr = remap_swizzle(result_type, 1, expr); } - else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) + + if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32) { - if (is_legacy_es()) - SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL."); - else if (!options.es && options.version < 330) - require_extension_internal("GL_ARB_shader_bit_encoding"); - return "intBitsToFloat"; + // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically. + // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision. 
+ expr = join(type_to_glsl_constructor(result_type), "(", expr, ")"); } - else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) - return "doubleBitsToInt64"; - else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) - return "doubleBitsToUint64"; - else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) - return "int64BitsToDouble"; - else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) - return "uint64BitsToDouble"; - else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half) - return "float16BitsToInt16"; - else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) - return "float16BitsToUint16"; - else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short) - return "int16BitsToFloat16"; - else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) - return "uint16BitsToFloat16"; + // Deals with reads from MSL. We might need to downconvert to fewer components. + if (op == OpImageRead) + expr = remap_swizzle(result_type, 4, expr); - // And finally, some even more special purpose casts. 
- if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2) - return "packUint2x32"; - else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) - return "unpackFloat2x16"; - else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) - return "packFloat2x16"; - else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2) - return "packInt2x16"; - else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1) - return "unpackInt2x16"; - else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2) - return "packUint2x16"; - else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) - return "unpackUint2x16"; - else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4) - return "packInt4x16"; - else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1) - return "unpackInt4x16"; - else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4) - return "packUint4x16"; - else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1) - return "unpackUint4x16"; + return expr; +} - return ""; +bool CompilerGLSL::expression_is_constant_null(uint32_t id) const +{ + auto *c = maybe_get(id); + if (!c) + return false; + return c->constant_is_null(); } -string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument) +bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr) { - auto op = bitcast_glsl_op(result_type, expression_type(argument)); - if (op.empty()) - return to_enclosed_unpacked_expression(argument); - else - 
return join(op, "(", to_unpacked_expression(argument), ")"); + auto &type = expression_type(ptr); + if (type.array.empty()) + return false; + + if (!backend.array_is_value_type) + return true; + + auto *var = maybe_get_backing_variable(ptr); + if (!var) + return false; + + auto &backed_type = get(var->basetype); + return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct && + has_member_decoration(backed_type.self, 0, DecorationOffset); } -std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg) +// Returns the function name for a texture sampling function for the specified image and sampling characteristics. +// For some subclasses, the function is a method on the specified image. +string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args) { - auto expr = to_expression(arg); - auto &src_type = expression_type(arg); - if (src_type.basetype != target_type) + if (args.has_min_lod) { - auto target = src_type; - target.basetype = target_type; - expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")"); + if (options.es) + SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL."); + require_extension_internal("GL_ARB_sparse_texture_clamp"); } - return expr; -} + string fname; + auto &imgtype = *args.base.imgtype; + VariableID tex = args.base.img; -std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type, - const std::string &expr) -{ - if (target_type.basetype == expr_type) - return expr; + // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. + // To emulate this, we will have to use textureGrad with a constant gradient of 0. + // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. + // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. 
+ bool workaround_lod_array_shadow_as_grad = false; + if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && + is_depth_image(imgtype, tex) && args.lod && !args.base.is_fetch) + { + if (!expression_is_constant_null(args.lod)) + { + SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be " + "expressed in GLSL."); + } + workaround_lod_array_shadow_as_grad = true; + } - auto src_type = target_type; - src_type.basetype = expr_type; - return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")"); -} + if (args.is_sparse_feedback) + fname += "sparse"; -string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) -{ - switch (builtin) + if (args.base.is_fetch) + fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch"; + else { - case BuiltInPosition: - return "gl_Position"; - case BuiltInPointSize: - return "gl_PointSize"; - case BuiltInClipDistance: - return "gl_ClipDistance"; - case BuiltInCullDistance: - return "gl_CullDistance"; - case BuiltInVertexId: - if (options.vulkan_semantics) - SPIRV_CROSS_THROW( - "Cannot implement gl_VertexID in Vulkan GLSL. This shader was created with GL semantics."); - return "gl_VertexID"; - case BuiltInInstanceId: - if (options.vulkan_semantics) - SPIRV_CROSS_THROW( - "Cannot implement gl_InstanceID in Vulkan GLSL. This shader was created with GL semantics."); - return "gl_InstanceID"; - case BuiltInVertexIndex: - if (options.vulkan_semantics) - return "gl_VertexIndex"; - else - return "gl_VertexID"; // gl_VertexID already has the base offset applied. - case BuiltInInstanceIndex: - if (options.vulkan_semantics) - return "gl_InstanceIndex"; - else if (options.vertex.support_nonzero_base_instance) - return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID. 
- else - return "gl_InstanceID"; - case BuiltInPrimitiveId: - if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry) - return "gl_PrimitiveIDIn"; - else - return "gl_PrimitiveID"; - case BuiltInInvocationId: - return "gl_InvocationID"; - case BuiltInLayer: - return "gl_Layer"; - case BuiltInViewportIndex: - return "gl_ViewportIndex"; - case BuiltInTessLevelOuter: - return "gl_TessLevelOuter"; - case BuiltInTessLevelInner: - return "gl_TessLevelInner"; - case BuiltInTessCoord: - return "gl_TessCoord"; - case BuiltInFragCoord: - return "gl_FragCoord"; - case BuiltInPointCoord: - return "gl_PointCoord"; - case BuiltInFrontFacing: - return "gl_FrontFacing"; - case BuiltInFragDepth: - return "gl_FragDepth"; - case BuiltInNumWorkgroups: - return "gl_NumWorkGroups"; - case BuiltInWorkgroupSize: - return "gl_WorkGroupSize"; - case BuiltInWorkgroupId: - return "gl_WorkGroupID"; - case BuiltInLocalInvocationId: - return "gl_LocalInvocationID"; - case BuiltInGlobalInvocationId: - return "gl_GlobalInvocationID"; - case BuiltInLocalInvocationIndex: - return "gl_LocalInvocationIndex"; - case BuiltInHelperInvocation: - return "gl_HelperInvocation"; - case BuiltInBaseVertex: - if (options.es) - SPIRV_CROSS_THROW("BaseVertex not supported in ES profile."); - if (options.version < 460) - { - require_extension_internal("GL_ARB_shader_draw_parameters"); - return "gl_BaseVertexARB"; - } - return "gl_BaseVertex"; - case BuiltInBaseInstance: - if (options.es) - SPIRV_CROSS_THROW("BaseInstance not supported in ES profile."); - if (options.version < 460) - { - require_extension_internal("GL_ARB_shader_draw_parameters"); - return "gl_BaseInstanceARB"; - } - return "gl_BaseInstance"; - case BuiltInDrawIndex: - if (options.es) - SPIRV_CROSS_THROW("DrawIndex not supported in ES profile."); - if (options.version < 460) - { - require_extension_internal("GL_ARB_shader_draw_parameters"); - return "gl_DrawIDARB"; - } - return "gl_DrawID"; - - case BuiltInSampleId: - 
if (options.es && options.version < 320) - require_extension_internal("GL_OES_sample_variables"); - if (!options.es && options.version < 400) - SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400."); - return "gl_SampleID"; - - case BuiltInSampleMask: - if (options.es && options.version < 320) - require_extension_internal("GL_OES_sample_variables"); - if (!options.es && options.version < 400) - SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400."); - - if (storage == StorageClassInput) - return "gl_SampleMaskIn"; - else - return "gl_SampleMask"; - - case BuiltInSamplePosition: - if (options.es && options.version < 320) - require_extension_internal("GL_OES_sample_variables"); - if (!options.es && options.version < 400) - SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400."); - return "gl_SamplePosition"; + fname += args.is_sparse_feedback ? "Texture" : "texture"; - case BuiltInViewIndex: - if (options.vulkan_semantics) - { - require_extension_internal("GL_EXT_multiview"); - return "gl_ViewIndex"; - } - else - { - require_extension_internal("GL_OVR_multiview2"); - return "gl_ViewID_OVR"; - } + if (args.base.is_gather) + fname += "Gather"; + if (args.has_array_offsets) + fname += "Offsets"; + if (args.base.is_proj) + fname += "Proj"; + if (args.has_grad || workaround_lod_array_shadow_as_grad) + fname += "Grad"; + if (args.lod != 0 && !workaround_lod_array_shadow_as_grad) + fname += "Lod"; + } - case BuiltInNumSubgroups: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); - return "gl_NumSubgroups"; + if (args.has_offset) + fname += "Offset"; - case BuiltInSubgroupId: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); - return "gl_SubgroupID"; + if (args.has_min_lod) + fname += "Clamp"; - case BuiltInSubgroupSize: 
- if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); - return "gl_SubgroupSize"; + if (args.is_sparse_feedback || args.has_min_lod) + fname += "ARB"; - case BuiltInSubgroupLocalInvocationId: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); - return "gl_SubgroupInvocationID"; + return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname; +} - case BuiltInSubgroupEqMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); - return "gl_SubgroupEqMask"; +std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) +{ + auto *var = maybe_get_backing_variable(id); - case BuiltInSubgroupGeMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); - return "gl_SubgroupGeMask"; + // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL. + // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions. + if (var) + { + auto &type = get(var->basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) + { + if (options.vulkan_semantics) + { + if (dummy_sampler_id) + { + // Don't need to consider Shadow state since the dummy sampler is always non-shadow. + auto sampled_type = type; + sampled_type.basetype = SPIRType::SampledImage; + return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ", + to_expression(dummy_sampler_id), ")"); + } + else + { + // Newer glslang supports this extension to deal with texture2D as argument to texture functions. 
+ require_extension_internal("GL_EXT_samplerless_texture_functions"); + } + } + else + { + if (!dummy_sampler_id) + SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was " + "build_dummy_sampler_for_combined_images() called?"); - case BuiltInSubgroupGtMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); - return "gl_SubgroupGtMask"; + return to_combined_image_sampler(id, dummy_sampler_id); + } + } + } - case BuiltInSubgroupLeMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); - return "gl_SubgroupLeMask"; + return to_non_uniform_aware_expression(id); +} - case BuiltInSubgroupLtMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); - return "gl_SubgroupLtMask"; +// Returns the function args for a texture sampling function for the specified image and sampling characteristics. 
+string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward) +{ + VariableID img = args.base.img; + auto &imgtype = *args.base.imgtype; - case BuiltInLaunchIdNV: - return "gl_LaunchIDNV"; - case BuiltInLaunchSizeNV: - return "gl_LaunchSizeNV"; - case BuiltInWorldRayOriginNV: - return "gl_WorldRayOriginNV"; - case BuiltInWorldRayDirectionNV: - return "gl_WorldRayDirectionNV"; - case BuiltInObjectRayOriginNV: - return "gl_ObjectRayOriginNV"; - case BuiltInObjectRayDirectionNV: - return "gl_ObjectRayDirectionNV"; - case BuiltInRayTminNV: - return "gl_RayTminNV"; - case BuiltInRayTmaxNV: - return "gl_RayTmaxNV"; - case BuiltInInstanceCustomIndexNV: - return "gl_InstanceCustomIndexNV"; - case BuiltInObjectToWorldNV: - return "gl_ObjectToWorldNV"; - case BuiltInWorldToObjectNV: - return "gl_WorldToObjectNV"; - case BuiltInHitTNV: - return "gl_HitTNV"; - case BuiltInHitKindNV: - return "gl_HitKindNV"; - case BuiltInIncomingRayFlagsNV: - return "gl_IncomingRayFlagsNV"; + string farg_str; + if (args.base.is_fetch) + farg_str = convert_separate_image_to_expression(img); + else + farg_str = to_non_uniform_aware_expression(img); - case BuiltInBaryCoordNV: + if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos) { - if (options.es && options.version < 320) - SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320."); - else if (!options.es && options.version < 450) - SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450."); - require_extension_internal("GL_NV_fragment_shader_barycentric"); - return "gl_BaryCoordNV"; + // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way. 
+ farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")"); } - case BuiltInBaryCoordNoPerspNV: - { - if (options.es && options.version < 320) - SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320."); - else if (!options.es && options.version < 450) - SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450."); - require_extension_internal("GL_NV_fragment_shader_barycentric"); - return "gl_BaryCoordNoPerspNV"; - } + bool swizz_func = backend.swizzle_is_function; + auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { + if (comps == in_comps) + return ""; - case BuiltInFragStencilRefEXT: - { - if (!options.es) + switch (comps) { - require_extension_internal("GL_ARB_shader_stencil_export"); - return "gl_FragStencilRefARB"; + case 1: + return ".x"; + case 2: + return swizz_func ? ".xy()" : ".xy"; + case 3: + return swizz_func ? ".xyz()" : ".xyz"; + default: + return ""; } - else - SPIRV_CROSS_THROW("Stencil export not supported in GLES."); - } + }; - default: - return join("gl_BuiltIn_", convert_to_string(builtin)); - } -} + bool forward = should_forward(args.coord); -const char *CompilerGLSL::index_to_swizzle(uint32_t index) -{ - switch (index) + // The IR can give us more components than we need, so chop them off as needed. + auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize); + // Only enclose the UV expression if needed. + auto coord_expr = + (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr); + + // texelFetch only takes int, not uint. 
+ auto &coord_type = expression_type(args.coord); + if (coord_type.basetype == SPIRType::UInt) { - case 0: - return "x"; - case 1: - return "y"; - case 2: - return "z"; - case 3: - return "w"; - default: - SPIRV_CROSS_THROW("Swizzle index out of range"); + auto expected_type = coord_type; + expected_type.vecsize = args.coord_components; + expected_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr); } -} - -string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, - AccessChainFlags flags, AccessChainMeta *meta) -{ - string expr; - bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; - bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0; - bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; - bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; + // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. + // To emulate this, we will have to use textureGrad with a constant gradient of 0. + // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. + // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. + bool workaround_lod_array_shadow_as_grad = + ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && + is_depth_image(imgtype, img) && args.lod != 0 && !args.base.is_fetch; - if (!chain_only) - expr = to_enclosed_expression(base, register_expression_read); + if (args.dref) + { + forward = forward && should_forward(args.dref); - // Start traversing type hierarchy at the proper non-pointer types, - // but keep type_id referencing the original pointer for use below. 
- uint32_t type_id = expression_type_id(base); - - if (!backend.native_pointers) - { - if (ptr_chain) - SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain."); - - // Wrapped buffer reference pointer types will need to poke into the internal "value" member before - // continuing the access chain. - if (should_dereference(base)) + // SPIR-V splits dref and coordinate. + if (args.base.is_gather || + args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather. { - auto &type = get(type_id); - expr = dereference_expression(type, expr); + farg_str += ", "; + farg_str += to_expression(args.coord); + farg_str += ", "; + farg_str += to_expression(args.dref); } - } - - const auto *type = &get_pointee_type(type_id); - - bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos; - bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base); - bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPacked); - uint32_t packed_type = get_extended_decoration(base, SPIRVCrossDecorationPackedType); - bool is_invariant = has_decoration(base, DecorationInvariant); - bool pending_array_enclose = false; - bool dimension_flatten = false; - - const auto append_index = [&](uint32_t index) { - expr += "["; - - // If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier. 
- bool nonuniform_index = - has_decoration(index, DecorationNonUniformEXT) && - (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock)); - if (nonuniform_index) + else if (args.base.is_proj) { - expr += backend.nonuniform_qualifier; - expr += "("; - } - - if (index_is_literal) - expr += convert_to_string(index); - else - expr += to_expression(index, register_expression_read); - - if (nonuniform_index) - expr += ")"; - - expr += "]"; - }; - - for (uint32_t i = 0; i < count; i++) - { - uint32_t index = indices[i]; + // Have to reshuffle so we get vec4(coord, dref, proj), special case. + // Other shading languages splits up the arguments for coord and compare value like SPIR-V. + // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow. + farg_str += ", vec4("; - // Pointer chains - if (ptr_chain && i == 0) - { - // If we are flattening multidimensional arrays, only create opening bracket on first - // array index. - if (options.flatten_multidimensional_arrays) + if (imgtype.image.dim == Dim1D) { - dimension_flatten = type->array.size() >= 1; - pending_array_enclose = dimension_flatten; - if (pending_array_enclose) - expr += "["; + // Could reuse coord_expr, but we will mess up the temporary usage checking. + farg_str += to_enclosed_expression(args.coord) + ".x"; + farg_str += ", "; + farg_str += "0.0, "; + farg_str += to_expression(args.dref); + farg_str += ", "; + farg_str += to_enclosed_expression(args.coord) + ".y)"; } - - if (options.flatten_multidimensional_arrays && dimension_flatten) + else if (imgtype.image.dim == Dim2D) { - // If we are flattening multidimensional arrays, do manual stride computation. 
- if (index_is_literal) - expr += convert_to_string(index); - else - expr += to_enclosed_expression(index, register_expression_read); - - for (auto j = uint32_t(type->array.size()); j; j--) - { - expr += " * "; - expr += enclose_expression(to_array_size(*type, j - 1)); - } - - if (type->array.empty()) - pending_array_enclose = false; - else - expr += " + "; - - if (!pending_array_enclose) - expr += "]"; + // Could reuse coord_expr, but we will mess up the temporary usage checking. + farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy"); + farg_str += ", "; + farg_str += to_expression(args.dref); + farg_str += ", "; + farg_str += to_enclosed_expression(args.coord) + ".z)"; } else - { - append_index(index); - } - - if (type->basetype == SPIRType::ControlPointArray) - { - type_id = type->parent_type; - type = &get(type_id); - } - - access_chain_is_arrayed = true; + SPIRV_CROSS_THROW("Invalid type for textureProj with shadow."); } - // Arrays - else if (!type->array.empty()) + else { - // If we are flattening multidimensional arrays, only create opening bracket on first - // array index. - if (options.flatten_multidimensional_arrays && !pending_array_enclose) - { - dimension_flatten = type->array.size() > 1; - pending_array_enclose = dimension_flatten; - if (pending_array_enclose) - expr += "["; - } - - assert(type->parent_type); + // Create a composite which merges coord/dref into a single vector. + auto type = expression_type(args.coord); + type.vecsize = args.coord_components + 1; + if (imgtype.image.dim == Dim1D && options.es) + type.vecsize++; + farg_str += ", "; + farg_str += type_to_glsl_constructor(type); + farg_str += "("; - auto *var = maybe_get(base); - if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) && - !has_decoration(type->self, DecorationBlock)) + if (imgtype.image.dim == Dim1D && options.es) { - // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared. 
- // Normally, these variables live in blocks when compiled from GLSL, - // but HLSL seems to just emit straight arrays here. - // We must pretend this access goes through gl_in/gl_out arrays - // to be able to access certain builtins as arrays. - auto builtin = ir.meta[base].decoration.builtin_type; - switch (builtin) + if (imgtype.image.arrayed) { - // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. - // case BuiltInClipDistance: - case BuiltInPosition: - case BuiltInPointSize: - if (var->storage == StorageClassInput) - expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr); - else if (var->storage == StorageClassOutput) - expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr); - else - append_index(index); - break; - - default: - append_index(index); - break; + farg_str += enclose_expression(coord_expr) + ".x"; + farg_str += ", 0.0, "; + farg_str += enclose_expression(coord_expr) + ".y"; } - } - else if (options.flatten_multidimensional_arrays && dimension_flatten) - { - // If we are flattening multidimensional arrays, do manual stride computation. 
- auto &parent_type = get(type->parent_type); - - if (index_is_literal) - expr += convert_to_string(index); else - expr += to_enclosed_expression(index, register_expression_read); - - for (auto j = uint32_t(parent_type.array.size()); j; j--) { - expr += " * "; - expr += enclose_expression(to_array_size(parent_type, j - 1)); + farg_str += coord_expr; + farg_str += ", 0.0"; } - - if (parent_type.array.empty()) - pending_array_enclose = false; - else - expr += " + "; - - if (!pending_array_enclose) - expr += "]"; } else - { - append_index(index); - } - - type_id = type->parent_type; - type = &get(type_id); + farg_str += coord_expr; - access_chain_is_arrayed = true; + farg_str += ", "; + farg_str += to_expression(args.dref); + farg_str += ")"; } - // For structs, the index refers to a constant, which indexes into the members. - // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. - else if (type->basetype == SPIRType::Struct) + } + else + { + if (imgtype.image.dim == Dim1D && options.es) { - if (!index_is_literal) - index = get(index).scalar(); - - if (index >= type->member_types.size()) - SPIRV_CROSS_THROW("Member index is out of bounds!"); - - BuiltIn builtin; - if (is_member_builtin(*type, index, &builtin)) + // Have to fake a second coordinate. + if (type_is_floating_point(coord_type)) { - // FIXME: We rely here on OpName on gl_in/gl_out to make this work properly. - // To make this properly work by omitting all OpName opcodes, - // we need to infer gl_in or gl_out based on the builtin, and stage. - if (access_chain_is_arrayed) + // Cannot mix proj and array. 
+ if (imgtype.image.arrayed || args.base.is_proj) { - expr += "."; - expr += builtin_to_glsl(builtin, type->storage); + coord_expr = join("vec3(", enclose_expression(coord_expr), ".x, 0.0, ", + enclose_expression(coord_expr), ".y)"); } else - expr = builtin_to_glsl(builtin, type->storage); + coord_expr = join("vec2(", coord_expr, ", 0.0)"); } else { - // If the member has a qualified name, use it as the entire chain - string qual_mbr_name = get_member_qualified_name(type_id, index); - if (!qual_mbr_name.empty()) - expr = qual_mbr_name; + if (imgtype.image.arrayed) + { + coord_expr = join("ivec3(", enclose_expression(coord_expr), + ".x, 0, ", + enclose_expression(coord_expr), ".y)"); + } else - expr += to_member_reference(base, *type, index, ptr_chain); + coord_expr = join("ivec2(", coord_expr, ", 0)"); } + } - if (has_member_decoration(type->self, index, DecorationInvariant)) - is_invariant = true; + farg_str += ", "; + farg_str += coord_expr; + } - is_packed = member_is_packed_type(*type, index); - if (is_packed) - packed_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPackedType); - else - packed_type = 0; + if (args.grad_x || args.grad_y) + { + forward = forward && should_forward(args.grad_x); + forward = forward && should_forward(args.grad_y); + farg_str += ", "; + farg_str += to_expression(args.grad_x); + farg_str += ", "; + farg_str += to_expression(args.grad_y); + } - row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); - type = &get(type->member_types[index]); + if (args.lod) + { + if (workaround_lod_array_shadow_as_grad) + { + // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0. + // Implementing this as plain texture() is not safe on some implementations. 
+ if (imgtype.image.dim == Dim2D) + farg_str += ", vec2(0.0), vec2(0.0)"; + else if (imgtype.image.dim == DimCube) + farg_str += ", vec3(0.0), vec3(0.0)"; } - // Matrix -> Vector - else if (type->columns > 1) + else { - if (row_major_matrix_needs_conversion) - { - expr = convert_row_major_matrix(expr, *type, is_packed); - row_major_matrix_needs_conversion = false; - is_packed = false; - packed_type = 0; - } + forward = forward && should_forward(args.lod); + farg_str += ", "; - expr += "["; - if (index_is_literal) - expr += convert_to_string(index); + // Lod expression for TexelFetch in GLSL must be int, and only int. + if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) + farg_str += bitcast_expression(SPIRType::Int, args.lod); else - expr += to_expression(index, register_expression_read); - expr += "]"; - - type_id = type->parent_type; - type = &get(type_id); + farg_str += to_expression(args.lod); } - // Vector -> Scalar - else if (type->vecsize > 1) - { - if (index_is_literal && !is_packed) - { - expr += "."; - expr += index_to_swizzle(index); - } - else if (ir.ids[index].get_type() == TypeConstant && !is_packed) - { - auto &c = get(index); - expr += "."; - expr += index_to_swizzle(c.scalar()); - } - else if (index_is_literal) - { - // For packed vectors, we can only access them as an array, not by swizzle. - expr += join("[", index, "]"); - } - else - { - expr += "["; - expr += to_expression(index, register_expression_read); - expr += "]"; - } + } + else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) + { + // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. 
+ farg_str += ", 0"; + } - is_packed = false; - packed_type = 0; - type_id = type->parent_type; - type = &get(type_id); - } - else if (!backend.allow_truncated_access_chain) - SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + if (args.offset) + { + forward = forward && should_forward(args.offset); + farg_str += ", "; + farg_str += bitcast_expression(SPIRType::Int, args.offset); } - if (pending_array_enclose) + if (args.sample) { - SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, " - "but the access chain was terminated in the middle of a multidimensional array. " - "This is not supported."); + farg_str += ", "; + farg_str += bitcast_expression(SPIRType::Int, args.sample); } - if (meta) + if (args.min_lod) { - meta->need_transpose = row_major_matrix_needs_conversion; - meta->storage_is_packed = is_packed; - meta->storage_is_invariant = is_invariant; - meta->storage_packed_type = packed_type; + farg_str += ", "; + farg_str += to_expression(args.min_lod); } - return expr; -} - -string CompilerGLSL::to_flattened_struct_member(const SPIRVariable &var, uint32_t index) -{ - auto &type = get(var.basetype); - return sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, index))); -} - -string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, - AccessChainMeta *meta, bool ptr_chain) -{ - if (flattened_buffer_blocks.count(base)) + if (args.sparse_texel) { - uint32_t matrix_stride = 0; - bool need_transpose = false; - flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride, - ptr_chain); - - if (meta) - { - meta->need_transpose = target_type.columns > 1 && need_transpose; - meta->storage_is_packed = false; - } - - return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose); + // Sparse texel output parameter comes after everything else, except it's before the optional, 
component/bias arguments. + farg_str += ", "; + farg_str += to_expression(args.sparse_texel); } - else if (flattened_structs.count(base) && count > 0) - { - AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; - if (ptr_chain) - flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; - auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1); - if (meta) - { - meta->need_transpose = false; - meta->storage_is_packed = false; - } - return sanitize_underscores(join(to_name(base), "_", chain)); - } - else + if (args.bias) { - AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; - if (ptr_chain) - flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; - return access_chain_internal(base, indices, count, flags, meta); + forward = forward && should_forward(args.bias); + farg_str += ", "; + farg_str += to_expression(args.bias); } -} -string CompilerGLSL::load_flattened_struct(SPIRVariable &var) -{ - auto expr = type_to_glsl_constructor(get(var.basetype)); - expr += '('; - - auto &type = get(var.basetype); - for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + if (args.component && !expression_is_constant_null(args.component)) { - if (i) - expr += ", "; - - // Flatten the varyings. - // Apply name transformation for flattened I/O blocks. - expr += to_flattened_struct_member(var, i); + forward = forward && should_forward(args.component); + farg_str += ", "; + farg_str += bitcast_expression(SPIRType::Int, args.component); } - expr += ')'; - return expr; -} -void CompilerGLSL::store_flattened_struct(SPIRVariable &var, uint32_t value) -{ - // We're trying to store a structure which has been flattened. - // Need to copy members one by one. - auto rhs = to_expression(value); + *p_forward = forward; - // Store result locally. - // Since we're declaring a variable potentially multiple times here, - // store the variable in an isolated scope. 
- begin_scope(); - statement(variable_decl_function_local(var), " = ", rhs, ";"); + return farg_str; +} - auto &type = get(var.basetype); - for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) +Op CompilerGLSL::get_remapped_spirv_op(Op op) const +{ + if (options.relax_nan_checks) { - // Flatten the varyings. - // Apply name transformation for flattened I/O blocks. + switch (op) + { + case OpFUnordLessThan: + op = OpFOrdLessThan; + break; + case OpFUnordLessThanEqual: + op = OpFOrdLessThanEqual; + break; + case OpFUnordGreaterThan: + op = OpFOrdGreaterThan; + break; + case OpFUnordGreaterThanEqual: + op = OpFOrdGreaterThanEqual; + break; + case OpFUnordEqual: + op = OpFOrdEqual; + break; + case OpFOrdNotEqual: + op = OpFUnordNotEqual; + break; - auto lhs = sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, i))); - rhs = join(to_name(var.self), ".", to_member_name(type, i)); - statement(lhs, " = ", rhs, ";"); + default: + break; + } } - end_scope(); -} -std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, - const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, - bool need_transpose) -{ - if (!target_type.array.empty()) - SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened"); - else if (target_type.basetype == SPIRType::Struct) - return flattened_access_chain_struct(base, indices, count, target_type, offset); - else if (target_type.columns > 1) - return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose); - else - return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose); + return op; } -std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, - const SPIRType &target_type, uint32_t offset) +GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const { - std::string expr; 
- - expr += type_to_glsl_constructor(target_type); - expr += "("; - - for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i) + // Relax to non-NaN aware opcodes. + if (options.relax_nan_checks) { - if (i != 0) - expr += ", "; - - const SPIRType &member_type = get(target_type.member_types[i]); - uint32_t member_offset = type_struct_member_offset(target_type, i); - - // The access chain terminates at the struct, so we need to find matrix strides and row-major information - // ahead of time. - bool need_transpose = false; - uint32_t matrix_stride = 0; - if (member_type.columns > 1) + switch (std450_op) { - need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor); - matrix_stride = type_struct_member_matrix_stride(target_type, i); + case GLSLstd450NClamp: + std450_op = GLSLstd450FClamp; + break; + case GLSLstd450NMin: + std450_op = GLSLstd450FMin; + break; + case GLSLstd450NMax: + std450_op = GLSLstd450FMax; + break; + default: + break; } - - auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride, - need_transpose); - - // Cannot forward transpositions, so resolve them here. 
- if (need_transpose) - expr += convert_row_major_matrix(tmp, member_type, false); - else - expr += tmp; } - expr += ")"; - - return expr; + return std450_op; } -std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, - const SPIRType &target_type, uint32_t offset, - uint32_t matrix_stride, bool need_transpose) +void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length) { - assert(matrix_stride); - SPIRType tmp_type = target_type; - if (need_transpose) - swap(tmp_type.vecsize, tmp_type.columns); - - std::string expr; - - expr += type_to_glsl_constructor(tmp_type); - expr += "("; - - for (uint32_t i = 0; i < tmp_type.columns; i++) - { - if (i != 0) - expr += ", "; - - expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride, - /* need_transpose= */ false); - } - - expr += ")"; + auto op = static_cast(eop); - return expr; -} + if (is_legacy() && is_unsigned_glsl_opcode(op)) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets."); -std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count, - const SPIRType &target_type, uint32_t offset, - uint32_t matrix_stride, bool need_transpose) -{ - auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16); + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
+ uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); - auto buffer_name = to_name(expression_type(base).self); + op = get_remapped_glsl_op(op); - if (need_transpose) + switch (op) { - std::string expr; - - if (target_type.vecsize > 1) + // FP fiddling + case GLSLstd450Round: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "round"); + else { - expr += type_to_glsl_constructor(target_type); - expr += "("; + auto op0 = to_enclosed_expression(args[0]); + auto &op0_type = expression_type(args[0]); + auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))"); + bool forward = should_forward(args[0]); + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, args[0]); } + break; - for (uint32_t i = 0; i < target_type.vecsize; ++i) + case GLSLstd450RoundEven: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "roundEven"); + else if (!options.es) { - if (i != 0) - expr += ", "; - - uint32_t component_offset = result.second + i * matrix_stride; - - assert(component_offset % (target_type.width / 8) == 0); - uint32_t index = component_offset / (target_type.width / 8); - - expr += buffer_name; - expr += "["; - expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + - expr += convert_to_string(index / 4); - expr += "]"; - - expr += vector_swizzle(1, index % 4); + // This extension provides round() with round-to-even semantics. 
+ require_extension_internal("GL_EXT_gpu_shader4"); + emit_unary_func_op(result_type, id, args[0], "round"); } + else + SPIRV_CROSS_THROW("roundEven supported only in ESSL 300."); + break; - if (target_type.vecsize > 1) + case GLSLstd450Trunc: + emit_unary_func_op(result_type, id, args[0], "trunc"); + break; + case GLSLstd450SAbs: + emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type); + break; + case GLSLstd450FAbs: + emit_unary_func_op(result_type, id, args[0], "abs"); + break; + case GLSLstd450SSign: + emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type); + break; + case GLSLstd450FSign: + emit_unary_func_op(result_type, id, args[0], "sign"); + break; + case GLSLstd450Floor: + emit_unary_func_op(result_type, id, args[0], "floor"); + break; + case GLSLstd450Ceil: + emit_unary_func_op(result_type, id, args[0], "ceil"); + break; + case GLSLstd450Fract: + emit_unary_func_op(result_type, id, args[0], "fract"); + break; + case GLSLstd450Radians: + emit_unary_func_op(result_type, id, args[0], "radians"); + break; + case GLSLstd450Degrees: + emit_unary_func_op(result_type, id, args[0], "degrees"); + break; + case GLSLstd450Fma: + if ((!options.es && options.version < 400) || (options.es && options.version < 320)) { - expr += ")"; + auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ", + to_enclosed_expression(args[2])); + + emit_op(result_type, id, expr, + should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2])); + for (uint32_t i = 0; i < 3; i++) + inherit_expression_dependencies(id, args[i]); } + else + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma"); + break; + case GLSLstd450Modf: + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, args[0], args[1], "modf"); + break; - return expr; - } - else + case GLSLstd450ModfStruct: { - assert(result.second % (target_type.width / 8) 
== 0); - uint32_t index = result.second / (target_type.width / 8); - - std::string expr; + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, id); + statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ", + to_expression(id), ".", to_member_name(type, 1), ");"); + break; + } - expr += buffer_name; - expr += "["; + // Minmax + case GLSLstd450UMin: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false); + break; + + case GLSLstd450SMin: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false); + break; + + case GLSLstd450FMin: + emit_binary_func_op(result_type, id, args[0], args[1], "min"); + break; + + case GLSLstd450FMax: + emit_binary_func_op(result_type, id, args[0], args[1], "max"); + break; + + case GLSLstd450UMax: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false); + break; + + case GLSLstd450SMax: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false); + break; + + case GLSLstd450FClamp: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); + break; + + case GLSLstd450UClamp: + emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type); + break; + + case GLSLstd450SClamp: + emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type); + break; + + // Trig + case GLSLstd450Sin: + emit_unary_func_op(result_type, id, args[0], "sin"); + break; + case GLSLstd450Cos: + emit_unary_func_op(result_type, id, args[0], "cos"); + break; + case GLSLstd450Tan: + emit_unary_func_op(result_type, id, args[0], "tan"); + break; + case GLSLstd450Asin: + emit_unary_func_op(result_type, id, args[0], "asin"); + break; + case GLSLstd450Acos: + emit_unary_func_op(result_type, id, args[0], "acos"); + break; + case GLSLstd450Atan: + emit_unary_func_op(result_type, id, args[0], "atan"); + break; + 
case GLSLstd450Sinh: + emit_unary_func_op(result_type, id, args[0], "sinh"); + break; + case GLSLstd450Cosh: + emit_unary_func_op(result_type, id, args[0], "cosh"); + break; + case GLSLstd450Tanh: + emit_unary_func_op(result_type, id, args[0], "tanh"); + break; + case GLSLstd450Asinh: + emit_unary_func_op(result_type, id, args[0], "asinh"); + break; + case GLSLstd450Acosh: + emit_unary_func_op(result_type, id, args[0], "acosh"); + break; + case GLSLstd450Atanh: + emit_unary_func_op(result_type, id, args[0], "atanh"); + break; + case GLSLstd450Atan2: + emit_binary_func_op(result_type, id, args[0], args[1], "atan"); + break; + + // Exponentials + case GLSLstd450Pow: + emit_binary_func_op(result_type, id, args[0], args[1], "pow"); + break; + case GLSLstd450Exp: + emit_unary_func_op(result_type, id, args[0], "exp"); + break; + case GLSLstd450Log: + emit_unary_func_op(result_type, id, args[0], "log"); + break; + case GLSLstd450Exp2: + emit_unary_func_op(result_type, id, args[0], "exp2"); + break; + case GLSLstd450Log2: + emit_unary_func_op(result_type, id, args[0], "log2"); + break; + case GLSLstd450Sqrt: + emit_unary_func_op(result_type, id, args[0], "sqrt"); + break; + case GLSLstd450InverseSqrt: + emit_unary_func_op(result_type, id, args[0], "inversesqrt"); + break; + + // Matrix math + case GLSLstd450Determinant: + emit_unary_func_op(result_type, id, args[0], "determinant"); + break; + case GLSLstd450MatrixInverse: + emit_unary_func_op(result_type, id, args[0], "inverse"); + break; + + // Lerping + case GLSLstd450FMix: + case GLSLstd450IMix: + { + emit_mix_op(result_type, id, args[0], args[1], args[2]); + break; + } + case GLSLstd450Step: + emit_binary_func_op(result_type, id, args[0], args[1], "step"); + break; + case GLSLstd450SmoothStep: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep"); + break; + + // Packing + case GLSLstd450Frexp: + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + 
emit_binary_func_op(result_type, id, args[0], args[1], "frexp"); + break; + + case GLSLstd450FrexpStruct: + { + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, id); + statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ", + to_expression(id), ".", to_member_name(type, 1), ");"); + break; + } + + case GLSLstd450Ldexp: + { + bool forward = should_forward(args[0]) && should_forward(args[1]); + + auto op0 = to_unpacked_expression(args[0]); + auto op1 = to_unpacked_expression(args[1]); + auto &op1_type = expression_type(args[1]); + if (op1_type.basetype != SPIRType::Int) + { + // Need a value cast here. + auto target_type = op1_type; + target_type.basetype = SPIRType::Int; + op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")"); + } + + auto expr = join("ldexp(", op0, ", ", op1, ")"); + + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, args[0]); + inherit_expression_dependencies(id, args[1]); + break; + } + + case GLSLstd450PackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "packSnorm4x8"); + break; + case GLSLstd450PackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "packUnorm4x8"); + break; + case GLSLstd450PackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "packSnorm2x16"); + break; + case GLSLstd450PackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "packUnorm2x16"); + break; + case GLSLstd450PackHalf2x16: + emit_unary_func_op(result_type, id, args[0], "packHalf2x16"); + break; + case GLSLstd450UnpackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8"); + break; + case GLSLstd450UnpackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8"); + break; + case GLSLstd450UnpackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16"); + break; + case GLSLstd450UnpackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16"); + 
break; + case GLSLstd450UnpackHalf2x16: + emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16"); + break; + + case GLSLstd450PackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "packDouble2x32"); + break; + case GLSLstd450UnpackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32"); + break; + + // Vector math + case GLSLstd450Length: + emit_unary_func_op(result_type, id, args[0], "length"); + break; + case GLSLstd450Distance: + emit_binary_func_op(result_type, id, args[0], args[1], "distance"); + break; + case GLSLstd450Cross: + emit_binary_func_op(result_type, id, args[0], args[1], "cross"); + break; + case GLSLstd450Normalize: + emit_unary_func_op(result_type, id, args[0], "normalize"); + break; + case GLSLstd450FaceForward: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward"); + break; + case GLSLstd450Reflect: + emit_binary_func_op(result_type, id, args[0], args[1], "reflect"); + break; + case GLSLstd450Refract: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract"); + break; + + // Bit-fiddling + case GLSLstd450FindILsb: + // findLSB always returns int. + emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type); + break; + + case GLSLstd450FindSMsb: + emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type); + break; + + case GLSLstd450FindUMsb: + emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type, + int_type); // findMSB always returns int. 
+ break; + + // Multisampled varying + case GLSLstd450InterpolateAtCentroid: + emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid"); + break; + case GLSLstd450InterpolateAtSample: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample"); + break; + case GLSLstd450InterpolateAtOffset: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset"); + break; + + case GLSLstd450NMin: + case GLSLstd450NMax: + { + emit_nminmax_op(result_type, id, args[0], args[1], op); + break; + } + + case GLSLstd450NClamp: + { + // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. + // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. + uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX]; + if (!max_id) + max_id = ir.increase_bound_by(1); + + // Inherit precision qualifiers. + ir.meta[max_id] = ir.meta[id]; + + emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax); + emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin); + break; + } + + default: + statement("// unimplemented GLSL op ", eop); + break; + } +} + +void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op) +{ + // Need to emulate this call. + uint32_t &ids = extra_sub_expressions[id]; + if (!ids) + { + ids = ir.increase_bound_by(5); + auto btype = get(result_type); + btype.basetype = SPIRType::Boolean; + set(ids, btype); + } + + uint32_t btype_id = ids + 0; + uint32_t left_nan_id = ids + 1; + uint32_t right_nan_id = ids + 2; + uint32_t tmp_id = ids + 3; + uint32_t mixed_first_id = ids + 4; + + // Inherit precision qualifiers. 
+ ir.meta[tmp_id] = ir.meta[id]; + ir.meta[mixed_first_id] = ir.meta[id]; + + emit_unary_func_op(btype_id, left_nan_id, op0, "isnan"); + emit_unary_func_op(btype_id, right_nan_id, op1, "isnan"); + emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max"); + emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id); + emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id); +} + +void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, + uint32_t) +{ + require_extension_internal("GL_AMD_shader_ballot"); + + enum AMDShaderBallot + { + SwizzleInvocationsAMD = 1, + SwizzleInvocationsMaskedAMD = 2, + WriteInvocationAMD = 3, + MbcntAMD = 4 + }; + + auto op = static_cast(eop); + + switch (op) + { + case SwizzleInvocationsAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD"); + register_control_dependent_expression(id); + break; + + case SwizzleInvocationsMaskedAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD"); + register_control_dependent_expression(id); + break; + + case WriteInvocationAMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD"); + register_control_dependent_expression(id); + break; + + case MbcntAMD: + emit_unary_func_op(result_type, id, args[0], "mbcntAMD"); + register_control_dependent_expression(id); + break; + + default: + statement("// unimplemented SPV AMD shader ballot op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t) +{ + require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); + + enum AMDShaderExplicitVertexParameter + { + InterpolateAtVertexAMD = 1 + }; + + auto op = static_cast(eop); + + switch (op) + { + case InterpolateAtVertexAMD: + emit_binary_func_op(result_type, id, args[0], 
args[1], "interpolateAtVertexAMD"); + break; + + default: + statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t) +{ + require_extension_internal("GL_AMD_shader_trinary_minmax"); + + enum AMDShaderTrinaryMinMax + { + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9 + }; + + auto op = static_cast(eop); + + switch (op) + { + case FMin3AMD: + case UMin3AMD: + case SMin3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3"); + break; + + case FMax3AMD: + case UMax3AMD: + case SMax3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3"); + break; + + case FMid3AMD: + case UMid3AMD: + case SMid3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3"); + break; + + default: + statement("// unimplemented SPV AMD shader trinary minmax op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, + uint32_t) +{ + require_extension_internal("GL_AMD_gcn_shader"); + + enum AMDGCNShader + { + CubeFaceIndexAMD = 1, + CubeFaceCoordAMD = 2, + TimeAMD = 3 + }; + + auto op = static_cast(eop); + + switch (op) + { + case CubeFaceIndexAMD: + emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD"); + break; + case CubeFaceCoordAMD: + emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD"); + break; + case TimeAMD: + { + string expr = "timeAMD()"; + emit_op(result_type, id, expr, true); + register_control_dependent_expression(id); + break; + } + + default: + statement("// unimplemented SPV AMD gcn shader op ", eop); + break; + } +} + +void CompilerGLSL::emit_subgroup_op(const Instruction &i) +{ + const uint32_t *ops = stream(i); + auto op = 
static_cast(i.op); + + if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op)) + SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics."); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (op) + { + case OpGroupNonUniformElect: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect); + break; + + case OpGroupNonUniformBallotBitCount: + { + const GroupOperation operation = static_cast(ops[3]); + if (operation == GroupOperationReduce) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount); + else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); + } + break; + + case OpGroupNonUniformBallotBitExtract: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract); + break; + + case OpGroupNonUniformInverseBallot: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); + break; + + case OpGroupNonUniformBallot: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot); + break; + + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB); + break; + + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First); + break; + + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + require_extension_internal("GL_KHR_shader_subgroup_shuffle"); + break; + + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + 
require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative"); + break; + + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + { + const SPIRType &type = expression_type(ops[3]); + if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool); + else + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT); + } + break; + + case OpGroupNonUniformFAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformIMul: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformLogicalAnd: + case OpGroupNonUniformLogicalOr: + case OpGroupNonUniformLogicalXor: + { + auto operation = static_cast(ops[3]); + if (operation == GroupOperationClusteredReduce) + { + require_extension_internal("GL_KHR_shader_subgroup_clustered"); + } + else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan || + operation == GroupOperationReduce) + { + require_extension_internal("GL_KHR_shader_subgroup_arithmetic"); + } + else + SPIRV_CROSS_THROW("Invalid group operation."); + break; + } + + case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadBroadcast: + require_extension_internal("GL_KHR_shader_subgroup_quad"); + break; + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto scope = static_cast(evaluate_constant_u32(ops[2])); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + + switch (op) + { + case OpGroupNonUniformElect: + emit_op(result_type, id, "subgroupElect()", true); + break; + + case 
OpGroupNonUniformBroadcast: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast"); + break; + + case OpGroupNonUniformBroadcastFirst: + emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst"); + break; + + case OpGroupNonUniformBallot: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallot"); + break; + + case OpGroupNonUniformInverseBallot: + emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot"); + break; + + case OpGroupNonUniformBallotBitExtract: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract"); + break; + + case OpGroupNonUniformBallotFindLSB: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB"); + break; + + case OpGroupNonUniformBallotFindMSB: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB"); + break; + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast(ops[3]); + if (operation == GroupOperationReduce) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount"); + else if (operation == GroupOperationInclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount"); + else if (operation == GroupOperationExclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount"); + else + SPIRV_CROSS_THROW("Invalid BitCount operation."); + break; + } + + case OpGroupNonUniformShuffle: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle"); + break; + + case OpGroupNonUniformShuffleXor: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor"); + break; + + case OpGroupNonUniformShuffleUp: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp"); + break; + + case OpGroupNonUniformShuffleDown: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown"); + break; + + case OpGroupNonUniformAll: + emit_unary_func_op(result_type, id, ops[3], "subgroupAll"); + break; + + case 
OpGroupNonUniformAny: + emit_unary_func_op(result_type, id, ops[3], "subgroupAny"); + break; + + case OpGroupNonUniformAllEqual: + emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual"); + break; + + // clang-format off +#define GLSL_GROUP_OP(op, glsl_op) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \ + else if (operation == GroupOperationClusteredReduce) \ + emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + +#define GLSL_GROUP_OP_CAST(op, glsl_op, type) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \ + else if (operation == GroupOperationClusteredReduce) \ + emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + + GLSL_GROUP_OP(FAdd, Add) + GLSL_GROUP_OP(FMul, Mul) + GLSL_GROUP_OP(FMin, Min) + GLSL_GROUP_OP(FMax, Max) + GLSL_GROUP_OP(IAdd, Add) + GLSL_GROUP_OP(IMul, Mul) + GLSL_GROUP_OP_CAST(SMin, Min, int_type) + GLSL_GROUP_OP_CAST(SMax, Max, int_type) + 
GLSL_GROUP_OP_CAST(UMin, Min, uint_type) + GLSL_GROUP_OP_CAST(UMax, Max, uint_type) + GLSL_GROUP_OP(BitwiseAnd, And) + GLSL_GROUP_OP(BitwiseOr, Or) + GLSL_GROUP_OP(BitwiseXor, Xor) + GLSL_GROUP_OP(LogicalAnd, And) + GLSL_GROUP_OP(LogicalOr, Or) + GLSL_GROUP_OP(LogicalXor, Xor) +#undef GLSL_GROUP_OP +#undef GLSL_GROUP_OP_CAST + // clang-format on + + case OpGroupNonUniformQuadSwap: + { + uint32_t direction = evaluate_constant_u32(ops[4]); + if (direction == 0) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal"); + else if (direction == 1) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical"); + else if (direction == 2) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal"); + else + SPIRV_CROSS_THROW("Invalid quad swap direction."); + break; + } + + case OpGroupNonUniformQuadBroadcast: + { + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast"); + break; + } + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + register_control_dependent_expression(id); +} + +string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) +{ + // OpBitcast can deal with pointers. + if (out_type.pointer || in_type.pointer) + { + if (out_type.vecsize == 2 || in_type.vecsize == 2) + require_extension_internal("GL_EXT_buffer_reference_uvec2"); + return type_to_glsl(out_type); + } + + if (out_type.basetype == in_type.basetype) + return ""; + + assert(out_type.basetype != SPIRType::Boolean); + assert(in_type.basetype != SPIRType::Boolean); + + bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); + bool same_size_cast = out_type.width == in_type.width; + + // Trivial bitcast case, casts between integers. + if (integral_cast && same_size_cast) + return type_to_glsl(out_type); + + // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types). 
+ if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1) + return "unpack8"; + else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1) + return "pack16"; + else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1) + return "pack32"; + + // Floating <-> Integer special casts. Just have to enumerate all cases. :( + // 16-bit, 32-bit and 64-bit floats. + if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "floatBitsToUint"; + } + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "floatBitsToInt"; + } + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "uintBitsToFloat"; + } + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "intBitsToFloat"; + } + + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) + return "doubleBitsToInt64"; + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) + return 
"doubleBitsToUint64"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) + return "int64BitsToDouble"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) + return "uint64BitsToDouble"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half) + return "float16BitsToInt16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) + return "float16BitsToUint16"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short) + return "int16BitsToFloat16"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) + return "uint16BitsToFloat16"; + + // And finally, some even more special purpose casts. + if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2) + return "packUint2x32"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2) + return "unpackUint2x32"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + return "unpackFloat2x16"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) + return "packFloat2x16"; + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2) + return "packInt2x16"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1) + return "unpackInt2x16"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2) + return "packUint2x16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + return "unpackUint2x16"; + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4) 
+ return "packInt4x16"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1) + return "unpackInt4x16"; + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4) + return "packUint4x16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1) + return "unpackUint4x16"; + + return ""; +} + +string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument) +{ + auto op = bitcast_glsl_op(result_type, expression_type(argument)); + if (op.empty()) + return to_enclosed_unpacked_expression(argument); + else + return join(op, "(", to_unpacked_expression(argument), ")"); +} + +std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg) +{ + auto expr = to_expression(arg); + auto &src_type = expression_type(arg); + if (src_type.basetype != target_type) + { + auto target = src_type; + target.basetype = target_type; + expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")"); + } + + return expr; +} + +std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type, + const std::string &expr) +{ + if (target_type.basetype == expr_type) + return expr; + + auto src_type = target_type; + src_type.basetype = expr_type; + return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")"); +} + +string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) +{ + switch (builtin) + { + case BuiltInPosition: + return "gl_Position"; + case BuiltInPointSize: + return "gl_PointSize"; + case BuiltInClipDistance: + return "gl_ClipDistance"; + case BuiltInCullDistance: + return "gl_CullDistance"; + case BuiltInVertexId: + if (options.vulkan_semantics) + SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. 
This shader was created " + "with GL semantics."); + return "gl_VertexID"; + case BuiltInInstanceId: + if (options.vulkan_semantics) + { + auto model = get_entry_point().model; + switch (model) + { + case spv::ExecutionModelIntersectionKHR: + case spv::ExecutionModelAnyHitKHR: + case spv::ExecutionModelClosestHitKHR: + // gl_InstanceID is allowed in these shaders. + break; + + default: + SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was " + "created with GL semantics."); + } + } + if (!options.es && options.version < 140) + { + require_extension_internal("GL_ARB_draw_instanced"); + } + return "gl_InstanceID"; + case BuiltInVertexIndex: + if (options.vulkan_semantics) + return "gl_VertexIndex"; + else + return "gl_VertexID"; // gl_VertexID already has the base offset applied. + case BuiltInInstanceIndex: + if (options.vulkan_semantics) + return "gl_InstanceIndex"; + + if (!options.es && options.version < 140) + { + require_extension_internal("GL_ARB_draw_instanced"); + } + + if (options.vertex.support_nonzero_base_instance) + { + if (!options.vulkan_semantics) + { + // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported. + require_extension_internal("GL_ARB_shader_draw_parameters"); + } + return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID. 
+ } + else + return "gl_InstanceID"; + case BuiltInPrimitiveId: + if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry) + return "gl_PrimitiveIDIn"; + else + return "gl_PrimitiveID"; + case BuiltInInvocationId: + return "gl_InvocationID"; + case BuiltInLayer: + return "gl_Layer"; + case BuiltInViewportIndex: + return "gl_ViewportIndex"; + case BuiltInTessLevelOuter: + return "gl_TessLevelOuter"; + case BuiltInTessLevelInner: + return "gl_TessLevelInner"; + case BuiltInTessCoord: + return "gl_TessCoord"; + case BuiltInFragCoord: + return "gl_FragCoord"; + case BuiltInPointCoord: + return "gl_PointCoord"; + case BuiltInFrontFacing: + return "gl_FrontFacing"; + case BuiltInFragDepth: + return "gl_FragDepth"; + case BuiltInNumWorkgroups: + return "gl_NumWorkGroups"; + case BuiltInWorkgroupSize: + return "gl_WorkGroupSize"; + case BuiltInWorkgroupId: + return "gl_WorkGroupID"; + case BuiltInLocalInvocationId: + return "gl_LocalInvocationID"; + case BuiltInGlobalInvocationId: + return "gl_GlobalInvocationID"; + case BuiltInLocalInvocationIndex: + return "gl_LocalInvocationIndex"; + case BuiltInHelperInvocation: + return "gl_HelperInvocation"; + + case BuiltInBaseVertex: + if (options.es) + SPIRV_CROSS_THROW("BaseVertex not supported in ES profile."); + + if (options.vulkan_semantics) + { + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_BaseVertexARB"; + } + return "gl_BaseVertex"; + } + // On regular GL, this is soft-enabled and we emit ifdefs in code. 
+ require_extension_internal("GL_ARB_shader_draw_parameters"); + return "SPIRV_Cross_BaseVertex"; + + case BuiltInBaseInstance: + if (options.es) + SPIRV_CROSS_THROW("BaseInstance not supported in ES profile."); + + if (options.vulkan_semantics) + { + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_BaseInstanceARB"; + } + return "gl_BaseInstance"; + } + // On regular GL, this is soft-enabled and we emit ifdefs in code. + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "SPIRV_Cross_BaseInstance"; + + case BuiltInDrawIndex: + if (options.es) + SPIRV_CROSS_THROW("DrawIndex not supported in ES profile."); + + if (options.vulkan_semantics) + { + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_DrawIDARB"; + } + return "gl_DrawID"; + } + // On regular GL, this is soft-enabled and we emit ifdefs in code. + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_DrawIDARB"; + + case BuiltInSampleId: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400."); + return "gl_SampleID"; + + case BuiltInSampleMask: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400."); + + if (storage == StorageClassInput) + return "gl_SampleMaskIn"; + else + return "gl_SampleMask"; + + case BuiltInSamplePosition: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400."); + return "gl_SamplePosition"; + + case BuiltInViewIndex: + if (options.vulkan_semantics) + return 
"gl_ViewIndex"; + else + return "gl_ViewID_OVR"; + + case BuiltInNumSubgroups: + request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups); + return "gl_NumSubgroups"; + + case BuiltInSubgroupId: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID); + return "gl_SubgroupID"; + + case BuiltInSubgroupSize: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize); + return "gl_SubgroupSize"; + + case BuiltInSubgroupLocalInvocationId: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID); + return "gl_SubgroupInvocationID"; + + case BuiltInSubgroupEqMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupEqMask"; + + case BuiltInSubgroupGeMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupGeMask"; + + case BuiltInSubgroupGtMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupGtMask"; + + case BuiltInSubgroupLeMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupLeMask"; + + case BuiltInSubgroupLtMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupLtMask"; + + case BuiltInLaunchIdKHR: + return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV"; + case BuiltInLaunchSizeKHR: + return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV"; + case BuiltInWorldRayOriginKHR: + return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV"; + case BuiltInWorldRayDirectionKHR: + return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV"; + case BuiltInObjectRayOriginKHR: + return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV"; + case BuiltInObjectRayDirectionKHR: + return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV"; + case BuiltInRayTminKHR: + return ray_tracing_is_khr ? 
"gl_RayTminEXT" : "gl_RayTminNV"; + case BuiltInRayTmaxKHR: + return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV"; + case BuiltInInstanceCustomIndexKHR: + return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV"; + case BuiltInObjectToWorldKHR: + return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV"; + case BuiltInWorldToObjectKHR: + return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV"; + case BuiltInHitTNV: + // gl_HitTEXT is an alias of RayTMax in KHR. + return "gl_HitTNV"; + case BuiltInHitKindKHR: + return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV"; + case BuiltInIncomingRayFlagsKHR: + return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV"; + + case BuiltInBaryCoordKHR: + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNV"; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + return "gl_BaryCoordEXT"; + } + } + + case BuiltInBaryCoordNoPerspNV: + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNoPerspNV"; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + return "gl_BaryCoordNoPerspEXT"; + } + } + + case BuiltInFragStencilRefEXT: + { + if (!options.es) + { + require_extension_internal("GL_ARB_shader_stencil_export"); + return "gl_FragStencilRefARB"; + } + else + SPIRV_CROSS_THROW("Stencil export not supported in 
GLES."); + } + + case BuiltInPrimitiveShadingRateKHR: + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL."); + require_extension_internal("GL_EXT_fragment_shading_rate"); + return "gl_PrimitiveShadingRateEXT"; + } + + case BuiltInShadingRateKHR: + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL."); + require_extension_internal("GL_EXT_fragment_shading_rate"); + return "gl_ShadingRateEXT"; + } + + case BuiltInDeviceIndex: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for device group support."); + require_extension_internal("GL_EXT_device_group"); + return "gl_DeviceIndex"; + + case BuiltInFullyCoveredEXT: + if (!options.es) + require_extension_internal("GL_NV_conservative_raster_underestimation"); + else + SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation."); + return "gl_FragFullyCoveredNV"; + + case BuiltInPrimitiveTriangleIndicesEXT: + return "gl_PrimitiveTriangleIndicesEXT"; + case BuiltInPrimitiveLineIndicesEXT: + return "gl_PrimitiveLineIndicesEXT"; + case BuiltInPrimitivePointIndicesEXT: + return "gl_PrimitivePointIndicesEXT"; + case BuiltInCullPrimitiveEXT: + return "gl_CullPrimitiveEXT"; + + default: + return join("gl_BuiltIn_", convert_to_string(builtin)); + } +} + +const char *CompilerGLSL::index_to_swizzle(uint32_t index) +{ + switch (index) + { + case 0: + return "x"; + case 1: + return "y"; + case 2: + return "z"; + case 3: + return "w"; + default: + return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec. 
+ } +} + +void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/, + AccessChainFlags flags, bool &access_chain_is_arrayed, + uint32_t index) +{ + bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; + bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; + bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; + + string idx_expr = index_is_literal ? convert_to_string(index) : to_unpacked_expression(index, register_expression_read); + + // For the case where the base of an OpPtrAccessChain already ends in [n], + // we need to use the index as an offset to the existing index, otherwise, + // we can just use the index directly. + if (ptr_chain && access_chain_is_arrayed) + { + size_t split_pos = expr.find_last_of(']'); + string expr_front = expr.substr(0, split_pos); + string expr_back = expr.substr(split_pos); + expr = expr_front + " + " + enclose_expression(idx_expr) + expr_back; + } + else + { + expr += "["; + expr += idx_expr; + expr += "]"; + } +} + +bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t) +{ + return true; +} + +string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, + AccessChainFlags flags, AccessChainMeta *meta) +{ + string expr; + + bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; + bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0; + bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0; + bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; + bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; + bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0; + + if (!chain_only) + { + // We handle transpose explicitly, so don't resolve that here. 
+ auto *e = maybe_get(base); + bool old_transpose = e && e->need_transpose; + if (e) + e->need_transpose = false; + expr = to_enclosed_expression(base, register_expression_read); + if (e) + e->need_transpose = old_transpose; + } + + // Start traversing type hierarchy at the proper non-pointer types, + // but keep type_id referencing the original pointer for use below. + uint32_t type_id = expression_type_id(base); + + if (!backend.native_pointers) + { + if (ptr_chain) + SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain."); + + // Wrapped buffer reference pointer types will need to poke into the internal "value" member before + // continuing the access chain. + if (should_dereference(base)) + { + auto &type = get(type_id); + expr = dereference_expression(type, expr); + } + } + + const auto *type = &get_pointee_type(type_id); + + bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos; + bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base); + bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked); + uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID); + bool is_invariant = has_decoration(base, DecorationInvariant); + bool relaxed_precision = has_decoration(base, DecorationRelaxedPrecision); + bool pending_array_enclose = false; + bool dimension_flatten = false; + + const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) { + AccessChainFlags mod_flags = flags; + if (!is_literal) + mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT; + if (!is_ptr_chain) + mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT; + access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index); + check_physical_type_cast(expr, type, physical_type); + }; + + for (uint32_t i = 0; i < count; i++) + { + uint32_t index = indices[i]; + + bool is_literal = index_is_literal; + if (is_literal 
&& msb_is_id && (index >> 31u) != 0u) + { + is_literal = false; + index &= 0x7fffffffu; + } + + // Pointer chains + if (ptr_chain && i == 0) + { + // If we are flattening multidimensional arrays, only create opening bracket on first + // array index. + if (options.flatten_multidimensional_arrays) + { + dimension_flatten = type->array.size() >= 1; + pending_array_enclose = dimension_flatten; + if (pending_array_enclose) + expr += "["; + } + + if (options.flatten_multidimensional_arrays && dimension_flatten) + { + // If we are flattening multidimensional arrays, do manual stride computation. + if (is_literal) + expr += convert_to_string(index); + else + expr += to_enclosed_expression(index, register_expression_read); + + for (auto j = uint32_t(type->array.size()); j; j--) + { + expr += " * "; + expr += enclose_expression(to_array_size(*type, j - 1)); + } + + if (type->array.empty()) + pending_array_enclose = false; + else + expr += " + "; + + if (!pending_array_enclose) + expr += "]"; + } + else + { + append_index(index, is_literal, true); + } + + if (type->basetype == SPIRType::ControlPointArray) + { + type_id = type->parent_type; + type = &get(type_id); + } + + access_chain_is_arrayed = true; + } + // Arrays + else if (!type->array.empty()) + { + // If we are flattening multidimensional arrays, only create opening bracket on first + // array index. + if (options.flatten_multidimensional_arrays && !pending_array_enclose) + { + dimension_flatten = type->array.size() > 1; + pending_array_enclose = dimension_flatten; + if (pending_array_enclose) + expr += "["; + } + + assert(type->parent_type); + + auto *var = maybe_get(base); + if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) && + !has_decoration(type->self, DecorationBlock)) + { + // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared. 
+ // Normally, these variables live in blocks when compiled from GLSL, + // but HLSL seems to just emit straight arrays here. + // We must pretend this access goes through gl_in/gl_out arrays + // to be able to access certain builtins as arrays. + // Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT. + auto builtin = ir.meta[base].decoration.builtin_type; + bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT; + + switch (builtin) + { + // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. + // case BuiltInClipDistance: + case BuiltInPosition: + case BuiltInPointSize: + if (mesh_shader) + expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr); + else if (var->storage == StorageClassInput) + expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr); + else if (var->storage == StorageClassOutput) + expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr); + else + append_index(index, is_literal); + break; + + case BuiltInPrimitiveId: + case BuiltInLayer: + case BuiltInViewportIndex: + case BuiltInCullPrimitiveEXT: + case BuiltInPrimitiveShadingRateKHR: + if (mesh_shader) + expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); + else + append_index(index, is_literal); + break; + + default: + append_index(index, is_literal); + break; + } + } + else if (backend.force_merged_mesh_block && i == 0 && var && + !is_builtin_variable(*var) && var->storage == StorageClassOutput) + { + if (is_per_primitive_variable(*var)) + expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); + else + expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr); + } + else if (options.flatten_multidimensional_arrays && dimension_flatten) + { + // If we are 
flattening multidimensional arrays, do manual stride computation. + auto &parent_type = get(type->parent_type); + + if (is_literal) + expr += convert_to_string(index); + else + expr += to_enclosed_expression(index, register_expression_read); + + for (auto j = uint32_t(parent_type.array.size()); j; j--) + { + expr += " * "; + expr += enclose_expression(to_array_size(parent_type, j - 1)); + } + + if (parent_type.array.empty()) + pending_array_enclose = false; + else + expr += " + "; + + if (!pending_array_enclose) + expr += "]"; + } + // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal. + // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask. + else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn)))) + { + append_index(index, is_literal); + } + + type_id = type->parent_type; + type = &get(type_id); + + access_chain_is_arrayed = true; + } + // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping. + // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. 
+ else if (type->basetype == SPIRType::Struct) + { + if (!is_literal) + index = evaluate_constant_u32(index); + + if (index < uint32_t(type->member_type_index_redirection.size())) + index = type->member_type_index_redirection[index]; + + if (index >= type->member_types.size()) + SPIRV_CROSS_THROW("Member index is out of bounds!"); + + BuiltIn builtin = BuiltInMax; + if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base)) + { + if (access_chain_is_arrayed) + { + expr += "."; + expr += builtin_to_glsl(builtin, type->storage); + } + else + expr = builtin_to_glsl(builtin, type->storage); + } + else + { + // If the member has a qualified name, use it as the entire chain + string qual_mbr_name = get_member_qualified_name(type_id, index); + if (!qual_mbr_name.empty()) + expr = qual_mbr_name; + else if (flatten_member_reference) + expr += join("_", to_member_name(*type, index)); + else + { + // Any pointer de-refences for values are handled in the first access chain. + // For pointer chains, the pointer-ness is resolved through an array access. + // The only time this is not true is when accessing array of SSBO/UBO. + // This case is explicitly handled. 
+ expr += to_member_reference(base, *type, index, ptr_chain || i != 0); + } + } + + if (has_member_decoration(type->self, index, DecorationInvariant)) + is_invariant = true; + if (has_member_decoration(type->self, index, DecorationRelaxedPrecision)) + relaxed_precision = true; + + is_packed = member_is_packed_physical_type(*type, index); + if (member_is_remapped_physical_type(*type, index)) + physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID); + else + physical_type = 0; + + row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); + type = &get(type->member_types[index]); + } + // Matrix -> Vector + else if (type->columns > 1) + { + // If we have a row-major matrix here, we need to defer any transpose in case this access chain + // is used to store a column. We can resolve it right here and now if we access a scalar directly, + // by flipping indexing order of the matrix. + + expr += "["; + if (is_literal) + expr += convert_to_string(index); + else + expr += to_unpacked_expression(index, register_expression_read); + expr += "]"; + + type_id = type->parent_type; + type = &get(type_id); + } + // Vector -> Scalar + else if (type->vecsize > 1) + { + string deferred_index; + if (row_major_matrix_needs_conversion) + { + // Flip indexing order. + auto column_index = expr.find_last_of('['); + if (column_index != string::npos) + { + deferred_index = expr.substr(column_index); + expr.resize(column_index); + } + } + + // Internally, access chain implementation can also be used on composites, + // ignore scalar access workarounds in this case. + StorageClass effective_storage = StorageClassGeneric; + bool ignore_potential_sliced_writes = false; + if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0) + { + if (expression_type(base).pointer) + effective_storage = get_expression_effective_storage_class(base); + + // Special consideration for control points. 
+ // Control points can only be written by InvocationID, so there is no need + // to consider scalar access chains here. + // Cleans up some cases where it's very painful to determine the accurate storage class + // since blocks can be partially masked ... + auto *var = maybe_get_backing_variable(base); + if (var && var->storage == StorageClassOutput && + get_execution_model() == ExecutionModelTessellationControl && + !has_decoration(var->self, DecorationPatch)) + { + ignore_potential_sliced_writes = true; + } + } + else + ignore_potential_sliced_writes = true; + + if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes) + { + // On some backends, we might not be able to safely access individual scalars in a vector. + // To work around this, we might have to cast the access chain reference to something which can, + // like a pointer to scalar, which we can then index into. + prepare_access_chain_for_scalar_access(expr, get(type->parent_type), effective_storage, + is_packed); + } + + if (is_literal) + { + bool out_of_bounds = (index >= type->vecsize); + + if (!is_packed && !row_major_matrix_needs_conversion) + { + expr += "."; + expr += index_to_swizzle(out_of_bounds ? 0 : index); + } + else + { + // For packed vectors, we can only access them as an array, not by swizzle. + expr += join("[", out_of_bounds ? 0 : index, "]"); + } + } + else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion) + { + auto &c = get(index); + bool out_of_bounds = (c.scalar() >= type->vecsize); + + if (c.specialization) + { + // If the index is a spec constant, we cannot turn extract into a swizzle. + expr += join("[", out_of_bounds ? "0" : to_expression(index), "]"); + } + else + { + expr += "."; + expr += index_to_swizzle(out_of_bounds ? 
0 : c.scalar()); + } + } + else + { + expr += "["; + expr += to_unpacked_expression(index, register_expression_read); + expr += "]"; + } + + if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes) + { + prepare_access_chain_for_scalar_access(expr, get(type->parent_type), effective_storage, + is_packed); + } + + expr += deferred_index; + row_major_matrix_needs_conversion = false; + + is_packed = false; + physical_type = 0; + type_id = type->parent_type; + type = &get(type_id); + } + else if (!backend.allow_truncated_access_chain) + SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + } + + if (pending_array_enclose) + { + SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, " + "but the access chain was terminated in the middle of a multidimensional array. " + "This is not supported."); + } + + if (meta) + { + meta->need_transpose = row_major_matrix_needs_conversion; + meta->storage_is_packed = is_packed; + meta->storage_is_invariant = is_invariant; + meta->storage_physical_type = physical_type; + meta->relaxed_precision = relaxed_precision; + } + + return expr; +} + +void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t) +{ +} + +void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &) +{ +} + +string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index) +{ + auto ret = join(basename, "_", to_member_name(type, index)); + ParsedIR::sanitize_underscores(ret); + return ret; +} + +string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, + AccessChainMeta *meta, bool ptr_chain) +{ + if (flattened_buffer_blocks.count(base)) + { + uint32_t matrix_stride = 0; + uint32_t array_stride = 0; + bool need_transpose = false; + flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride, + 
&array_stride, ptr_chain); + + if (meta) + { + meta->need_transpose = target_type.columns > 1 && need_transpose; + meta->storage_is_packed = false; + } + + return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride, + need_transpose); + } + else if (flattened_structs.count(base) && count > 0) + { + AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; + if (ptr_chain) + flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; + + if (flattened_structs[base]) + { + flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT; + if (meta) + meta->flattened_struct = target_type.basetype == SPIRType::Struct; + } + + auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1); + if (meta) + { + meta->need_transpose = false; + meta->storage_is_packed = false; + } + + auto basename = to_flattened_access_chain_expression(base); + auto ret = join(basename, "_", chain); + ParsedIR::sanitize_underscores(ret); + return ret; + } + else + { + AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; + if (ptr_chain) + flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; + return access_chain_internal(base, indices, count, flags, meta); + } +} + +string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type) +{ + auto expr = type_to_glsl_constructor(type); + expr += '('; + + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + if (i) + expr += ", "; + + auto &member_type = get(type.member_types[i]); + if (member_type.basetype == SPIRType::Struct) + expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type); + else + expr += to_flattened_struct_member(basename, type, i); + } + expr += ')'; + return expr; +} + +std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id) +{ + // Do not use to_expression as that will unflatten access chains. 
+ string basename; + if (const auto *var = maybe_get(id)) + basename = to_name(var->self); + else if (const auto *expr = maybe_get(id)) + basename = expr->expression; + else + basename = to_expression(id); + + return basename; +} + +void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type, + const SmallVector &indices) +{ + SmallVector sub_indices = indices; + sub_indices.push_back(0); + + auto *member_type = &type; + for (auto &index : indices) + member_type = &get(member_type->member_types[index]); + + for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++) + { + sub_indices.back() = i; + auto lhs = join(basename, "_", to_member_name(*member_type, i)); + ParsedIR::sanitize_underscores(lhs); + + if (get(member_type->member_types[i]).basetype == SPIRType::Struct) + { + store_flattened_struct(lhs, rhs_id, type, sub_indices); + } + else + { + auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices); + statement(lhs, " = ", rhs, ";"); + } + } +} + +void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value) +{ + auto &type = expression_type(lhs_id); + auto basename = to_flattened_access_chain_expression(lhs_id); + store_flattened_struct(basename, value, type, {}); +} + +std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, + uint32_t /* array_stride */, bool need_transpose) +{ + if (!target_type.array.empty()) + SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened"); + else if (target_type.basetype == SPIRType::Struct) + return flattened_access_chain_struct(base, indices, count, target_type, offset); + else if (target_type.columns > 1) + return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose); + else + return flattened_access_chain_vector(base, indices, count, 
target_type, offset, matrix_stride, need_transpose); +} + +std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset) +{ + std::string expr; + + if (backend.can_declare_struct_inline) + { + expr += type_to_glsl_constructor(target_type); + expr += "("; + } + else + expr += "{"; + + for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i) + { + if (i != 0) + expr += ", "; + + const SPIRType &member_type = get(target_type.member_types[i]); + uint32_t member_offset = type_struct_member_offset(target_type, i); + + // The access chain terminates at the struct, so we need to find matrix strides and row-major information + // ahead of time. + bool need_transpose = false; + uint32_t matrix_stride = 0; + if (member_type.columns > 1) + { + need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor); + matrix_stride = type_struct_member_matrix_stride(target_type, i); + } + + auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride, + 0 /* array_stride */, need_transpose); + + // Cannot forward transpositions, so resolve them here. + if (need_transpose) + expr += convert_row_major_matrix(tmp, member_type, 0, false); + else + expr += tmp; + } + + expr += backend.can_declare_struct_inline ? 
")" : "}"; + + return expr; +} + +std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, + uint32_t matrix_stride, bool need_transpose) +{ + assert(matrix_stride); + SPIRType tmp_type = target_type; + if (need_transpose) + swap(tmp_type.vecsize, tmp_type.columns); + + std::string expr; + + expr += type_to_glsl_constructor(tmp_type); + expr += "("; + + for (uint32_t i = 0; i < tmp_type.columns; i++) + { + if (i != 0) + expr += ", "; + + expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride, + /* need_transpose= */ false); + } + + expr += ")"; + + return expr; +} + +std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, + uint32_t matrix_stride, bool need_transpose) +{ + auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16); + + auto buffer_name = to_name(expression_type(base).self); + + if (need_transpose) + { + std::string expr; + + if (target_type.vecsize > 1) + { + expr += type_to_glsl_constructor(target_type); + expr += "("; + } + + for (uint32_t i = 0; i < target_type.vecsize; ++i) + { + if (i != 0) + expr += ", "; + + uint32_t component_offset = result.second + i * matrix_stride; + + assert(component_offset % (target_type.width / 8) == 0); + uint32_t index = component_offset / (target_type.width / 8); + + expr += buffer_name; + expr += "["; + expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... 
that is either empty or ends with a + + expr += convert_to_string(index / 4); + expr += "]"; + + expr += vector_swizzle(1, index % 4); + } + + if (target_type.vecsize > 1) + { + expr += ")"; + } + + return expr; + } + else + { + assert(result.second % (target_type.width / 8) == 0); + uint32_t index = result.second / (target_type.width / 8); + + std::string expr; + + expr += buffer_name; + expr += "["; expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + expr += convert_to_string(index / 4); expr += "]"; - expr += vector_swizzle(target_type.vecsize, index % 4); + expr += vector_swizzle(target_type.vecsize, index % 4); + + return expr; + } +} + +std::pair CompilerGLSL::flattened_access_chain_offset( + const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride, + bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain) +{ + // Start traversing type hierarchy at the proper non-pointer types. + const auto *type = &get_pointee_type(basetype); + + std::string expr; + + // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout. + bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false; + uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0; + uint32_t array_stride = out_array_stride ? *out_array_stride : 0; + + for (uint32_t i = 0; i < count; i++) + { + uint32_t index = indices[i]; + + // Pointers + if (ptr_chain && i == 0) + { + // Here, the pointer type will be decorated with an array stride. + array_stride = get_decoration(basetype.self, DecorationArrayStride); + if (!array_stride) + SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); + + auto *constant = maybe_get(index); + if (constant) + { + // Constant array access. + offset += constant->scalar() * array_stride; + } + else + { + // Dynamic array access. 
+ if (array_stride % word_stride) + { + SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " + "of a 4-component vector. " + "Likely culprit here is a float or vec2 array inside a push " + "constant block which is std430. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index); + expr += " * "; + expr += convert_to_string(array_stride / word_stride); + expr += " + "; + } + } + // Arrays + else if (!type->array.empty()) + { + auto *constant = maybe_get(index); + if (constant) + { + // Constant array access. + offset += constant->scalar() * array_stride; + } + else + { + // Dynamic array access. + if (array_stride % word_stride) + { + SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " + "of a 4-component vector. " + "Likely culprit here is a float or vec2 array inside a push " + "constant block which is std430. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(array_stride / word_stride); + expr += " + "; + } + + uint32_t parent_type = type->parent_type; + type = &get(parent_type); + + if (!type->array.empty()) + array_stride = get_decoration(parent_type, DecorationArrayStride); + } + // For structs, the index refers to a constant, which indexes into the members. + // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. 
+ else if (type->basetype == SPIRType::Struct) + { + index = evaluate_constant_u32(index); + + if (index >= type->member_types.size()) + SPIRV_CROSS_THROW("Member index is out of bounds!"); + + offset += type_struct_member_offset(*type, index); + + auto &struct_type = *type; + type = &get(type->member_types[index]); + + if (type->columns > 1) + { + matrix_stride = type_struct_member_matrix_stride(struct_type, index); + row_major_matrix_needs_conversion = + combined_decoration_for_member(struct_type, index).get(DecorationRowMajor); + } + else + row_major_matrix_needs_conversion = false; + + if (!type->array.empty()) + array_stride = type_struct_member_array_stride(struct_type, index); + } + // Matrix -> Vector + else if (type->columns > 1) + { + auto *constant = maybe_get(index); + if (constant) + { + index = evaluate_constant_u32(index); + offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride); + } + else + { + uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride; + // Dynamic array access. + if (indexing_stride % word_stride) + { + SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a " + "4-component vector. " + "Likely culprit here is a row-major matrix being accessed dynamically. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(indexing_stride / word_stride); + expr += " + "; + } + + type = &get(type->parent_type); + } + // Vector -> Scalar + else if (type->vecsize > 1) + { + auto *constant = maybe_get(index); + if (constant) + { + index = evaluate_constant_u32(index); + offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8)); + } + else + { + uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8); + + // Dynamic array access. 
+ if (indexing_stride % word_stride) + { + SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the " + "size of a 4-component vector. " + "This cannot be flattened in legacy targets."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(indexing_stride / word_stride); + expr += " + "; + } + + type = &get(type->parent_type); + } + else + SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + } + + if (need_transpose) + *need_transpose = row_major_matrix_needs_conversion; + if (out_matrix_stride) + *out_matrix_stride = matrix_stride; + if (out_array_stride) + *out_array_stride = array_stride; + + return std::make_pair(expr, offset); +} + +bool CompilerGLSL::should_dereference(uint32_t id) +{ + const auto &type = expression_type(id); + // Non-pointer expressions don't need to be dereferenced. + if (!type.pointer) + return false; + + // Handles shouldn't be dereferenced either. + if (!expression_is_lvalue(id)) + return false; + + // If id is a variable but not a phi variable, we should not dereference it. + if (auto *var = maybe_get(id)) + return var->phi_variable; + + if (auto *expr = maybe_get(id)) + { + // If id is an access chain, we should not dereference it. + if (expr->access_chain) + return false; + + // If id is a forwarded copy of a variable pointer, we should not dereference it. + SPIRVariable *var = nullptr; + while (expr->loaded_from && expression_is_forwarded(expr->self)) + { + auto &src_type = expression_type(expr->loaded_from); + // To be a copy, the pointer and its source expression must be the + // same type. Can't check type.self, because for some reason that's + // usually the base type with pointers stripped off. This check is + // complex enough that I've hoisted it out of the while condition. 
+ if (src_type.pointer != type.pointer || src_type.pointer_depth != type.pointer_depth || + src_type.parent_type != type.parent_type) + break; + if ((var = maybe_get(expr->loaded_from))) + break; + if (!(expr = maybe_get(expr->loaded_from))) + break; + } + + return !var || var->phi_variable; + } + + // Otherwise, we should dereference this pointer expression. + return true; +} + +bool CompilerGLSL::should_forward(uint32_t id) const +{ + // If id is a variable we will try to forward it regardless of force_temporary check below + // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL + + auto *var = maybe_get(id); + if (var) + { + // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation. + return !(has_decoration(id, DecorationBuiltIn) && has_decoration(id, DecorationVolatile)); + } + + // For debugging emit temporary variables for all expressions + if (options.force_temporary) + return false; + + // If an expression carries enough dependencies we need to stop forwarding at some point, + // or we explode compilers. There are usually limits to how much we can nest expressions. + auto *expr = maybe_get(id); + const uint32_t max_expression_dependencies = 64; + if (expr && expr->expression_dependencies.size() >= max_expression_dependencies) + return false; + + if (expr && expr->loaded_from + && has_decoration(expr->loaded_from, DecorationBuiltIn) + && has_decoration(expr->loaded_from, DecorationVolatile)) + { + // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation. + return false; + } + + // Immutable expression can always be forwarded. + if (is_immutable(id)) + return true; + + return false; +} + +bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const +{ + // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion. 
+ return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id); +} + +void CompilerGLSL::track_expression_read(uint32_t id) +{ + switch (ir.ids[id].get_type()) + { + case TypeExpression: + { + auto &e = get(id); + for (auto implied_read : e.implied_read_expressions) + track_expression_read(implied_read); + break; + } + + case TypeAccessChain: + { + auto &e = get(id); + for (auto implied_read : e.implied_read_expressions) + track_expression_read(implied_read); + break; + } + + default: + break; + } + + // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. + // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. + if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id)) + { + auto &v = expression_usage_counts[id]; + v++; + + // If we create an expression outside a loop, + // but access it inside a loop, we're implicitly reading it multiple times. + // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion + // working inside the backend compiler. + if (expression_read_implies_multiple_reads(id)) + v++; + + if (v >= 2) + { + //if (v == 2) + // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id); + + // Force a recompile after this pass to avoid forwarding this variable. + force_temporary_and_recompile(id); + } + } +} + +bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure) +{ + if (forced_temporaries.find(id) != end(forced_temporaries)) + return false; + + for (uint32_t i = 0; i < num_args; i++) + if (!should_forward(args[i])) + return false; + + // We need to forward globals as well. 
+ if (!pure) + { + for (auto global : global_variables) + if (!should_forward(global)) + return false; + for (auto aliased : aliased_variables) + if (!should_forward(aliased)) + return false; + } + + return true; +} + +void CompilerGLSL::register_impure_function_call() +{ + // Impure functions can modify globals and aliased variables, so invalidate them as well. + for (auto global : global_variables) + flush_dependees(get(global)); + for (auto aliased : aliased_variables) + flush_dependees(get(aliased)); +} + +void CompilerGLSL::register_call_out_argument(uint32_t id) +{ + register_write(id); + + auto *var = maybe_get(id); + if (var) + flush_variable_declaration(var->self); +} + +string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) +{ + // These variables are always function local, + // so make sure we emit the variable without storage qualifiers. + // Some backends will inject custom variables locally in a function + // with a storage qualifier which is not function-local. 
+ auto old_storage = var.storage; + var.storage = StorageClassFunction; + auto expr = variable_decl(var); + var.storage = old_storage; + return expr; +} + +void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) +{ + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self)) + { + auto &type = get(var.basetype); + auto &flags = get_decoration_bitset(var.self); + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); + flushed_phi_variables.insert(var.self); + } +} + +void CompilerGLSL::flush_variable_declaration(uint32_t id) +{ + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + auto *var = maybe_get(id); + if (var && var->deferred_declaration) + { + string initializer; + if (options.force_zero_initialized_variables && + (var->storage == StorageClassFunction || var->storage == StorageClassGeneric || + var->storage == StorageClassPrivate) && + !var->initializer && type_can_zero_initialize(get_variable_data_type(*var))) + { + initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var))); + } + + statement(variable_decl_function_local(*var), initializer, ";"); + var->deferred_declaration = false; + } + if (var) + { + emit_variable_temporary_copies(*var); + } +} + +bool CompilerGLSL::remove_duplicate_swizzle(string &op) +{ + auto pos = op.find_last_of('.'); + if (pos == string::npos || pos == 0) + return false; + + string final_swiz = op.substr(pos + 1, string::npos); + + if (backend.swizzle_is_function) + { + if (final_swiz.size() < 2) + return false; + + if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") + final_swiz.erase(final_swiz.size() - 2, string::npos); + else + return false; + } + + // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. 
+ // If so, and previous swizzle is of same length, + // we can drop the final swizzle altogether. + for (uint32_t i = 0; i < final_swiz.size(); i++) + { + static const char expected[] = { 'x', 'y', 'z', 'w' }; + if (i >= 4 || final_swiz[i] != expected[i]) + return false; + } + + auto prevpos = op.find_last_of('.', pos - 1); + if (prevpos == string::npos) + return false; + + prevpos++; + + // Make sure there are only swizzles here ... + for (auto i = prevpos; i < pos; i++) + { + if (op[i] < 'w' || op[i] > 'z') + { + // If swizzles are foo.xyz() like in C++ backend for example, check for that. + if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')') + break; + return false; + } + } + + // If original swizzle is large enough, just carve out the components we need. + // E.g. foobar.wyx.xy will turn into foobar.wy. + if (pos - prevpos >= final_swiz.size()) + { + op.erase(prevpos + final_swiz.size(), string::npos); + + // Add back the function call ... + if (backend.swizzle_is_function) + op += "()"; + } + return true; +} + +// Optimizes away vector swizzles where we have something like +// vec3 foo; +// foo.xyz <-- swizzle expression does nothing. +// This is a very common pattern after OpCompositeCombine. +bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) +{ + auto pos = op.find_last_of('.'); + if (pos == string::npos || pos == 0) + return false; + + string final_swiz = op.substr(pos + 1, string::npos); + + if (backend.swizzle_is_function) + { + if (final_swiz.size() < 2) + return false; + + if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") + final_swiz.erase(final_swiz.size() - 2, string::npos); + else + return false; + } + + // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. + // If so, and previous swizzle is of same length, + // we can drop the final swizzle altogether. 
+ for (uint32_t i = 0; i < final_swiz.size(); i++) + { + static const char expected[] = { 'x', 'y', 'z', 'w' }; + if (i >= 4 || final_swiz[i] != expected[i]) + return false; + } + + auto &type = expression_type(base); + + // Sanity checking ... + assert(type.columns == 1 && type.array.empty()); + + if (type.vecsize == final_swiz.size()) + op.erase(pos, string::npos); + return true; +} + +string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) +{ + ID base = 0; + string op; + string subop; + + // Can only merge swizzles for vectors. + auto &type = get(return_type); + bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1; + bool swizzle_optimization = false; + + for (uint32_t i = 0; i < length; i++) + { + auto *e = maybe_get(elems[i]); + + // If we're merging another scalar which belongs to the same base + // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible! + if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base) + { + // Only supposed to be used for vector swizzle -> scalar. + assert(!e->expression.empty() && e->expression.front() == '.'); + subop += e->expression.substr(1, string::npos); + swizzle_optimization = true; + } + else + { + // We'll likely end up with duplicated swizzles, e.g. + // foobar.xyz.xyz from patterns like + // OpVectorShuffle + // OpCompositeExtract x 3 + // OpCompositeConstruct 3x + other scalar. + // Just modify op in-place. + if (swizzle_optimization) + { + if (backend.swizzle_is_function) + subop += "()"; + + // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles. + // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on. + // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize. 
+ // Essentially, we can only remove one set of swizzles, since that's what we have control over ... + // Case 1: + // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done. + // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo. + // Case 2: + // foo.xyz: Duplicate swizzle won't kick in. + // If foo is vec3, we can remove xyz, giving just foo. + if (!remove_duplicate_swizzle(subop)) + remove_unity_swizzle(base, subop); + + // Strips away redundant parens if we created them during component extraction. + strip_enclosed_expression(subop); + swizzle_optimization = false; + op += subop; + } + else + op += subop; + + if (i) + op += ", "; + + bool uses_buffer_offset = + type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset); + subop = to_composite_constructor_expression(elems[i], uses_buffer_offset); + } + + base = e ? e->base_expression : ID(0); + } + + if (swizzle_optimization) + { + if (backend.swizzle_is_function) + subop += "()"; + + if (!remove_duplicate_swizzle(subop)) + remove_unity_swizzle(base, subop); + // Strips away redundant parens if we created them during component extraction. + strip_enclosed_expression(subop); + } + + op += subop; + return op; +} + +bool CompilerGLSL::skip_argument(uint32_t id) const +{ + if (!combined_image_samplers.empty() || !options.vulkan_semantics) + { + auto &type = expression_type(id); + if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1)) + return true; + } + return false; +} + +bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs) +{ + // Do this with strings because we have a very clear pattern we can check for and it avoids + // adding lots of special cases to the code emission. + if (rhs.size() < lhs.size() + 3) + return false; + + // Do not optimize matrices. 
They are a bit awkward to reason about in general + // (in which order does operation happen?), and it does not work on MSL anyways. + if (type.vecsize > 1 && type.columns > 1) + return false; + + auto index = rhs.find(lhs); + if (index != 0) + return false; + + // TODO: Shift operators, but it's not important for now. + auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1); + if (op != lhs.size() + 1) + return false; + + // Check that the op is followed by space. This excludes && and ||. + if (rhs[op + 1] != ' ') + return false; + + char bop = rhs[op]; + auto expr = rhs.substr(lhs.size() + 3); + // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code. + // Find some common patterns which are equivalent. + if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)")) + statement(lhs, bop, bop, ";"); + else + statement(lhs, " ", bop, "= ", expr, ";"); + return true; +} + +void CompilerGLSL::register_control_dependent_expression(uint32_t expr) +{ + if (forwarded_temporaries.find(expr) == end(forwarded_temporaries)) + return; + + assert(current_emitting_block); + current_emitting_block->invalidate_expressions.push_back(expr); +} + +void CompilerGLSL::emit_block_instructions(SPIRBlock &block) +{ + current_emitting_block = █ + + if (backend.requires_relaxed_precision_analysis) + { + // If PHI variables are consumed in unexpected precision contexts, copy them here. + for (auto &phi : block.phi_variables) + { + auto itr = temporary_to_mirror_precision_alias.find(phi.function_variable); + if (itr != temporary_to_mirror_precision_alias.end()) + { + // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject, + // so it helps to have handle_instruction_precision() on the outside of emit_instruction(). 
+ EmbeddedInstruction inst; + inst.op = OpCopyObject; + inst.length = 3; + inst.ops.push_back(expression_type_id(itr->first)); + inst.ops.push_back(itr->second); + inst.ops.push_back(itr->first); + emit_instruction(inst); + } + } + } + + for (auto &op : block.ops) + { + auto temporary_copy = handle_instruction_precision(op); + emit_instruction(op); + if (temporary_copy.dst_id) + { + // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject, + // so it helps to have handle_instruction_precision() on the outside of emit_instruction(). + EmbeddedInstruction inst; + inst.op = OpCopyObject; + inst.length = 3; + inst.ops.push_back(expression_type_id(temporary_copy.src_id)); + inst.ops.push_back(temporary_copy.dst_id); + inst.ops.push_back(temporary_copy.src_id); + + // Never attempt to hoist mirrored temporaries. + // They are hoisted in lock-step with their parents. + block_temporary_hoisting = true; + emit_instruction(inst); + block_temporary_hoisting = false; + } + } + + current_emitting_block = nullptr; +} + +void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr) +{ + // Allow trivially forwarded expressions like OpLoad or trivial shuffles, + // these will be marked as having suppressed usage tracking. + // Our only concern is to make sure arithmetic operations are done in similar ways. + if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) && + forced_invariant_temporaries.count(expr.self) == 0) + { + force_temporary_and_recompile(expr.self); + forced_invariant_temporaries.insert(expr.self); + + for (auto &dependent : expr.expression_dependencies) + disallow_forwarding_in_expression_chain(get(dependent)); + } +} + +void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id) +{ + // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to + // this variable is consistent. 
The failure case for SPIRV-Cross is when an expression is forced to a temporary + // in one translation unit, but not another, e.g. due to multiple use of an expression. + // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent + // expressions to be temporaries. + // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough + // for all reasonable uses of invariant. + if (!has_decoration(store_id, DecorationInvariant)) + return; + + auto *expr = maybe_get(value_id); + if (!expr) + return; + + disallow_forwarding_in_expression_chain(*expr); +} + +void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) +{ + auto rhs = to_pointer_expression(rhs_expression); + + // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null. + if (!rhs.empty()) + { + handle_store_to_invariant_variable(lhs_expression, rhs_expression); + + if (!unroll_array_to_complex_store(lhs_expression, rhs_expression)) + { + auto lhs = to_dereferenced_expression(lhs_expression); + if (has_decoration(lhs_expression, DecorationNonUniform)) + convert_non_uniform_expression(lhs, lhs_expression); + + // We might need to cast in order to store to a builtin. + cast_to_variable_store(lhs_expression, rhs, expression_type(rhs_expression)); + + // Tries to optimize assignments like " = op expr". + // While this is purely cosmetic, this is important for legacy ESSL where loop + // variable increments must be in either i++ or i += const-expr. + // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. 
+ if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + } + register_write(lhs_expression); + } +} + +uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const +{ + if (instr.length < 3) + return 32; + + auto *ops = stream(instr); + + switch (instr.op) + { + case OpSConvert: + case OpConvertSToF: + case OpUConvert: + case OpConvertUToF: + case OpIEqual: + case OpINotEqual: + case OpSLessThan: + case OpSLessThanEqual: + case OpSGreaterThan: + case OpSGreaterThanEqual: + case OpULessThan: + case OpULessThanEqual: + case OpUGreaterThan: + case OpUGreaterThanEqual: + return expression_type(ops[2]).width; + + default: + { + // We can look at result type which is more robust. + auto *type = maybe_get(ops[0]); + if (type && type_is_integral(*type)) + return type->width; + else + return 32; + } + } +} + +uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const +{ + if (length < 1) + return 32; + + switch (op) + { + case GLSLstd450SAbs: + case GLSLstd450SSign: + case GLSLstd450UMin: + case GLSLstd450SMin: + case GLSLstd450UMax: + case GLSLstd450SMax: + case GLSLstd450UClamp: + case GLSLstd450SClamp: + case GLSLstd450FindSMsb: + case GLSLstd450FindUMsb: + return expression_type(ops[0]).width; + + default: + { + // We don't need to care about other opcodes, just return 32. + return 32; + } + } +} + +void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length) +{ + // Only GLSL supports RelaxedPrecision directly. + // We cannot implement this in HLSL or MSL because it is tied to the type system. + // In SPIR-V, everything must masquerade as 32-bit. + if (!backend.requires_relaxed_precision_analysis) + return; + + auto input_precision = analyze_expression_precision(args, length); + + // For expressions which are loaded or directly forwarded, we inherit mediump implicitly. 
+ // For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id. + if (input_precision == Options::Mediump) + set_decoration(dst_id, DecorationRelaxedPrecision); +} + +CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const +{ + // Now, analyze the precision at which the arguments would run. + // GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision + // for the inputs. Constants do not have inherent precision and do not contribute to this decision. + // If all inputs are constants, they inherit precision from outer expressions, including an l-value. + // In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with + // correct precision. + bool expression_has_highp = false; + bool expression_has_mediump = false; - return expr; + for (uint32_t i = 0; i < length; i++) + { + uint32_t arg = args[i]; + + auto handle_type = ir.ids[arg].get_type(); + if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef) + continue; + + if (has_decoration(arg, DecorationRelaxedPrecision)) + expression_has_mediump = true; + else + expression_has_highp = true; } + + if (expression_has_highp) + return Options::Highp; + else if (expression_has_mediump) + return Options::Mediump; + else + return Options::DontCare; } -std::pair CompilerGLSL::flattened_access_chain_offset( - const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride, - bool *need_transpose, uint32_t *out_matrix_stride, bool ptr_chain) +void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length) { - // Start traversing type hierarchy at the proper non-pointer types. 
- const auto *type = &get_pointee_type(basetype); + if (!backend.requires_relaxed_precision_analysis) + return; - // This holds the type of the current pointer which we are traversing through. - // We always start out from a struct type which is the block. - // This is primarily used to reflect the array strides and matrix strides later. - // For the first access chain index, type_id won't be needed, so just keep it as 0, it will be set - // accordingly as members of structs are accessed. - assert(type->basetype == SPIRType::Struct); - uint32_t type_id = 0; + auto &type = get(type_id); - std::string expr; + // RelaxedPrecision only applies to 32-bit values. + if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt) + return; - // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout. - bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false; - uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0; + bool operation_is_highp = !has_decoration(dst_id, DecorationRelaxedPrecision); - for (uint32_t i = 0; i < count; i++) + auto input_precision = analyze_expression_precision(args, length); + if (input_precision == Options::DontCare) { - uint32_t index = indices[i]; + consume_temporary_in_precision_context(type_id, dst_id, input_precision); + return; + } - // Pointers - if (ptr_chain && i == 0) + // In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined. + // In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit. + // However, if the expression is not, inputs must be expanded to 32-bit first, + // since the operation must run at high precision. + // This is the awkward part, because if we have mediump inputs, or expressions which derived from mediump, + // we might have to forcefully bind the source IDs to highp temporaries. 
This is done by clearing decorations + // and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables. + if ((operation_is_highp && input_precision == Options::Mediump) || + (!operation_is_highp && input_precision == Options::Highp)) + { + auto precision = operation_is_highp ? Options::Highp : Options::Mediump; + for (uint32_t i = 0; i < length; i++) { - // Here, the pointer type will be decorated with an array stride. - uint32_t array_stride = get_decoration(basetype.self, DecorationArrayStride); - if (!array_stride) - SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); + // Rewrites the opcode so that we consume an ID in correct precision context. + // This is pretty hacky, but it's the most straight forward way of implementing this without adding + // lots of extra passes to rewrite all code blocks. + args[i] = consume_temporary_in_precision_context(expression_type_id(args[i]), args[i], precision); + } + } +} - auto *constant = maybe_get(index); - if (constant) - { - // Constant array access. - offset += constant->scalar() * array_stride; - } - else - { - // Dynamic array access. - if (array_stride % word_stride) - { - SPIRV_CROSS_THROW( - "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. " - "Likely culprit here is a float or vec2 array inside a push constant block which is std430. " - "This cannot be flattened. Try using std140 layout instead."); - } +// This is probably not exhaustive ... 
+static bool opcode_is_precision_sensitive_operation(Op op) +{ + switch (op) + { + case OpFAdd: + case OpFSub: + case OpFMul: + case OpFNegate: + case OpIAdd: + case OpISub: + case OpIMul: + case OpSNegate: + case OpFMod: + case OpFDiv: + case OpFRem: + case OpSMod: + case OpSDiv: + case OpSRem: + case OpUMod: + case OpUDiv: + case OpVectorTimesMatrix: + case OpMatrixTimesVector: + case OpMatrixTimesMatrix: + case OpDPdx: + case OpDPdy: + case OpDPdxCoarse: + case OpDPdyCoarse: + case OpDPdxFine: + case OpDPdyFine: + case OpFwidth: + case OpFwidthCoarse: + case OpFwidthFine: + case OpVectorTimesScalar: + case OpMatrixTimesScalar: + case OpOuterProduct: + case OpFConvert: + case OpSConvert: + case OpUConvert: + case OpConvertSToF: + case OpConvertUToF: + case OpConvertFToU: + case OpConvertFToS: + return true; - expr += to_enclosed_expression(index); - expr += " * "; - expr += convert_to_string(array_stride / word_stride); - expr += " + "; - } - // Type ID is unchanged. - } - // Arrays - else if (!type->array.empty()) - { - // Here, the type_id will be a type ID for the array type itself. - uint32_t array_stride = get_decoration(type_id, DecorationArrayStride); - if (!array_stride) - SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); + default: + return false; + } +} - auto *constant = maybe_get(index); - if (constant) - { - // Constant array access. - offset += constant->scalar() * array_stride; - } - else - { - // Dynamic array access. - if (array_stride % word_stride) - { - SPIRV_CROSS_THROW( - "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. " - "Likely culprit here is a float or vec2 array inside a push constant block which is std430. " - "This cannot be flattened. Try using std140 layout instead."); - } +// Instructions which just load data but don't do any arithmetic operation should just inherit the decoration. 
+// SPIR-V doesn't require this, but it's somewhat implied it has to work this way, relaxed precision is only +// relevant when operating on the IDs, not when shuffling things around. +static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count) +{ + switch (op) + { + case OpLoad: + case OpAccessChain: + case OpInBoundsAccessChain: + case OpCompositeExtract: + case OpVectorExtractDynamic: + case OpSampledImage: + case OpImage: + case OpCopyObject: - expr += to_enclosed_expression(index, false); - expr += " * "; - expr += convert_to_string(array_stride / word_stride); - expr += " + "; - } + case OpImageRead: + case OpImageFetch: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageGather: + case OpImageDrefGather: + case OpImageSparseRead: + case OpImageSparseFetch: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseSampleExplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageSparseSampleDrefExplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + case OpImageSparseGather: + case OpImageSparseDrefGather: + arg_count = 1; + return true; - uint32_t parent_type = type->parent_type; - type = &get(parent_type); - type_id = parent_type; + case OpVectorShuffle: + arg_count = 2; + return true; - // Type ID now refers to the array type with one less dimension. - } - // For structs, the index refers to a constant, which indexes into the members. - // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. 
- else if (type->basetype == SPIRType::Struct) - { - index = get(index).scalar(); + case OpCompositeConstruct: + return true; - if (index >= type->member_types.size()) - SPIRV_CROSS_THROW("Member index is out of bounds!"); + default: + break; + } - offset += type_struct_member_offset(*type, index); - type_id = type->member_types[index]; + return false; +} - auto &struct_type = *type; - type = &get(type->member_types[index]); +CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction) +{ + auto ops = stream_mutable(instruction); + auto opcode = static_cast(instruction.op); + uint32_t length = instruction.length; - if (type->columns > 1) - { - matrix_stride = type_struct_member_matrix_stride(struct_type, index); - row_major_matrix_needs_conversion = - combined_decoration_for_member(struct_type, index).get(DecorationRowMajor); - } - else - row_major_matrix_needs_conversion = false; - } - // Matrix -> Vector - else if (type->columns > 1) + if (backend.requires_relaxed_precision_analysis) + { + if (length > 2) { - auto *constant = maybe_get(index); - if (constant) - { - index = get(index).scalar(); - offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride); - } - else - { - uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride; - // Dynamic array access. - if (indexing_stride % word_stride) - { - SPIRV_CROSS_THROW( - "Matrix stride for dynamic indexing must be divisible by the size of a 4-component vector. " - "Likely culprit here is a row-major matrix being accessed dynamically. " - "This cannot be flattened. 
Try using std140 layout instead."); - } + uint32_t forwarding_length = length - 2; - expr += to_enclosed_expression(index, false); - expr += " * "; - expr += convert_to_string(indexing_stride / word_stride); - expr += " + "; - } + if (opcode_is_precision_sensitive_operation(opcode)) + analyze_precision_requirements(ops[0], ops[1], &ops[2], forwarding_length); + else if (opcode == OpExtInst && length >= 5 && get(ops[2]).ext == SPIRExtension::GLSL) + analyze_precision_requirements(ops[0], ops[1], &ops[4], forwarding_length - 2); + else if (opcode_is_precision_forwarding_instruction(opcode, forwarding_length)) + forward_relaxed_precision(ops[1], &ops[2], forwarding_length); + } - uint32_t parent_type = type->parent_type; - type = &get(type->parent_type); - type_id = parent_type; + uint32_t result_type = 0, result_id = 0; + if (instruction_to_result_type(result_type, result_id, opcode, ops, length)) + { + auto itr = temporary_to_mirror_precision_alias.find(ops[1]); + if (itr != temporary_to_mirror_precision_alias.end()) + return { itr->second, itr->first }; } - // Vector -> Scalar - else if (type->vecsize > 1) + } + + return {}; +} + +void CompilerGLSL::emit_instruction(const Instruction &instruction) +{ + auto ops = stream(instruction); + auto opcode = static_cast(instruction.op); + uint32_t length = instruction.length; + +#define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_BOP_CAST(op, type) \ + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, \ + opcode_is_sign_invariant(opcode), implicit_integer_promotion) +#define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) +#define GLSL_UOP_CAST(op) emit_unary_op_cast(ops[0], ops[1], ops[2], #op) +#define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) +#define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) +#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define 
GLSL_BFOP_CAST(op, type) \ + emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(instruction); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + // Handle C implicit integer promotion rules. + // If we get implicit promotion to int, need to make sure we cast by value to intended return type, + // otherwise, future sign-dependent operations and bitcasts will break. + bool implicit_integer_promotion = integer_width < 32 && backend.implicit_c_integer_promotion_rules && + opcode_can_promote_integer_implicitly(opcode) && + get(ops[0]).vecsize == 1; + + opcode = get_remapped_spirv_op(opcode); + + switch (opcode) + { + // Dealing with memory + case OpLoad: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + + flush_variable_declaration(ptr); + + // If we're loading from memory that cannot be changed by the shader, + // just forward the expression directly to avoid needless temporaries. + // If an expression is mutable and forwardable, we speculate that it is immutable. + bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + + // If loading a non-native row-major matrix, mark the expression as need_transpose. + bool need_transpose = false; + bool old_need_transpose = false; + + auto *ptr_expression = maybe_get(ptr); + + if (forward) { - auto *constant = maybe_get(index); - if (constant) + // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while + // taking the expression. 
+ if (ptr_expression && ptr_expression->need_transpose) { - index = get(index).scalar(); - offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8)); + old_need_transpose = true; + ptr_expression->need_transpose = false; + need_transpose = true; } - else - { - uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8); - - // Dynamic array access. - if (indexing_stride % word_stride) - { - SPIRV_CROSS_THROW( - "Stride for dynamic vector indexing must be divisible by the size of a 4-component vector. " - "This cannot be flattened in legacy targets."); - } + else if (is_non_native_row_major_matrix(ptr)) + need_transpose = true; + } - expr += to_enclosed_expression(index, false); - expr += " * "; - expr += convert_to_string(indexing_stride / word_stride); - expr += " + "; - } + // If we are forwarding this load, + // don't register the read to access chain here, defer that to when we actually use the expression, + // using the add_implied_read_expression mechanism. + string expr; - uint32_t parent_type = type->parent_type; - type = &get(type->parent_type); - type_id = parent_type; + bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked); + bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID); + if (forward || (!is_packed && !is_remapped)) + { + // For the simple case, we do not need to deal with repacking. + expr = to_dereferenced_expression(ptr, false); } else - SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); - } + { + // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before + // storing the expression to a temporary. 
+ expr = to_unpacked_expression(ptr); + } - if (need_transpose) - *need_transpose = row_major_matrix_needs_conversion; - if (out_matrix_stride) - *out_matrix_stride = matrix_stride; + auto &type = get(result_type); + auto &expr_type = expression_type(ptr); - return std::make_pair(expr, offset); -} + // If the expression has more vector components than the result type, insert + // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might + // happen with e.g. the MSL backend replacing the type of an input variable. + if (expr_type.vecsize > type.vecsize) + expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0)); -bool CompilerGLSL::should_dereference(uint32_t id) -{ - const auto &type = expression_type(id); - // Non-pointer expressions don't need to be dereferenced. - if (!type.pointer) - return false; + if (forward && ptr_expression) + ptr_expression->need_transpose = old_need_transpose; - // Handles shouldn't be dereferenced either. - if (!expression_is_lvalue(id)) - return false; + // We might need to cast in order to load from a builtin. + cast_from_variable_load(ptr, expr, type); - // If id is a variable but not a phi variable, we should not dereference it. - if (auto *var = maybe_get(id)) - return var->phi_variable; + if (forward && ptr_expression) + ptr_expression->need_transpose = false; - // If id is an access chain, we should not dereference it. - if (auto *expr = maybe_get(id)) - return !expr->access_chain; + // We might be trying to load a gl_Position[N], where we should be + // doing float4[](gl_in[i].gl_Position, ...) instead. + // Similar workarounds are required for input arrays in tessellation. + // Also, loading from gl_SampleMask array needs special unroll. + unroll_array_from_complex_load(id, ptr, expr); - // Otherwise, we should dereference this pointer expression. 
- return true; -} + if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform)) + { + // If we're loading something non-opaque, we need to handle non-uniform descriptor access. + convert_non_uniform_expression(expr, ptr); + } -bool CompilerGLSL::should_forward(uint32_t id) -{ - // If id is a variable we will try to forward it regardless of force_temporary check below - // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL - auto *var = maybe_get(id); - if (var && var->forwardable) - return true; + if (forward && ptr_expression) + ptr_expression->need_transpose = old_need_transpose; - // For debugging emit temporary variables for all expressions - if (options.force_temporary) - return false; + bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0; - // Immutable expression can always be forwarded. - if (is_immutable(id)) - return true; + if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened) + rewrite_load_for_wrapped_row_major(expr, result_type, ptr); - return false; -} + // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. + // However, if we try to load a complex, composite object from a flattened buffer, + // we should avoid emitting the same code over and over and lower the result to a temporary. 
+ bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1)); -void CompilerGLSL::track_expression_read(uint32_t id) -{ - switch (ir.ids[id].get_type()) - { - case TypeExpression: - { - auto &e = get(id); - for (auto implied_read : e.implied_read_expressions) - track_expression_read(implied_read); - break; - } + SPIRExpression *e = nullptr; + if (!forward && expression_is_non_value_type_array(ptr)) + { + // Complicated load case where we need to make a copy of ptr, but we cannot, because + // it is an array, and our backend does not support arrays as value types. + // Emit the temporary, and copy it explicitly. + e = &emit_uninitialized_temporary_expression(result_type, id); + emit_array_copy(to_expression(id), id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr)); + } + else + e = &emit_op(result_type, id, expr, forward, !usage_tracking); - case TypeAccessChain: - { - auto &e = get(id); - for (auto implied_read : e.implied_read_expressions) - track_expression_read(implied_read); - break; - } + e->need_transpose = need_transpose; + register_read(id, ptr, forward); - default: + if (forward) + { + // Pass through whether the result is of a packed type and the physical type ID. + if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked)) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)) + { + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, + get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)); + } + } + else + { + // This might have been set on an earlier compilation iteration, force it to be unset. 
+ unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + } + + inherit_expression_dependencies(id, ptr); + if (forward) + add_implied_read_expression(*e, ptr); break; } - // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. - // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. - if (expression_is_forwarded(id)) + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: { - auto &v = expression_usage_counts[id]; - v++; + auto *var = maybe_get(ops[2]); + if (var) + flush_variable_declaration(var->self); - if (v >= 2) - { - //if (v == 2) - // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id); + // If the base is immutable, the access chain pointer must also be. + // If an expression is mutable and forwardable, we speculate that it is immutable. + AccessChainMeta meta; + bool ptr_chain = opcode == OpPtrAccessChain; + auto &target_type = get(ops[0]); + auto e = access_chain(ops[2], &ops[3], length - 3, target_type, &meta, ptr_chain); - forced_temporaries.insert(id); - // Force a recompile after this pass to avoid forwarding this variable. - force_recompile(); - } - } -} + // If the base is flattened UBO of struct type, the expression has to be a composite. + // In that case, backends which do not support inline syntax need it to be bound to a temporary. + // Otherwise, invalid expressions like ({UBO[0].xyz, UBO[0].w, UBO[1]}).member are emitted. 
+ bool requires_temporary = false; + if (flattened_buffer_blocks.count(ops[2]) && target_type.basetype == SPIRType::Struct) + requires_temporary = !backend.can_declare_struct_inline; -bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure) -{ - if (forced_temporaries.find(id) != end(forced_temporaries)) - return false; + auto &expr = requires_temporary ? + emit_op(ops[0], ops[1], std::move(e), false) : + set(ops[1], std::move(e), ops[0], should_forward(ops[2])); - for (uint32_t i = 0; i < num_args; i++) - if (!should_forward(args[i])) - return false; + auto *backing_variable = maybe_get_backing_variable(ops[2]); + expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]); + expr.need_transpose = meta.need_transpose; + expr.access_chain = true; - // We need to forward globals as well. - if (!pure) - { - for (auto global : global_variables) - if (!should_forward(global)) - return false; - for (auto aliased : aliased_variables) - if (!should_forward(aliased)) - return false; - } + // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. + if (meta.storage_is_packed) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); + if (meta.storage_is_invariant) + set_decoration(ops[1], DecorationInvariant); + if (meta.flattened_struct) + flattened_structs[ops[1]] = true; + if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) + set_decoration(ops[1], DecorationRelaxedPrecision); + + // If we have some expression dependencies in our access chain, this access chain is technically a forwarded + // temporary which could be subject to invalidation. + // Need to assume we're forwarded while calling inherit_expression_depdendencies. 
+ forwarded_temporaries.insert(ops[1]); + // The access chain itself is never forced to a temporary, but its dependencies might. + suppressed_usage_tracking.insert(ops[1]); - return true; -} + for (uint32_t i = 2; i < length; i++) + { + inherit_expression_dependencies(ops[1], ops[i]); + add_implied_read_expression(expr, ops[i]); + } -void CompilerGLSL::register_impure_function_call() -{ - // Impure functions can modify globals and aliased variables, so invalidate them as well. - for (auto global : global_variables) - flush_dependees(get(global)); - for (auto aliased : aliased_variables) - flush_dependees(get(aliased)); -} + // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries, + // we're not forwarded after all. + if (expr.expression_dependencies.empty()) + forwarded_temporaries.erase(ops[1]); -void CompilerGLSL::register_call_out_argument(uint32_t id) -{ - register_write(id); + break; + } - auto *var = maybe_get(id); - if (var) - flush_variable_declaration(var->self); -} + case OpStore: + { + auto *var = maybe_get(ops[0]); -string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) -{ - // These variables are always function local, - // so make sure we emit the variable without storage qualifiers. - // Some backends will inject custom variables locally in a function - // with a storage qualifier which is not function-local. - auto old_storage = var.storage; - var.storage = StorageClassFunction; - auto expr = variable_decl(var); - var.storage = old_storage; - return expr; -} + if (var && var->statically_assigned) + var->static_expression = ops[1]; + else if (var && var->loop_variable && !var->loop_variable_enable) + var->static_expression = ops[1]; + else if (var && var->remapped_variable && var->static_expression) + { + // Skip the write. 
+ } + else if (flattened_structs.count(ops[0])) + { + store_flattened_struct(ops[0], ops[1]); + register_write(ops[0]); + } + else + { + emit_store_statement(ops[0], ops[1]); + } -void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) -{ - if (var.allocate_temporary_copy) + // Storing a pointer results in a variable pointer, so we must conservatively assume + // we can write through it. + if (expression_type(ops[1]).pointer) + register_write(ops[1]); + break; + } + + case OpArrayLength: { - auto &type = get(var.basetype); - auto &flags = get_decoration_bitset(var.self); - statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + if (has_decoration(ops[2], DecorationNonUniform)) + convert_non_uniform_expression(e, ops[2]); + set(id, join(type_to_glsl(get(result_type)), "(", e, ".length())"), result_type, + true); + break; } -} -void CompilerGLSL::flush_variable_declaration(uint32_t id) -{ - auto *var = maybe_get(id); - if (var && var->deferred_declaration) - { - statement(variable_decl_function_local(*var), ";"); - emit_variable_temporary_copies(*var); - var->deferred_declaration = false; - } -} + // Function calls + case OpFunctionCall: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t func = ops[2]; + const auto *arg = &ops[3]; + length -= 3; + + auto &callee = get(func); + auto &return_type = get(callee.return_type); + bool pure = function_is_pure(callee); + + bool callee_has_out_variables = false; + bool emit_return_value_as_argument = false; -bool CompilerGLSL::remove_duplicate_swizzle(string &op) -{ - auto pos = op.find_last_of('.'); - if (pos == string::npos || pos == 0) - return false; + // Invalidate out variables passed to functions since they can be OpStore'd to. 
+ for (uint32_t i = 0; i < length; i++) + { + if (callee.arguments[i].write_count) + { + register_call_out_argument(arg[i]); + callee_has_out_variables = true; + } - string final_swiz = op.substr(pos + 1, string::npos); + flush_variable_declaration(arg[i]); + } - if (backend.swizzle_is_function) - { - if (final_swiz.size() < 2) - return false; + if (!return_type.array.empty() && !backend.can_return_array) + { + callee_has_out_variables = true; + emit_return_value_as_argument = true; + } - if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") - final_swiz.erase(final_swiz.size() - 2, string::npos); - else - return false; - } + if (!pure) + register_impure_function_call(); - // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. - // If so, and previous swizzle is of same length, - // we can drop the final swizzle altogether. - for (uint32_t i = 0; i < final_swiz.size(); i++) - { - static const char expected[] = { 'x', 'y', 'z', 'w' }; - if (i >= 4 || final_swiz[i] != expected[i]) - return false; - } + string funexpr; + SmallVector arglist; + funexpr += to_name(func) + "("; - auto prevpos = op.find_last_of('.', pos - 1); - if (prevpos == string::npos) - return false; + if (emit_return_value_as_argument) + { + statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";"); + arglist.push_back(to_name(id)); + } - prevpos++; + for (uint32_t i = 0; i < length; i++) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. + if (skip_argument(arg[i])) + continue; - // Make sure there are only swizzles here ... - for (auto i = prevpos; i < pos; i++) - { - if (op[i] < 'w' || op[i] > 'z') + arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i])); + } + + for (auto &combined : callee.combined_parameters) { - // If swizzles are foo.xyz() like in C++ backend for example, check for that. 
- if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')') - break; - return false; + auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]); + auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]); + arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); } - } - // If original swizzle is large enough, just carve out the components we need. - // E.g. foobar.wyx.xy will turn into foobar.wy. - if (pos - prevpos >= final_swiz.size()) - { - op.erase(prevpos + final_swiz.size(), string::npos); + append_global_func_args(callee, length, arglist); - // Add back the function call ... - if (backend.swizzle_is_function) - op += "()"; - } - return true; -} + funexpr += merge(arglist); + funexpr += ")"; -// Optimizes away vector swizzles where we have something like -// vec3 foo; -// foo.xyz <-- swizzle expression does nothing. -// This is a very common pattern after OpCompositeCombine. -bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) -{ - auto pos = op.find_last_of('.'); - if (pos == string::npos || pos == 0) - return false; + // Check for function call constraints. + check_function_call_constraints(arg, length); - string final_swiz = op.substr(pos + 1, string::npos); + if (return_type.basetype != SPIRType::Void) + { + // If the function actually writes to an out variable, + // take the conservative route and do not forward. + // The problem is that we might not read the function + // result (and emit the function) before an out variable + // is read (common case when return value is ignored! + // In order to avoid start tracking invalid variables, + // just avoid the forwarding problem altogether. 
+ bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure && + (forced_temporaries.find(id) == end(forced_temporaries)); - if (backend.swizzle_is_function) - { - if (final_swiz.size() < 2) - return false; + if (emit_return_value_as_argument) + { + statement(funexpr, ";"); + set(id, to_name(id), result_type, true); + } + else + emit_op(result_type, id, funexpr, forward); - if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") - final_swiz.erase(final_swiz.size() - 2, string::npos); + // Function calls are implicit loads from all variables in question. + // Set dependencies for them. + for (uint32_t i = 0; i < length; i++) + register_read(id, arg[i], forward); + + // If we're going to forward the temporary result, + // put dependencies on every variable that must not change. + if (forward) + register_global_read_dependencies(callee, id); + } else - return false; - } + statement(funexpr, ";"); - // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. - // If so, and previous swizzle is of same length, - // we can drop the final swizzle altogether. - for (uint32_t i = 0; i < final_swiz.size(); i++) - { - static const char expected[] = { 'x', 'y', 'z', 'w' }; - if (i >= 4 || final_swiz[i] != expected[i]) - return false; + break; } - auto &type = expression_type(base); - - // Sanity checking ... 
- assert(type.columns == 1 && type.array.empty()); + // Composite munging + case OpCompositeConstruct: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + const auto *const elems = &ops[2]; + length -= 2; - if (type.vecsize == final_swiz.size()) - op.erase(pos, string::npos); - return true; -} + bool forward = true; + for (uint32_t i = 0; i < length; i++) + forward = forward && should_forward(elems[i]); -string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) -{ - uint32_t base = 0; - string op; - string subop; + auto &out_type = get(result_type); + auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr; - // Can only merge swizzles for vectors. - auto &type = get(return_type); - bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1; - bool swizzle_optimization = false; + // Only splat if we have vector constructors. + // Arrays and structs must be initialized properly in full. + bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; - for (uint32_t i = 0; i < length; i++) - { - auto *e = maybe_get(elems[i]); + bool splat = false; + bool swizzle_splat = false; - // If we're merging another scalar which belongs to the same base - // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible! - if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base) + if (in_type) { - // Only supposed to be used for vector swizzle -> scalar. 
- assert(!e->expression.empty() && e->expression.front() == '.'); - subop += e->expression.substr(1, string::npos); - swizzle_optimization = true; + splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting; + swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar; + + if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type)) + { + // Cannot swizzle literal integers as a special case. + swizzle_splat = false; + } } - else + + if (splat || swizzle_splat) { - // We'll likely end up with duplicated swizzles, e.g. - // foobar.xyz.xyz from patterns like - // OpVectorShuffle - // OpCompositeExtract x 3 - // OpCompositeConstruct 3x + other scalar. - // Just modify op in-place. - if (swizzle_optimization) + uint32_t input = elems[0]; + for (uint32_t i = 0; i < length; i++) { - if (backend.swizzle_is_function) - subop += "()"; + if (input != elems[i]) + { + splat = false; + swizzle_splat = false; + } + } + } - // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles. - // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on. - // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize. - // Essentially, we can only remove one set of swizzles, since that's what we have control over ... - // Case 1: - // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done. - // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo. - // Case 2: - // foo.xyz: Duplicate swizzle won't kick in. - // If foo is vec3, we can remove xyz, giving just foo. 
- if (!remove_duplicate_swizzle(subop)) - remove_unity_swizzle(base, subop); + if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) + forward = false; + if (!out_type.array.empty() && !backend.can_declare_arrays_inline) + forward = false; + if (type_is_empty(out_type) && !backend.supports_empty_struct) + forward = false; - // Strips away redundant parens if we created them during component extraction. - strip_enclosed_expression(subop); - swizzle_optimization = false; - op += subop; + string constructor_op; + if (backend.use_initializer_list && composite) + { + bool needs_trailing_tracket = false; + // Only use this path if we are building composites. + // This path cannot be used for arithmetic. + if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) + constructor_op += type_to_glsl_constructor(get(result_type)); + else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty()) + { + // MSL path. Array constructor is baked into type here, do not use _constructor variant. 
+ constructor_op += type_to_glsl_constructor(get(result_type)) + "("; + needs_trailing_tracket = true; } - else - op += subop; + constructor_op += "{ "; - if (i) - op += ", "; - subop = to_composite_constructor_expression(elems[i]); + if (type_is_empty(out_type) && !backend.supports_empty_struct) + constructor_op += "0"; + else if (splat) + constructor_op += to_unpacked_expression(elems[0]); + else + constructor_op += build_composite_combiner(result_type, elems, length); + constructor_op += " }"; + if (needs_trailing_tracket) + constructor_op += ")"; + } + else if (swizzle_splat && !composite) + { + constructor_op = remap_swizzle(get(result_type), 1, to_unpacked_expression(elems[0])); + } + else + { + constructor_op = type_to_glsl_constructor(get(result_type)) + "("; + if (type_is_empty(out_type) && !backend.supports_empty_struct) + constructor_op += "0"; + else if (splat) + constructor_op += to_unpacked_expression(elems[0]); + else + constructor_op += build_composite_combiner(result_type, elems, length); + constructor_op += ")"; } - base = e ? e->base_expression : 0; + if (!constructor_op.empty()) + { + emit_op(result_type, id, constructor_op, forward); + for (uint32_t i = 0; i < length; i++) + inherit_expression_dependencies(id, elems[i]); + } + break; } - if (swizzle_optimization) + case OpVectorInsertDynamic: { - if (backend.swizzle_is_function) - subop += "()"; - - if (!remove_duplicate_swizzle(subop)) - remove_unity_swizzle(base, subop); - // Strips away redundant parens if we created them during component extraction. 
- strip_enclosed_expression(subop); - } + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec = ops[2]; + uint32_t comp = ops[3]; + uint32_t index = ops[4]; - op += subop; - return op; -} + flush_variable_declaration(vec); -bool CompilerGLSL::skip_argument(uint32_t id) const -{ - if (!combined_image_samplers.empty() || !options.vulkan_semantics) - { - auto &type = expression_type(id); - if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1)) - return true; + // Make a copy, then use access chain to store the variable. + statement(declare_temporary(result_type, id), to_expression(vec), ";"); + set(id, to_name(id), result_type, true); + auto chain = access_chain_internal(id, &index, 1, 0, nullptr); + statement(chain, " = ", to_unpacked_expression(comp), ";"); + break; } - return false; -} - -bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs) -{ - // Do this with strings because we have a very clear pattern we can check for and it avoids - // adding lots of special cases to the code emission. - if (rhs.size() < lhs.size() + 3) - return false; - // Do not optimize matrices. They are a bit awkward to reason about in general - // (in which order does operation happen?), and it does not work on MSL anyways. - if (type.vecsize > 1 && type.columns > 1) - return false; + case OpVectorExtractDynamic: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; - auto index = rhs.find(lhs); - if (index != 0) - return false; + auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr); + emit_op(result_type, id, expr, should_forward(ops[2])); + inherit_expression_dependencies(id, ops[2]); + inherit_expression_dependencies(id, ops[3]); + break; + } - // TODO: Shift operators, but it's not important for now. 
- auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1); - if (op != lhs.size() + 1) - return false; + case OpCompositeExtract: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + length -= 3; - // Check that the op is followed by space. This excludes && and ||. - if (rhs[op + 1] != ' ') - return false; + auto &type = get(result_type); - char bop = rhs[op]; - auto expr = rhs.substr(lhs.size() + 3); - // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code. - // Find some common patterns which are equivalent. - if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)")) - statement(lhs, bop, bop, ";"); - else - statement(lhs, " ", bop, "= ", expr, ";"); - return true; -} + // We can only split the expression here if our expression is forwarded as a temporary. + bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries); -void CompilerGLSL::register_control_dependent_expression(uint32_t expr) -{ - if (forwarded_temporaries.find(expr) == end(forwarded_temporaries)) - return; + // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case. + auto &composite_type = expression_type(ops[2]); + bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty(); + if (composite_type_is_complex) + allow_base_expression = false; - assert(current_emitting_block); - current_emitting_block->invalidate_expressions.push_back(expr); -} + // Packed expressions or physical ID mapped expressions cannot be split up. 
+ if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) || + has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID)) + allow_base_expression = false; -void CompilerGLSL::emit_block_instructions(SPIRBlock &block) -{ - current_emitting_block = █ - for (auto &op : block.ops) - emit_instruction(op); - current_emitting_block = nullptr; -} + // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern + // into the base expression. + if (is_non_native_row_major_matrix(ops[2])) + allow_base_expression = false; -void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr) -{ - if (forwarded_temporaries.count(expr.self)) - { - forced_temporaries.insert(expr.self); - force_recompile(); - } + AccessChainMeta meta; + SPIRExpression *e = nullptr; + auto *c = maybe_get(ops[2]); - for (auto &dependent : expr.expression_dependencies) - disallow_forwarding_in_expression_chain(get(dependent)); -} + if (c && !c->specialization && !composite_type_is_complex) + { + auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length); + e = &emit_op(result_type, id, expr, true, true); + } + else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) + { + // Only apply this optimization if result is scalar. -void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id) -{ - // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to - // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary - // in one translation unit, but not another, e.g. due to multiple use of an expression. - // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent - // expressions to be temporaries. 
- // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough - // for all reasonable uses of invariant. - if (!has_decoration(store_id, DecorationInvariant)) - return; + // We want to split the access chain from the base. + // This is so we can later combine different CompositeExtract results + // with CompositeConstruct without emitting code like + // + // vec3 temp = texture(...).xyz + // vec4(temp.x, temp.y, temp.z, 1.0). + // + // when we actually wanted to emit this + // vec4(texture(...).xyz, 1.0). + // + // Including the base will prevent this and would trigger multiple reads + // from expression causing it to be forced to an actual temporary in GLSL. + auto expr = access_chain_internal(ops[2], &ops[3], length, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT | + ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); + e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2])); + inherit_expression_dependencies(id, ops[2]); + e->base_expression = ops[2]; - auto *expr = maybe_get(value_id); - if (!expr) - return; + if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) + set_decoration(ops[1], DecorationRelaxedPrecision); + } + else + { + auto expr = access_chain_internal(ops[2], &ops[3], length, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); + e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2])); + inherit_expression_dependencies(id, ops[2]); + } - disallow_forwarding_in_expression_chain(*expr); -} + // Pass through some meta information to the loaded expression. + // We can still end up loading a buffer type to a variable, then CompositeExtract from it + // instead of loading everything through an access chain. 
+ e->need_transpose = meta.need_transpose; + if (meta.storage_is_packed) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); + if (meta.storage_is_invariant) + set_decoration(id, DecorationInvariant); -void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) -{ - auto rhs = to_pointer_expression(rhs_expression); + break; + } - // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null. - if (!rhs.empty()) + case OpCompositeInsert: { - handle_store_to_invariant_variable(lhs_expression, rhs_expression); - - auto lhs = to_dereferenced_expression(lhs_expression); - - // We might need to bitcast in order to store to a builtin. - bitcast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression)); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t obj = ops[2]; + uint32_t composite = ops[3]; + const auto *elems = &ops[4]; + length -= 4; - // Tries to optimize assignments like " = op expr". - // While this is purely cosmetic, this is important for legacy ESSL where loop - // variable increments must be in either i++ or i += const-expr. - // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. - if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) - statement(lhs, " = ", rhs, ";"); - register_write(lhs_expression); - } -} + flush_variable_declaration(composite); -uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const -{ - if (instr.length < 3) - return 32; + // CompositeInsert requires a copy + modification, but this is very awkward code in HLL. + // Speculate that the input composite is no longer used, and we can modify it in-place. + // There are various scenarios where this is not possible to satisfy. 
+ bool can_modify_in_place = true; + forced_temporaries.insert(id); - auto *ops = stream(instr); + // Cannot safely RMW PHI variables since they have no way to be invalidated, + // forcing temporaries is not going to help. + // This is similar for Constant and Undef inputs. + // The only safe thing to RMW is SPIRExpression. + // If the expression has already been used (i.e. used in a continue block), we have to keep using + // that loop variable, since we won't be able to override the expression after the fact. + // If the composite is hoisted, we might never be able to properly invalidate any usage + // of that composite in a subsequent loop iteration. + if (invalid_expressions.count(composite) || + block_composite_insert_overwrite.count(composite) || + hoisted_temporaries.count(id) || hoisted_temporaries.count(composite) || + maybe_get(composite) == nullptr) + { + can_modify_in_place = false; + } + else if (backend.requires_relaxed_precision_analysis && + has_decoration(composite, DecorationRelaxedPrecision) != + has_decoration(id, DecorationRelaxedPrecision) && + get(result_type).basetype != SPIRType::Struct) + { + // Similarly, if precision does not match for input and output, + // we cannot alias them. If we write a composite into a relaxed precision + // ID, we might get a false truncation. + can_modify_in_place = false; + } - switch (instr.op) - { - case OpSConvert: - case OpConvertSToF: - case OpUConvert: - case OpConvertUToF: - case OpIEqual: - case OpINotEqual: - case OpSLessThan: - case OpSLessThanEqual: - case OpSGreaterThan: - case OpSGreaterThanEqual: - return expression_type(ops[2]).width; + if (can_modify_in_place) + { + // Have to make sure the modified SSA value is bound to a temporary so we can modify it in-place. + if (!forced_temporaries.count(composite)) + force_temporary_and_recompile(composite); - default: - { - // We can look at result type which is more robust. 
- auto *type = maybe_get(ops[0]); - if (type && type_is_integral(*type)) - return type->width; + auto chain = access_chain_internal(composite, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + statement(chain, " = ", to_unpacked_expression(obj), ";"); + set(id, to_expression(composite), result_type, true); + invalid_expressions.insert(composite); + composite_insert_overwritten.insert(composite); + } else - return 32; - } - } -} - -uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const -{ - if (length < 1) - return 32; + { + if (maybe_get(composite) != nullptr) + { + emit_uninitialized_temporary_expression(result_type, id); + } + else + { + // Make a copy, then use access chain to store the variable. + statement(declare_temporary(result_type, id), to_expression(composite), ";"); + set(id, to_name(id), result_type, true); + } - switch (op) - { - case GLSLstd450SAbs: - case GLSLstd450SSign: - case GLSLstd450UMin: - case GLSLstd450SMin: - case GLSLstd450UMax: - case GLSLstd450SMax: - case GLSLstd450UClamp: - case GLSLstd450SClamp: - case GLSLstd450FindSMsb: - case GLSLstd450FindUMsb: - return expression_type(ops[0]).width; + auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + statement(chain, " = ", to_unpacked_expression(obj), ";"); + } - default: - { - // We don't need to care about other opcodes, just return 32. 
- return 32; - } + break; } -} -void CompilerGLSL::emit_instruction(const Instruction &instruction) -{ - auto ops = stream(instruction); - auto opcode = static_cast(instruction.op); - uint32_t length = instruction.length; + case OpCopyMemory: + { + uint32_t lhs = ops[0]; + uint32_t rhs = ops[1]; + if (lhs != rhs) + { + uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET]; + if (!tmp_id) + tmp_id = ir.increase_bound_by(1); + uint32_t tmp_type_id = expression_type(rhs).parent_type; -#define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) -#define GLSL_BOP_CAST(op, type) \ - emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) -#define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) -#define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) -#define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) -#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) -#define GLSL_BFOP_CAST(op, type) \ - emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) -#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) -#define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) + EmbeddedInstruction fake_load, fake_store; + fake_load.op = OpLoad; + fake_load.length = 3; + fake_load.ops.push_back(tmp_type_id); + fake_load.ops.push_back(tmp_id); + fake_load.ops.push_back(rhs); - // If we need to do implicit bitcasts, make sure we do it with the correct type. 
- uint32_t integer_width = get_integer_width_for_instruction(instruction); - auto int_type = to_signed_basetype(integer_width); - auto uint_type = to_unsigned_basetype(integer_width); + fake_store.op = OpStore; + fake_store.length = 2; + fake_store.ops.push_back(lhs); + fake_store.ops.push_back(tmp_id); - switch (opcode) - { - // Dealing with memory - case OpLoad: + // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible. + // Synthesize a fake Load and Store pair for CopyMemory. + emit_instruction(fake_load); + emit_instruction(fake_store); + } + break; + } + + case OpCopyLogical: { + // This is used for copying object of different types, arrays and structs. + // We need to unroll the copy, element-by-element. uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t ptr = ops[2]; - - flush_variable_declaration(ptr); + uint32_t rhs = ops[2]; - // If we're loading from memory that cannot be changed by the shader, - // just forward the expression directly to avoid needless temporaries. - // If an expression is mutable and forwardable, we speculate that it is immutable. - bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + emit_uninitialized_temporary_expression(result_type, id); + emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {}); + break; + } - // If loading a non-native row-major matrix, mark the expression as need_transpose. 
- bool need_transpose = false; - bool old_need_transpose = false; + case OpCopyObject: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t rhs = ops[2]; + bool pointer = get(result_type).pointer; - auto *ptr_expression = maybe_get(ptr); - if (ptr_expression && ptr_expression->need_transpose) + auto *chain = maybe_get(rhs); + auto *imgsamp = maybe_get(rhs); + if (chain) { - old_need_transpose = true; - ptr_expression->need_transpose = false; - need_transpose = true; + // Cannot lower to a SPIRExpression, just copy the object. + auto &e = set(id, *chain); + e.self = id; + } + else if (imgsamp) + { + // Cannot lower to a SPIRExpression, just copy the object. + // GLSL does not currently use this type and will never get here, but MSL does. + // Handled here instead of CompilerMSL for better integration and general handling, + // and in case GLSL or other subclasses require it in the future. + auto &e = set(id, *imgsamp); + e.self = id; + } + else if (expression_is_lvalue(rhs) && !pointer) + { + // Need a copy. + // For pointer types, we copy the pointer itself. + emit_op(result_type, id, to_unpacked_expression(rhs), false); } - else if (is_non_native_row_major_matrix(ptr)) - need_transpose = true; + else + { + // RHS expression is immutable, so just forward it. + // Copying these things really make no sense, but + // seems to be allowed anyways. + auto &e = emit_op(result_type, id, to_expression(rhs), true, true); + if (pointer) + { + auto *var = maybe_get_backing_variable(rhs); + e.loaded_from = var ? var->self : ID(0); + } - // If we are forwarding this load, - // don't register the read to access chain here, defer that to when we actually use the expression, - // using the add_implied_read_expression mechanism. - auto expr = to_dereferenced_expression(ptr, !forward); + // If we're copying an access chain, need to inherit the read expressions. 
+ auto *rhs_expr = maybe_get(rhs); + if (rhs_expr) + { + e.implied_read_expressions = rhs_expr->implied_read_expressions; + e.expression_dependencies = rhs_expr->expression_dependencies; + } + } + break; + } - // We might need to bitcast in order to load from a builtin. - bitcast_from_builtin_load(ptr, expr, get(result_type)); + case OpVectorShuffle: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec0 = ops[2]; + uint32_t vec1 = ops[3]; + const auto *elems = &ops[4]; + length -= 4; - // We might be trying to load a gl_Position[N], where we should be - // doing float4[](gl_in[i].gl_Position, ...) instead. - // Similar workarounds are required for input arrays in tessellation. - unroll_array_from_complex_load(id, ptr, expr); + auto &type0 = expression_type(vec0); - auto &type = get(result_type); - // Shouldn't need to check for ID, but current glslang codegen requires it in some cases - // when loading Image/Sampler descriptors. It does not hurt to check ID as well. - if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT)) - { - propagate_nonuniform_qualifier(ptr); - convert_non_uniform_expression(type, expr); - } + // If we have the undefined swizzle index -1, we need to swizzle in undefined data, + // or in our case, T(0). + bool shuffle = false; + for (uint32_t i = 0; i < length; i++) + if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu) + shuffle = true; - if (ptr_expression) - ptr_expression->need_transpose = old_need_transpose; + // Cannot use swizzles with packed expressions, force shuffle path. + if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked)) + shuffle = true; - // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. - // However, if we try to load a complex, composite object from a flattened buffer, - // we should avoid emitting the same code over and over and lower the result to a temporary. 
- bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 && - (type.basetype == SPIRType::Struct || (type.columns > 1)); + string expr; + bool should_fwd, trivial_forward; - SPIRExpression *e = nullptr; - if (!backend.array_is_value_type && !type.array.empty() && !forward) + if (shuffle) { - // Complicated load case where we need to make a copy of ptr, but we cannot, because - // it is an array, and our backend does not support arrays as value types. - // Emit the temporary, and copy it explicitly. - e = &emit_uninitialized_temporary_expression(result_type, id); - emit_array_copy(to_expression(id), ptr); + should_fwd = should_forward(vec0) && should_forward(vec1); + trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1); + + // Constructor style and shuffling from two different vectors. + SmallVector args; + for (uint32_t i = 0; i < length; i++) + { + if (elems[i] == 0xffffffffu) + { + // Use a constant 0 here. + // We could use the first component or similar, but then we risk propagating + // a value we might not need, and bog down codegen. + SPIRConstant c; + c.constant_type = type0.parent_type; + assert(type0.parent_type != ID(0)); + args.push_back(constant_expression(c)); + } + else if (elems[i] >= type0.vecsize) + args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize)); + else + args.push_back(to_extract_component_expression(vec0, elems[i])); + } + expr += join(type_to_glsl_constructor(get(result_type)), "(", merge(args), ")"); } else - e = &emit_op(result_type, id, expr, forward, !usage_tracking); + { + should_fwd = should_forward(vec0); + trivial_forward = should_suppress_usage_tracking(vec0); - e->need_transpose = need_transpose; - register_read(id, ptr, forward); + // We only source from first vector, so can use swizzle. 
+ // If the vector is packed, unpack it before applying a swizzle (needed for MSL) + expr += to_enclosed_unpacked_expression(vec0); + expr += "."; + for (uint32_t i = 0; i < length; i++) + { + assert(elems[i] != 0xffffffffu); + expr += index_to_swizzle(elems[i]); + } - // Pass through whether the result is of a packed type. - if (has_extended_decoration(ptr, SPIRVCrossDecorationPacked)) - { - set_extended_decoration(id, SPIRVCrossDecorationPacked); - set_extended_decoration(id, SPIRVCrossDecorationPackedType, - get_extended_decoration(ptr, SPIRVCrossDecorationPackedType)); + if (backend.swizzle_is_function && length > 1) + expr += "()"; } - inherit_expression_dependencies(id, ptr); - if (forward) - add_implied_read_expression(*e, ptr); + // A shuffle is trivial in that it doesn't actually *do* anything. + // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. + + emit_op(result_type, id, expr, should_fwd, trivial_forward); + + inherit_expression_dependencies(id, vec0); + if (vec0 != vec1) + inherit_expression_dependencies(id, vec1); break; } - case OpInBoundsAccessChain: - case OpAccessChain: - case OpPtrAccessChain: - { - auto *var = maybe_get(ops[2]); - if (var) - flush_variable_declaration(var->self); + // ALU + case OpIsNan: + GLSL_UFOP(isnan); + break; - // If the base is immutable, the access chain pointer must also be. - // If an expression is mutable and forwardable, we speculate that it is immutable. 
- AccessChainMeta meta; - bool ptr_chain = opcode == OpPtrAccessChain; - auto e = access_chain(ops[2], &ops[3], length - 3, get(ops[0]), &meta, ptr_chain); + case OpIsInf: + GLSL_UFOP(isinf); + break; - auto &expr = set(ops[1], move(e), ops[0], should_forward(ops[2])); + case OpSNegate: + if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0]) + GLSL_UOP_CAST(-); + else + GLSL_UOP(-); + break; - auto *backing_variable = maybe_get_backing_variable(ops[2]); - expr.loaded_from = backing_variable ? backing_variable->self : ops[2]; - expr.need_transpose = meta.need_transpose; - expr.access_chain = true; + case OpFNegate: + GLSL_UOP(-); + break; - // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. - if (meta.storage_is_packed) - set_extended_decoration(ops[1], SPIRVCrossDecorationPacked); - if (meta.storage_packed_type != 0) - set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type); - if (meta.storage_is_invariant) - set_decoration(ops[1], DecorationInvariant); + case OpIAdd: + { + // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(+, type); + break; + } - for (uint32_t i = 2; i < length; i++) - { - inherit_expression_dependencies(ops[1], ops[i]); - add_implied_read_expression(expr, ops[i]); - } + case OpFAdd: + GLSL_BOP(+); + break; + + case OpISub: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(-, type); + break; + } + + case OpFSub: + GLSL_BOP(-); + break; + case OpIMul: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(*, type); break; } - case OpStore: + case OpVectorTimesMatrix: + case OpMatrixTimesVector: { - auto *var = maybe_get(ops[0]); + // If the matrix needs transpose, just flip the multiply order. + auto *e = maybe_get(ops[opcode == OpMatrixTimesVector ? 
2 : 3]); + if (e && e->need_transpose) + { + e->need_transpose = false; + string expr; - if (has_decoration(ops[0], DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(ops[0]); + if (opcode == OpMatrixTimesVector) + expr = join(to_enclosed_unpacked_expression(ops[3]), " * ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); + else + expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", + to_enclosed_unpacked_expression(ops[2])); - if (var && var->statically_assigned) - var->static_expression = ops[1]; - else if (var && var->loop_variable && !var->loop_variable_enable) - var->static_expression = ops[1]; - else if (var && var->remapped_variable) - { - // Skip the write. - } - else if (var && flattened_structs.count(ops[0])) - { - store_flattened_struct(*var, ops[1]); - register_write(ops[0]); + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + emit_op(ops[0], ops[1], expr, forward); + e->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); } else - { - emit_store_statement(ops[0], ops[1]); - } - - // Storing a pointer results in a variable pointer, so we must conservatively assume - // we can write through it. - if (expression_type(ops[1]).pointer) - register_write(ops[1]); + GLSL_BOP(*); break; } - case OpArrayLength: + case OpMatrixTimesMatrix: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); - set(id, join(type_to_glsl(get(result_type)), "(", e, ".length())"), result_type, - true); + auto *a = maybe_get(ops[2]); + auto *b = maybe_get(ops[3]); + + // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. + // a^T * b^T = (b * a)^T. 
+ if (a && b && a->need_transpose && b->need_transpose) + { + a->need_transpose = false; + b->need_transpose = false; + auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + auto &e = emit_op(ops[0], ops[1], expr, forward); + e.need_transpose = true; + a->need_transpose = true; + b->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + GLSL_BOP(*); + break; } - // Function calls - case OpFunctionCall: + case OpFMul: + case OpMatrixTimesScalar: + case OpVectorTimesScalar: + GLSL_BOP(*); + break; + + case OpOuterProduct: + GLSL_BFOP(outerProduct); + break; + + case OpDot: + GLSL_BFOP(dot); + break; + + case OpTranspose: + if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00 + { + // transpose() is not available, so instead, flip need_transpose, + // which can later be turned into an emulated transpose op by + // convert_row_major_matrix(), if necessary. + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t input = ops[2]; + + // Force need_transpose to false temporarily to prevent + // to_expression() from doing the transpose. + bool need_transpose = false; + auto *input_e = maybe_get(input); + if (input_e) + swap(need_transpose, input_e->need_transpose); + + bool forward = should_forward(input); + auto &e = emit_op(result_type, result_id, to_expression(input), forward); + e.need_transpose = !need_transpose; + + // Restore the old need_transpose flag. 
+ if (input_e) + input_e->need_transpose = need_transpose; + } + else + GLSL_UFOP(transpose); + break; + + case OpSRem: { uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - uint32_t func = ops[2]; - const auto *arg = &ops[3]; - length -= 3; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; - auto &callee = get(func); - auto &return_type = get(callee.return_type); - bool pure = function_is_pure(callee); + // Needs special handling. + bool forward = should_forward(op0) && should_forward(op1); + auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); - bool callee_has_out_variables = false; - bool emit_return_value_as_argument = false; + if (implicit_integer_promotion) + expr = join(type_to_glsl(get(result_type)), '(', expr, ')'); - // Invalidate out variables passed to functions since they can be OpStore'd to. - for (uint32_t i = 0; i < length; i++) - { - if (callee.arguments[i].write_count) - { - register_call_out_argument(arg[i]); - callee_has_out_variables = true; - } + emit_op(result_type, result_id, expr, forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + break; + } - flush_variable_declaration(arg[i]); - } + case OpSDiv: + GLSL_BOP_CAST(/, int_type); + break; - if (!return_type.array.empty() && !backend.can_return_array) - { - callee_has_out_variables = true; - emit_return_value_as_argument = true; - } + case OpUDiv: + GLSL_BOP_CAST(/, uint_type); + break; - if (!pure) - register_impure_function_call(); + case OpIAddCarry: + case OpISubBorrow: + { + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400."); - string funexpr; - SmallVector arglist; - funexpr += 
to_name(func) + "("; + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow"; - if (emit_return_value_as_argument) - { - statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";"); - arglist.push_back(to_name(id)); - } + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ", + to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");"); + break; + } - for (uint32_t i = 0; i < length; i++) - { - // Do not pass in separate images or samplers if we're remapping - // to combined image samplers. - if (skip_argument(arg[i])) - continue; + case OpUMulExtended: + case OpSMulExtended: + { + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000."); - arglist.push_back(to_func_call_arg(arg[i])); - } + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended"; - for (auto &combined : callee.combined_parameters) - { - uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id]; - uint32_t sampler_id = combined.global_sampler ? 
combined.sampler_id : arg[combined.sampler_id]; - arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); - } + statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".", + to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");"); + break; + } - append_global_func_args(callee, length, arglist); + case OpFDiv: + GLSL_BOP(/); + break; - funexpr += merge(arglist); - funexpr += ")"; + case OpShiftRightLogical: + GLSL_BOP_CAST(>>, uint_type); + break; - // Check for function call constraints. - check_function_call_constraints(arg, length); + case OpShiftRightArithmetic: + GLSL_BOP_CAST(>>, int_type); + break; - if (return_type.basetype != SPIRType::Void) - { - // If the function actually writes to an out variable, - // take the conservative route and do not forward. - // The problem is that we might not read the function - // result (and emit the function) before an out variable - // is read (common case when return value is ignored! - // In order to avoid start tracking invalid variables, - // just avoid the forwarding problem altogether. - bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure && - (forced_temporaries.find(id) == end(forced_temporaries)); + case OpShiftLeftLogical: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(<<, type); + break; + } - if (emit_return_value_as_argument) - { - statement(funexpr, ";"); - set(id, to_name(id), result_type, true); - } - else - emit_op(result_type, id, funexpr, forward); + case OpBitwiseOr: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(|, type); + break; + } - // Function calls are implicit loads from all variables in question. - // Set dependencies for them. 
- for (uint32_t i = 0; i < length; i++) - register_read(id, arg[i], forward); + case OpBitwiseXor: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(^, type); + break; + } - // If we're going to forward the temporary result, - // put dependencies on every variable that must not change. - if (forward) - register_global_read_dependencies(callee, id); - } + case OpBitwiseAnd: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(&, type); + break; + } + + case OpNot: + if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0]) + GLSL_UOP_CAST(~); else - statement(funexpr, ";"); + GLSL_UOP(~); + break; + case OpUMod: + GLSL_BOP_CAST(%, uint_type); break; - } - // Composite munging - case OpCompositeConstruct: + case OpSMod: + GLSL_BOP_CAST(%, int_type); + break; + + case OpFMod: + GLSL_BFOP(mod); + break; + + case OpFRem: { + if (is_legacy()) + SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is " + "needed for legacy."); + uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - const auto *const elems = &ops[2]; - length -= 2; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; - bool forward = true; - for (uint32_t i = 0; i < length; i++) - forward = forward && should_forward(elems[i]); + // Needs special handling. + bool forward = should_forward(op0) && should_forward(op1); + auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); - auto &out_type = get(result_type); - auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr; + emit_op(result_type, result_id, expr, forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + break; + } - // Only splat if we have vector constructors. - // Arrays and structs must be initialized properly in full. 
- bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; + // Relational + case OpAny: + GLSL_UFOP(any); + break; - bool splat = false; - bool swizzle_splat = false; + case OpAll: + GLSL_UFOP(all); + break; - if (in_type) - { - splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting; - swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar; + case OpSelect: + emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]); + break; - if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type)) - { - // Cannot swizzle literal integers as a special case. - swizzle_splat = false; - } - } + case OpLogicalOr: + { + // No vector variant in GLSL for logical OR. + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get(result_type); - if (splat || swizzle_splat) - { - uint32_t input = elems[0]; - for (uint32_t i = 0; i < length; i++) - { - if (input != elems[i]) - { - splat = false; - swizzle_splat = false; - } - } - } + if (type.vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown); + else + GLSL_BOP(||); + break; + } - if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) - forward = false; - if (!out_type.array.empty() && !backend.can_declare_arrays_inline) - forward = false; - if (type_is_empty(out_type) && !backend.supports_empty_struct) - forward = false; + case OpLogicalAnd: + { + // No vector variant in GLSL for logical AND. + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get(result_type); - string constructor_op; - if (backend.use_initializer_list && composite) - { - // Only use this path if we are building composites. - // This path cannot be used for arithmetic. 
- if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) - constructor_op += type_to_glsl_constructor(get(result_type)); - constructor_op += "{ "; - if (type_is_empty(out_type) && !backend.supports_empty_struct) - constructor_op += "0"; - else if (splat) - constructor_op += to_expression(elems[0]); - else - constructor_op += build_composite_combiner(result_type, elems, length); - constructor_op += " }"; - } - else if (swizzle_splat && !composite) - { - constructor_op = remap_swizzle(get(result_type), 1, to_expression(elems[0])); - } + if (type.vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown); else - { - constructor_op = type_to_glsl_constructor(get(result_type)) + "("; - if (type_is_empty(out_type) && !backend.supports_empty_struct) - constructor_op += "0"; - else if (splat) - constructor_op += to_expression(elems[0]); - else - constructor_op += build_composite_combiner(result_type, elems, length); - constructor_op += ")"; - } + GLSL_BOP(&&); + break; + } - if (!constructor_op.empty()) - { - emit_op(result_type, id, constructor_op, forward); - for (uint32_t i = 0; i < length; i++) - inherit_expression_dependencies(id, elems[i]); - } + case OpLogicalNot: + { + auto &type = get(ops[0]); + if (type.vecsize > 1) + GLSL_UFOP(not ); + else + GLSL_UOP(!); break; } - case OpVectorInsertDynamic: + case OpIEqual: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - uint32_t vec = ops[2]; - uint32_t comp = ops[3]; - uint32_t index = ops[4]; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(equal, int_type); + else + GLSL_BOP_CAST(==, int_type); + break; + } - flush_variable_declaration(vec); + case OpLogicalEqual: + case OpFOrdEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(equal); + else + GLSL_BOP(==); + break; + } - // Make a copy, then use access chain to store the variable. 
- statement(declare_temporary(result_type, id), to_expression(vec), ";"); - set(id, to_name(id), result_type, true); - auto chain = access_chain_internal(id, &index, 1, 0, nullptr); - statement(chain, " = ", to_expression(comp), ";"); + case OpINotEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(notEqual, int_type); + else + GLSL_BOP_CAST(!=, int_type); break; } - case OpVectorExtractDynamic: + case OpLogicalNotEqual: + case OpFOrdNotEqual: + case OpFUnordNotEqual: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + // GLSL is fuzzy on what to do with ordered vs unordered not equal. + // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE, + // but this means we have no easy way of implementing ordered not equal. + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(notEqual); + else + GLSL_BOP(!=); + break; + } - auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr); - emit_op(result_type, id, expr, should_forward(ops[2])); - inherit_expression_dependencies(id, ops[2]); - inherit_expression_dependencies(id, ops[3]); + case OpUGreaterThan: + case OpSGreaterThan: + { + auto type = opcode == OpUGreaterThan ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(greaterThan, type); + else + GLSL_BOP_CAST(>, type); break; } - case OpCompositeExtract: + case OpFOrdGreaterThan: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - length -= 3; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(greaterThan); + else + GLSL_BOP(>); + break; + } - auto &type = get(result_type); + case OpUGreaterThanEqual: + case OpSGreaterThanEqual: + { + auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(greaterThanEqual, type); + else + GLSL_BOP_CAST(>=, type); + break; + } - // We can only split the expression here if our expression is forwarded as a temporary. 
- bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries); + case OpFOrdGreaterThanEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(greaterThanEqual); + else + GLSL_BOP(>=); + break; + } - // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case. - auto &composite_type = expression_type(ops[2]); - if (composite_type.basetype == SPIRType::Struct || !composite_type.array.empty()) - allow_base_expression = false; + case OpULessThan: + case OpSLessThan: + { + auto type = opcode == OpULessThan ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(lessThan, type); + else + GLSL_BOP_CAST(<, type); + break; + } - // Packed expressions cannot be split up. - if (has_extended_decoration(ops[2], SPIRVCrossDecorationPacked)) - allow_base_expression = false; + case OpFOrdLessThan: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(lessThan); + else + GLSL_BOP(<); + break; + } - AccessChainMeta meta; - SPIRExpression *e = nullptr; + case OpULessThanEqual: + case OpSLessThanEqual: + { + auto type = opcode == OpULessThanEqual ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(lessThanEqual, type); + else + GLSL_BOP_CAST(<=, type); + break; + } - // Only apply this optimization if result is scalar. - if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) - { - // We want to split the access chain from the base. - // This is so we can later combine different CompositeExtract results - // with CompositeConstruct without emitting code like - // - // vec3 temp = texture(...).xyz - // vec4(temp.x, temp.y, temp.z, 1.0). - // - // when we actually wanted to emit this - // vec4(texture(...).xyz, 1.0). - // - // Including the base will prevent this and would trigger multiple reads - // from expression causing it to be forced to an actual temporary in GLSL. 
- auto expr = access_chain_internal(ops[2], &ops[3], length, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta); - e = &emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2])); - inherit_expression_dependencies(id, ops[2]); - e->base_expression = ops[2]; - } + case OpFOrdLessThanEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(lessThanEqual); else - { - auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); - e = &emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2])); - inherit_expression_dependencies(id, ops[2]); - } + GLSL_BOP(<=); + break; + } - // Pass through some meta information to the loaded expression. - // We can still end up loading a buffer type to a variable, then CompositeExtract from it - // instead of loading everything through an access chain. - e->need_transpose = meta.need_transpose; - if (meta.storage_is_packed) - set_extended_decoration(id, SPIRVCrossDecorationPacked); - if (meta.storage_packed_type != 0) - set_extended_decoration(id, SPIRVCrossDecorationPackedType, meta.storage_packed_type); - if (meta.storage_is_invariant) - set_decoration(id, DecorationInvariant); + // Conversion + case OpSConvert: + case OpConvertSToF: + case OpUConvert: + case OpConvertUToF: + { + auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? 
int_type : uint_type; + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto &type = get(result_type); + auto &arg_type = expression_type(ops[2]); + auto func = type_to_glsl_constructor(type); + if (arg_type.width < type.width || type_is_floating_point(type)) + emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype); + else + emit_unary_func_op(result_type, id, ops[2], func.c_str()); break; } - case OpCompositeInsert: + case OpConvertFToU: + case OpConvertFToS: { + // Cast to expected arithmetic type, then potentially bitcast away to desired signedness. uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t obj = ops[2]; - uint32_t composite = ops[3]; - const auto *elems = &ops[4]; - length -= 4; + auto &type = get(result_type); + auto expected_type = type; + auto &float_type = expression_type(ops[2]); + expected_type.basetype = + opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width); - flush_variable_declaration(composite); + auto func = type_to_glsl_constructor(expected_type); + emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype); + break; + } - // Make a copy, then use access chain to store the variable. 
- statement(declare_temporary(result_type, id), to_expression(composite), ";"); - set(id, to_name(id), result_type, true); - auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); - statement(chain, " = ", to_expression(obj), ";"); + case OpFConvert: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto func = type_to_glsl_constructor(get(result_type)); + emit_unary_func_op(result_type, id, ops[2], func.c_str()); break; } - case OpCopyMemory: + case OpBitcast: { - uint32_t lhs = ops[0]; - uint32_t rhs = ops[1]; - if (lhs != rhs) + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + + if (!emit_complex_bitcast(result_type, id, arg)) { - flush_variable_declaration(lhs); - flush_variable_declaration(rhs); - statement(to_expression(lhs), " = ", to_expression(rhs), ";"); - register_write(lhs); + auto op = bitcast_glsl_op(get(result_type), expression_type(arg)); + emit_unary_func_op(result_type, id, arg, op.c_str()); } break; } - case OpCopyObject: + case OpQuantizeToF16: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t rhs = ops[2]; - bool pointer = get(result_type).pointer; + uint32_t arg = ops[2]; - auto *chain = maybe_get(rhs); - if (chain) + string op; + auto &type = get(result_type); + + switch (type.vecsize) { - // Cannot lower to a SPIRExpression, just copy the object. - auto &e = set(id, *chain); - e.self = id; - } - else if (expression_is_lvalue(rhs) && !pointer) + case 1: + op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x"); + break; + case 2: + op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))"); + break; + case 3: { - // Need a copy. - // For pointer types, we copy the pointer itself. 
- statement(declare_temporary(result_type, id), to_expression(rhs), ";"); - set(id, to_name(id), result_type, true); - inherit_expression_dependencies(id, rhs); + auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x"); + op = join("vec3(", op0, ", ", op1, ")"); + break; } - else + case 4: { - // RHS expression is immutable, so just forward it. - // Copying these things really make no sense, but - // seems to be allowed anyways. - auto &e = set(id, to_expression(rhs), result_type, true); - if (pointer) - { - auto *var = maybe_get_backing_variable(rhs); - e.loaded_from = var ? var->self : 0; - } - - // If we're copying an access chain, need to inherit the read expressions. - auto *rhs_expr = maybe_get(rhs); - if (rhs_expr) - e.implied_read_expressions = rhs_expr->implied_read_expressions; + auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))"); + op = join("vec4(", op0, ", ", op1, ")"); + break; } + default: + SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); + } + + emit_op(result_type, id, op, should_forward(arg)); + inherit_expression_dependencies(id, arg); break; } - case OpVectorShuffle: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - uint32_t vec0 = ops[2]; - uint32_t vec1 = ops[3]; - const auto *elems = &ops[4]; - length -= 4; - - auto &type0 = expression_type(vec0); - - // If we have the undefined swizzle index -1, we need to swizzle in undefined data, - // or in our case, T(0). - bool shuffle = false; - for (uint32_t i = 0; i < length; i++) - if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu) - shuffle = true; - - // Cannot use swizzles with packed expressions, force shuffle path. 
- if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPacked)) - shuffle = true; + // Derivatives + case OpDPdx: + GLSL_UFOP(dFdx); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; - string expr; - bool should_fwd, trivial_forward; + case OpDPdy: + GLSL_UFOP(dFdy); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; - if (shuffle) + case OpDPdxFine: + GLSL_UFOP(dFdxFine); + if (options.es) { - should_fwd = should_forward(vec0) && should_forward(vec1); - trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1); - - // Constructor style and shuffling from two different vectors. - SmallVector args; - for (uint32_t i = 0; i < length; i++) - { - if (elems[i] == 0xffffffffu) - { - // Use a constant 0 here. - // We could use the first component or similar, but then we risk propagating - // a value we might not need, and bog down codegen. - SPIRConstant c; - c.constant_type = type0.parent_type; - assert(type0.parent_type != 0); - args.push_back(constant_expression(c)); - } - else if (elems[i] >= type0.vecsize) - args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize)); - else - args.push_back(to_extract_component_expression(vec0, elems[i])); - } - expr += join(type_to_glsl_constructor(get(result_type)), "(", merge(args), ")"); + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } - else - { - should_fwd = should_forward(vec0); - trivial_forward = !expression_is_forwarded(vec0); - - // We only source from first vector, so can use swizzle. 
- // If the vector is packed, unpack it before applying a swizzle (needed for MSL) - expr += to_enclosed_unpacked_expression(vec0); - expr += "."; - for (uint32_t i = 0; i < length; i++) - { - assert(elems[i] != 0xffffffffu); - expr += index_to_swizzle(elems[i]); - } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; - if (backend.swizzle_is_function && length > 1) - expr += "()"; + case OpDPdyFine: + GLSL_UFOP(dFdyFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } - - // A shuffle is trivial in that it doesn't actually *do* anything. - // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. - - emit_op(result_type, id, expr, should_fwd, trivial_forward); - inherit_expression_dependencies(id, vec0); - inherit_expression_dependencies(id, vec1); + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); break; - } - // ALU - case OpIsNan: - GLSL_UFOP(isnan); + case OpDPdxCoarse: + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + GLSL_UFOP(dFdxCoarse); + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); break; - case OpIsInf: - GLSL_UFOP(isinf); + case OpDPdyCoarse: + GLSL_UFOP(dFdyCoarse); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); break; - case OpSNegate: - case OpFNegate: - GLSL_UOP(-); + case OpFwidth: + GLSL_UFOP(fwidth); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + 
register_control_dependent_expression(ops[1]); break; - case OpIAdd: - { - // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(+, type); + case OpFwidthCoarse: + GLSL_UFOP(fwidthCoarse); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); break; - } - case OpFAdd: - GLSL_BOP(+); + case OpFwidthFine: + GLSL_UFOP(fwidthFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); break; - case OpISub: + // Bitfield + case OpBitFieldInsert: { - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(-, type); + emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int); break; } - case OpFSub: - GLSL_BOP(-); - break; - - case OpIMul: + case OpBitFieldSExtract: { - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(*, type); + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type, + SPIRType::Int, SPIRType::Int); break; } - case OpVectorTimesMatrix: - case OpMatrixTimesVector: + case OpBitFieldUExtract: { - // If the matrix needs transpose, just flip the multiply order. - auto *e = maybe_get(ops[opcode == OpMatrixTimesVector ? 
2 : 3]); - if (e && e->need_transpose) - { - e->need_transpose = false; - emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*"); - e->need_transpose = true; - } - else - GLSL_BOP(*); + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type, + SPIRType::Int, SPIRType::Int); break; } - case OpFMul: - case OpMatrixTimesScalar: - case OpVectorTimesScalar: - case OpMatrixTimesMatrix: - GLSL_BOP(*); + case OpBitReverse: + // BitReverse does not have issues with sign since result type must match input type. + GLSL_UFOP(bitfieldReverse); break; - case OpOuterProduct: - GLSL_BFOP(outerProduct); + case OpBitCount: + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type); break; + } - case OpDot: - GLSL_BFOP(dot); - break; + // Atomics + case OpAtomicExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + // Ignore semantics for now, probably only relevant to CL. + uint32_t val = ops[5]; + const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; - case OpTranspose: - GLSL_UFOP(transpose); + emit_atomic_func_op(result_type, id, ptr, val, op); break; + } - case OpSRem: + case OpAtomicCompareExchange: { uint32_t result_type = ops[0]; - uint32_t result_id = ops[1]; - uint32_t op0 = ops[2]; - uint32_t op1 = ops[3]; - - // Needs special handling. - bool forward = should_forward(op0) && should_forward(op1); - auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", - to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t val = ops[6]; + uint32_t comp = ops[7]; + const char *op = check_atomic_image(ptr) ? 
"imageAtomicCompSwap" : "atomicCompSwap"; - emit_op(result_type, result_id, expr, forward); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + emit_atomic_func_op(result_type, id, ptr, comp, val, op); break; } - case OpSDiv: - GLSL_BOP_CAST(/, int_type); - break; - - case OpUDiv: - GLSL_BOP_CAST(/, uint_type); + case OpAtomicLoad: + { + // In plain GLSL, we have no atomic loads, so emulate this by fetch adding by 0 and hope compiler figures it out. + // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. + auto &type = expression_type(ops[2]); + forced_temporaries.insert(ops[1]); + bool atomic_image = check_atomic_image(ops[2]); + bool unsigned_type = (type.basetype == SPIRType::UInt) || + (atomic_image && get(type.image.type).basetype == SPIRType::UInt); + const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; + const char *increment = unsigned_type ? "0u" : "0"; + emit_op(ops[0], ops[1], + join(op, "(", + to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false); + flush_all_atomic_capable_variables(); break; + } - case OpIAddCarry: - case OpISubBorrow: + case OpAtomicStore: { - if (options.es && options.version < 310) - SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); - else if (!options.es && options.version < 400) - SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400."); - - uint32_t result_type = ops[0]; - uint32_t result_id = ops[1]; - uint32_t op0 = ops[2]; - uint32_t op1 = ops[3]; - auto &type = get(result_type); - emit_uninitialized_temporary_expression(result_type, result_id); - const char *op = opcode == OpIAddCarry ? 
"uaddCarry" : "usubBorrow"; - - statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ", - to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");"); + // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result. + // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. + uint32_t ptr = ops[0]; + // Ignore semantics for now, probably only relevant to CL. + uint32_t val = ops[3]; + const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; + statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");"); + flush_all_atomic_capable_variables(); break; } - case OpUMulExtended: - case OpSMulExtended: + case OpAtomicIIncrement: + case OpAtomicIDecrement: { - if (options.es && options.version < 310) - SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); - else if (!options.es && options.version < 400) - SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000."); + forced_temporaries.insert(ops[1]); + auto &type = expression_type(ops[2]); + if (type.storage == StorageClassAtomicCounter) + { + // Legacy GLSL stuff, not sure if this is relevant to support. + if (opcode == OpAtomicIIncrement) + GLSL_UFOP(atomicCounterIncrement); + else + GLSL_UFOP(atomicCounterDecrement); + } + else + { + bool atomic_image = check_atomic_image(ops[2]); + bool unsigned_type = (type.basetype == SPIRType::UInt) || + (atomic_image && get(type.image.type).basetype == SPIRType::UInt); + const char *op = atomic_image ? 
"imageAtomicAdd" : "atomicAdd"; - uint32_t result_type = ops[0]; - uint32_t result_id = ops[1]; - uint32_t op0 = ops[2]; - uint32_t op1 = ops[3]; - forced_temporaries.insert(result_id); - auto &type = get(result_type); - emit_uninitialized_temporary_expression(result_type, result_id); - const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended"; + const char *increment = nullptr; + if (opcode == OpAtomicIIncrement && unsigned_type) + increment = "1u"; + else if (opcode == OpAtomicIIncrement) + increment = "1"; + else if (unsigned_type) + increment = "uint(-1)"; + else + increment = "-1"; - statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".", - to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");"); + emit_op(ops[0], ops[1], + join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false); + } + + flush_all_atomic_capable_variables(); break; } - case OpFDiv: - GLSL_BOP(/); + case OpAtomicIAdd: + case OpAtomicFAddEXT: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; + } - case OpShiftRightLogical: - GLSL_BOP_CAST(>>, uint_type); + case OpAtomicISub: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + forced_temporaries.insert(ops[1]); + auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); + emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); + flush_all_atomic_capable_variables(); break; + } - case OpShiftRightArithmetic: - GLSL_BOP_CAST(>>, int_type); + case OpAtomicSMin: + case OpAtomicUMin: + { + const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicMin" : "atomicMin"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; + } - case OpShiftLeftLogical: + case OpAtomicSMax: + case OpAtomicUMax: { - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(<<, type); + const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } - case OpBitwiseOr: + case OpAtomicAnd: { - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(|, type); + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } - case OpBitwiseXor: + case OpAtomicOr: { - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(^, type); + const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } - case OpBitwiseAnd: + case OpAtomicXor: { - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(&, type); + const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicXor" : "atomicXor"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } - case OpNot: - GLSL_UOP(~); + // Geometry shaders + case OpEmitVertex: + statement("EmitVertex();"); break; - case OpUMod: - GLSL_BOP_CAST(%, uint_type); + case OpEndPrimitive: + statement("EndPrimitive();"); break; - case OpSMod: - GLSL_BOP_CAST(%, int_type); - break; + case OpEmitStreamVertex: + { + if (options.es) + SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400."); - case OpFMod: - GLSL_BFOP(mod); + auto stream_expr = to_expression(ops[0]); + if (expression_type(ops[0]).basetype != SPIRType::Int) + stream_expr = join("int(", stream_expr, ")"); + statement("EmitStreamVertex(", stream_expr, ");"); break; + } - case OpFRem: + case OpEndStreamPrimitive: { - if (is_legacy()) - SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is " - "needed for legacy."); - - uint32_t result_type = ops[0]; - uint32_t result_id = ops[1]; - uint32_t op0 = ops[2]; - uint32_t op1 = ops[3]; - - // Needs special handling. 
- bool forward = should_forward(op0) && should_forward(op1); - auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(", - to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + if (options.es) + SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400."); - emit_op(result_type, result_id, expr, forward); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + auto stream_expr = to_expression(ops[0]); + if (expression_type(ops[0]).basetype != SPIRType::Int) + stream_expr = join("int(", stream_expr, ")"); + statement("EndStreamPrimitive(", stream_expr, ");"); break; } - // Relational - case OpAny: - GLSL_UFOP(any); + // Textures + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageFetch: + case OpImageGather: + case OpImageDrefGather: + // Gets a bit hairy, so move this to a separate instruction. + emit_texture_op(instruction, false); break; - case OpAll: - GLSL_UFOP(all); + case OpImageSparseSampleExplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageSparseSampleDrefExplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseFetch: + case OpImageSparseGather: + case OpImageSparseDrefGather: + // Gets a bit hairy, so move this to a separate instruction. 
+ emit_texture_op(instruction, true); break; - case OpSelect: - emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]); + case OpImageSparseTexelsResident: + if (options.es) + SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL."); + require_extension_internal("GL_ARB_sparse_texture2"); + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean); break; - case OpLogicalOr: + case OpImage: { - // No vector variant in GLSL for logical OR. - auto result_type = ops[0]; - auto id = ops[1]; - auto &type = get(result_type); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; - if (type.vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||"); - else - GLSL_BOP(||); + // Suppress usage tracking. + auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); + + // When using the image, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : ID(0); break; } - case OpLogicalAnd: + case OpImageQueryLod: { - // No vector variant in GLSL for logical AND. - auto result_type = ops[0]; - auto id = ops[1]; - auto &type = get(result_type); - - if (type.vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&"); + const char *op = nullptr; + if (!options.es && options.version < 400) + { + require_extension_internal("GL_ARB_texture_query_lod"); + // For some reason, the ARB spec is all-caps. 
+ op = "textureQueryLOD"; + } + else if (options.es) + SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile."); else - GLSL_BOP(&&); + op = "textureQueryLod"; + + auto sampler_expr = to_expression(ops[2]); + if (has_decoration(ops[2], DecorationNonUniform)) + { + if (maybe_get_backing_variable(ops[2])) + convert_non_uniform_expression(sampler_expr, ops[2]); + else if (*backend.nonuniform_qualifier != '\0') + sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")"); + } + + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"), + forward); + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + register_control_dependent_expression(ops[1]); break; } - case OpLogicalNot: + case OpImageQueryLevels: { - auto &type = get(ops[0]); - if (type.vecsize > 1) - GLSL_UFOP(not); - else - GLSL_UOP(!); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_texture_query_levels"); + if (options.es) + SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile."); + + auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")"); + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); break; } - case OpIEqual: + case OpImageQuerySamples: { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP_CAST(equal, int_type); + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + string expr; + if (type.image.sampled == 2) + expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")"); else - GLSL_BOP_CAST(==, int_type); + expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")"); + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, 
SPIRType::Int, expr); + emit_op(result_type, id, expr, true); break; } - case OpLogicalEqual: - case OpFOrdEqual: + case OpSampledImage: { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP(equal); - else - GLSL_BOP(==); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_sampled_image_op(result_type, id, ops[2], ops[3]); + inherit_expression_dependencies(id, ops[2]); + inherit_expression_dependencies(id, ops[3]); break; } - case OpINotEqual: + case OpImageQuerySizeLod: { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP_CAST(notEqual, int_type); - else - GLSL_BOP_CAST(!=, int_type); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t img = ops[2]; + auto &type = expression_type(img); + auto &imgtype = get(type.self); + + std::string fname = "textureSize"; + if (is_legacy_desktop()) + { + fname = legacy_tex_op(fname, imgtype, img); + } + else if (is_legacy_es()) + SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100."); + + auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ", + bitcast_expression(SPIRType::Int, ops[3]), ")"); + + // ES needs to emulate 1D images as 2D. + if (type.image.dim == Dim1D && options.es) + expr = join(expr, ".x"); + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); break; } - case OpLogicalNotEqual: - case OpFOrdNotEqual: + // Image load/store + case OpImageRead: + case OpImageSparseRead: { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP(notEqual); + // We added Nonreadable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to read the image after all, remove the qualifier and recompile. 
+ auto *var = maybe_get_backing_variable(ops[2]); + if (var) + { + auto &flags = get_decoration_bitset(var->self); + if (flags.get(DecorationNonReadable)) + { + unset_decoration(var->self, DecorationNonReadable); + force_recompile(); + } + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + bool pure; + string imgexpr; + auto &type = expression_type(ops[2]); + + if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code + { + if (type.image.ms) + SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible."); + + auto itr = + find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; }); + + if (itr == end(pls_inputs)) + { + // For non-PLS inputs, we rely on subpass type remapping information to get it right + // since ImageRead always returns 4-component vectors and the backing type is opaque. + if (!var->remapped_components) + SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); + imgexpr = remap_swizzle(get(result_type), var->remapped_components, to_expression(ops[2])); + } + else + { + // PLS input could have different number of components than what the SPIR expects, swizzle to + // the appropriate vector size. + uint32_t components = pls_format_to_components(itr->format); + imgexpr = remap_swizzle(get(result_type), components, to_expression(ops[2])); + } + pure = true; + } + else if (type.image.dim == DimSubpassData) + { + if (var && subpass_input_is_framebuffer_fetch(var->self)) + { + imgexpr = to_expression(var->self); + } + else if (options.vulkan_semantics) + { + // With Vulkan semantics, use the proper Vulkan GLSL construct. 
+ if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")"); + } + else + imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")"); + } + else + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", + to_expression(samples), ")"); + } + else + { + // Implement subpass loads via texture barrier style sampling. + imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); + } + } + imgexpr = remap_swizzle(get(result_type), 4, imgexpr); + pure = true; + } else - GLSL_BOP(!=); - break; - } + { + bool sparse = opcode == OpImageSparseRead; + uint32_t sparse_code_id = 0; + uint32_t sparse_texel_id = 0; + if (sparse) + emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id); - case OpUGreaterThan: - case OpSGreaterThan: - { - auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP_CAST(greaterThan, type); + // imageLoad only accepts int coords, not uint. + auto coord_expr = to_expression(ops[3]); + auto target_coord_type = expression_type(ops[3]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); + + // ES needs to emulate 1D images as 2D. 
+ if (type.image.dim == Dim1D && options.es) + coord_expr = join("ivec2(", coord_expr, ", 0)"); + + // Plain image load/store. + if (sparse) + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ", + coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");"); + } + else + { + statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ", + coord_expr, ", ", to_expression(sparse_texel_id), ");"); + } + imgexpr = join(type_to_glsl(get(result_type)), "(", to_expression(sparse_code_id), ", ", + to_expression(sparse_texel_id), ")"); + } + else + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = + join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); + } + else + imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")"); + } + + if (!sparse) + imgexpr = remap_swizzle(get(result_type), 4, imgexpr); + pure = false; + } + + if (var) + { + bool forward = forced_temporaries.find(id) == end(forced_temporaries); + auto &e = emit_op(result_type, id, imgexpr, forward); + + // We only need to track dependencies if we're reading from image load/store. 
+ if (!pure) + { + e.loaded_from = var->self; + if (forward) + var->dependees.push_back(id); + } + } else - GLSL_BOP_CAST(>, type); - break; - } + emit_op(result_type, id, imgexpr, false); - case OpFOrdGreaterThan: - { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP(greaterThan); - else - GLSL_BOP(>); + inherit_expression_dependencies(id, ops[2]); + if (type.image.ms) + inherit_expression_dependencies(id, ops[5]); break; } - case OpUGreaterThanEqual: - case OpSGreaterThanEqual: + case OpImageTexelPointer: { - auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP_CAST(greaterThanEqual, type); - else - GLSL_BOP_CAST(>=, type); - break; - } + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; - case OpFOrdGreaterThanEqual: - { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP(greaterThanEqual); - else - GLSL_BOP(>=); - break; - } + auto coord_expr = to_expression(ops[3]); + auto target_coord_type = expression_type(ops[3]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); - case OpULessThan: - case OpSLessThan: - { - auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP_CAST(lessThan, type); - else - GLSL_BOP_CAST(<, type); - break; - } + auto expr = join(to_expression(ops[2]), ", ", coord_expr); + auto &e = set(id, expr, result_type, true); - case OpFOrdLessThan: - { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP(lessThan); - else - GLSL_BOP(<); + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); break; } - case OpULessThanEqual: - case OpSLessThanEqual: + case OpImageWrite: { - auto type = opcode == OpULessThanEqual ? 
SPIRType::UInt : SPIRType::Int; - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP_CAST(lessThanEqual, type); - else - GLSL_BOP_CAST(<=, type); - break; - } + // We added Nonwritable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to write to the image after all, remove the qualifier and recompile. + auto *var = maybe_get_backing_variable(ops[0]); + if (var) + { + if (has_decoration(var->self, DecorationNonWritable)) + { + unset_decoration(var->self, DecorationNonWritable); + force_recompile(); + } + } - case OpFOrdLessThanEqual: - { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP(lessThanEqual); + auto &type = expression_type(ops[0]); + auto &value_type = expression_type(ops[2]); + auto store_type = value_type; + store_type.vecsize = 4; + + // imageStore only accepts int coords, not uint. + auto coord_expr = to_expression(ops[1]); + auto target_coord_type = expression_type(ops[1]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr); + + // ES needs to emulate 1D images as 2D. 
+ if (type.image.dim == Dim1D && options.es) + coord_expr = join("ivec2(", coord_expr, ", 0)"); + + if (type.image.ms) + { + uint32_t operands = ops[3]; + if (operands != ImageOperandsSampleMask || length != 5) + SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); + uint32_t samples = ops[4]; + statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", + remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); + } else - GLSL_BOP(<=); + statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", + remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); + + if (var && variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); break; } - // Conversion - case OpSConvert: - case OpConvertSToF: - case OpUConvert: - case OpConvertUToF: + case OpImageQuerySize: { - auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type; + auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto &type = get(result_type); - auto &arg_type = expression_type(ops[2]); - auto func = type_to_glsl_constructor(type); + if (type.basetype == SPIRType::Image) + { + string expr; + if (type.image.sampled == 2) + { + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_shader_image_size"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize."); - // If we're sign-extending or zero-extending, we need to make sure we cast from the correct type. - // For truncation, it does not matter, so don't emit useless casts. - if (arg_type.width < type.width) - emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype); + // The size of an image is always constant. 
+ expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")"); + } + else + { + // This path is hit for samplerBuffers and multisampled images which do not have LOD. + std::string fname = "textureSize"; + if (is_legacy()) + { + auto &imgtype = get(type.self); + fname = legacy_tex_op(fname, imgtype, ops[2]); + } + expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")"); + } + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + } else - emit_unary_func_op(result_type, id, ops[2], func.c_str()); + SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); break; } - case OpConvertFToU: - case OpConvertFToS: + // Compute + case OpControlBarrier: + case OpMemoryBarrier: { - // Cast to expected arithmetic type, then potentially bitcast away to desired signedness. - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - auto &type = get(result_type); - auto expected_type = type; - auto &float_type = expression_type(ops[2]); - expected_type.basetype = - opcode == OpConvertFToS ? 
to_signed_basetype(type.width) : to_unsigned_basetype(type.width); + uint32_t execution_scope = 0; + uint32_t memory; + uint32_t semantics; - auto func = type_to_glsl_constructor(expected_type); - emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype); - break; - } + if (opcode == OpMemoryBarrier) + { + memory = evaluate_constant_u32(ops[0]); + semantics = evaluate_constant_u32(ops[1]); + } + else + { + execution_scope = evaluate_constant_u32(ops[0]); + memory = evaluate_constant_u32(ops[1]); + semantics = evaluate_constant_u32(ops[2]); + } - case OpFConvert: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup) + { + // OpControlBarrier with ScopeSubgroup is subgroupBarrier() + if (opcode != OpControlBarrier) + { + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier); + } + else + { + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier); + } + } - auto func = type_to_glsl_constructor(get(result_type)); - emit_unary_func_op(result_type, id, ops[2], func.c_str()); - break; - } + if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl) + { + // Control shaders only have barriers, and it implies memory barriers. + if (opcode == OpControlBarrier) + statement("barrier();"); + break; + } + + // We only care about these flags, acquire/release and friends are not relevant to GLSL. + semantics = mask_relevant_memory_semantics(semantics); + + if (opcode == OpMemoryBarrier) + { + // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier + // does what we need, so we avoid redundant barriers. 
+ const Instruction *next = get_next_instruction_in_block(instruction); + if (next && next->op == OpControlBarrier) + { + auto *next_ops = stream(*next); + uint32_t next_memory = evaluate_constant_u32(next_ops[1]); + uint32_t next_semantics = evaluate_constant_u32(next_ops[2]); + next_semantics = mask_relevant_memory_semantics(next_semantics); + + bool memory_scope_covered = false; + if (next_memory == memory) + memory_scope_covered = true; + else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) + { + // If we only care about workgroup memory, either Device or Workgroup scope is fine, + // scope does not have to match. + if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && + (memory == ScopeDevice || memory == ScopeWorkgroup)) + { + memory_scope_covered = true; + } + } + else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) + { + // The control barrier has device scope, but the memory barrier just has workgroup scope. + memory_scope_covered = true; + } + + // If we have the same memory scope, and all memory types are covered, we're good. + if (memory_scope_covered && (semantics & next_semantics) == semantics) + break; + } + } + + // We are synchronizing some memory or syncing execution, + // so we cannot forward any loads beyond the memory barrier. + if (semantics || opcode == OpControlBarrier) + { + assert(current_emitting_block); + flush_control_dependent_expressions(current_emitting_block->self); + flush_all_active_variables(); + } + + if (memory == ScopeWorkgroup) // Only need to consider memory within a group + { + if (semantics == MemorySemanticsWorkgroupMemoryMask) + { + // OpControlBarrier implies a memory barrier for shared memory as well. 
+ bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup; + if (!implies_shared_barrier) + statement("memoryBarrierShared();"); + } + else if (semantics != 0) + statement("groupMemoryBarrier();"); + } + else if (memory == ScopeSubgroup) + { + const uint32_t all_barriers = + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; + + if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) + { + // These are not relevant for GLSL, but assume it means memoryBarrier(). + // memoryBarrier() does everything, so no need to test anything else. + statement("subgroupMemoryBarrier();"); + } + else if ((semantics & all_barriers) == all_barriers) + { + // Short-hand instead of emitting 3 barriers. + statement("subgroupMemoryBarrier();"); + } + else + { + // Pick out individual barriers. + if (semantics & MemorySemanticsWorkgroupMemoryMask) + statement("subgroupMemoryBarrierShared();"); + if (semantics & MemorySemanticsUniformMemoryMask) + statement("subgroupMemoryBarrierBuffer();"); + if (semantics & MemorySemanticsImageMemoryMask) + statement("subgroupMemoryBarrierImage();"); + } + } + else + { + const uint32_t all_barriers = + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; - case OpBitcast: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - uint32_t arg = ops[2]; + if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) + { + // These are not relevant for GLSL, but assume it means memoryBarrier(). + // memoryBarrier() does everything, so no need to test anything else. + statement("memoryBarrier();"); + } + else if ((semantics & all_barriers) == all_barriers) + { + // Short-hand instead of emitting 4 barriers. + statement("memoryBarrier();"); + } + else + { + // Pick out individual barriers. 
+ if (semantics & MemorySemanticsWorkgroupMemoryMask) + statement("memoryBarrierShared();"); + if (semantics & MemorySemanticsUniformMemoryMask) + statement("memoryBarrierBuffer();"); + if (semantics & MemorySemanticsImageMemoryMask) + statement("memoryBarrierImage();"); + } + } - auto op = bitcast_glsl_op(get(result_type), expression_type(arg)); - emit_unary_func_op(result_type, id, arg, op.c_str()); + if (opcode == OpControlBarrier) + { + if (execution_scope == ScopeSubgroup) + statement("subgroupBarrier();"); + else + statement("barrier();"); + } break; } - case OpQuantizeToF16: + case OpExtInst: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - uint32_t arg = ops[2]; - - string op; - auto &type = get(result_type); + uint32_t extension_set = ops[2]; + auto ext = get(extension_set).ext; - switch (type.vecsize) - { - case 1: - op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x"); - break; - case 2: - op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))"); - break; - case 3: + if (ext == SPIRExtension::GLSL) { - auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); - auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x"); - op = join("vec3(", op0, ", ", op1, ")"); - break; + emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } - case 4: + else if (ext == SPIRExtension::SPV_AMD_shader_ballot) { - auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); - auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))"); - op = join("vec4(", op0, ", ", op1, ")"); - break; - } - default: - SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); + emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } - - emit_op(result_type, id, op, should_forward(arg)); - inherit_expression_dependencies(id, arg); - break; - } - - // Derivatives - case OpDPdx: - GLSL_UFOP(dFdx); - if (is_legacy_es()) - 
require_extension_internal("GL_OES_standard_derivatives"); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdy: - GLSL_UFOP(dFdy); - if (is_legacy_es()) - require_extension_internal("GL_OES_standard_derivatives"); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdxFine: - GLSL_UFOP(dFdxFine); - if (options.es) + else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) { - SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } - if (options.version < 450) - require_extension_internal("GL_ARB_derivative_control"); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdyFine: - GLSL_UFOP(dFdyFine); - if (options.es) + else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax) { - SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } - if (options.version < 450) - require_extension_internal("GL_ARB_derivative_control"); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdxCoarse: - if (options.es) + else if (ext == SPIRExtension::SPV_AMD_gcn_shader) { - SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } - GLSL_UFOP(dFdxCoarse); - if (options.version < 450) - require_extension_internal("GL_ARB_derivative_control"); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdyCoarse: - GLSL_UFOP(dFdyCoarse); - if (options.es) + else if (ext == SPIRExtension::SPV_debug_info || + ext == SPIRExtension::NonSemanticShaderDebugInfo || + ext == SPIRExtension::NonSemanticGeneric) { - SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + break; // Ignore SPIR-V debug information extended instructions. 
} - if (options.version < 450) - require_extension_internal("GL_ARB_derivative_control"); - register_control_dependent_expression(ops[1]); - break; - - case OpFwidth: - GLSL_UFOP(fwidth); - if (is_legacy_es()) - require_extension_internal("GL_OES_standard_derivatives"); - register_control_dependent_expression(ops[1]); - break; - - case OpFwidthCoarse: - GLSL_UFOP(fwidthCoarse); - if (options.es) + else if (ext == SPIRExtension::NonSemanticDebugPrintf) { - SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + // Operation 1 is printf. + if (ops[3] == 1) + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.\n"); + require_extension_internal("GL_EXT_debug_printf"); + auto &format_string = get(ops[4]).str; + string expr = join("debugPrintfEXT(\"", format_string, "\""); + for (uint32_t i = 5; i < length; i++) + { + expr += ", "; + expr += to_expression(ops[i]); + } + statement(expr, ");"); + } } - if (options.version < 450) - require_extension_internal("GL_ARB_derivative_control"); - register_control_dependent_expression(ops[1]); - break; - - case OpFwidthFine: - GLSL_UFOP(fwidthFine); - if (options.es) + else { - SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + statement("// unimplemented ext op ", instruction.op); + break; } - if (options.version < 450) - require_extension_internal("GL_ARB_derivative_control"); - register_control_dependent_expression(ops[1]); - break; - - // Bitfield - case OpBitFieldInsert: - // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary. - GLSL_QFOP(bitfieldInsert); - break; - case OpBitFieldSExtract: - case OpBitFieldUExtract: - // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary. - GLSL_TFOP(bitfieldExtract); break; + } - case OpBitReverse: - GLSL_UFOP(bitfieldReverse); - break; + // Legacy sub-group stuff ... 
+ case OpSubgroupBallotKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + string expr; + expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)"); + emit_op(result_type, id, expr, should_forward(ops[2])); - case OpBitCount: - GLSL_UFOP(bitCount); + require_extension_internal("GL_ARB_shader_ballot"); + inherit_expression_dependencies(id, ops[2]); + register_control_dependent_expression(ops[1]); break; + } - // Atomics - case OpAtomicExchange: + case OpSubgroupFirstInvocationKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t ptr = ops[2]; - // Ignore semantics for now, probably only relevant to CL. - uint32_t val = ops[5]; - const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; - forced_temporaries.insert(id); - emit_binary_func_op(result_type, id, ptr, val, op); - flush_all_atomic_capable_variables(); + emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB"); + + require_extension_internal("GL_ARB_shader_ballot"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicCompareExchange: + case OpSubgroupReadInvocationKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t ptr = ops[2]; - uint32_t val = ops[6]; - uint32_t comp = ops[7]; - const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap"; + emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB"); - forced_temporaries.insert(id); - emit_trinary_func_op(result_type, id, ptr, comp, val, op); - flush_all_atomic_capable_variables(); + require_extension_internal("GL_ARB_shader_ballot"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicLoad: - flush_all_atomic_capable_variables(); - // FIXME: Image? - // OpAtomicLoad seems to only be relevant for atomic counters. 
- forced_temporaries.insert(ops[1]); - GLSL_UFOP(atomicCounter); - break; - - case OpAtomicStore: - SPIRV_CROSS_THROW("Unsupported opcode OpAtomicStore."); - - case OpAtomicIIncrement: - case OpAtomicIDecrement: + case OpSubgroupAllKHR: { - forced_temporaries.insert(ops[1]); - auto &type = expression_type(ops[2]); - if (type.storage == StorageClassAtomicCounter) - { - // Legacy GLSL stuff, not sure if this is relevant to support. - if (opcode == OpAtomicIIncrement) - GLSL_UFOP(atomicCounterIncrement); - else - GLSL_UFOP(atomicCounterDecrement); - } - else - { - bool atomic_image = check_atomic_image(ops[2]); - bool unsigned_type = (type.basetype == SPIRType::UInt) || - (atomic_image && get(type.image.type).basetype == SPIRType::UInt); - const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; - - const char *increment = nullptr; - if (opcode == OpAtomicIIncrement && unsigned_type) - increment = "1u"; - else if (opcode == OpAtomicIIncrement) - increment = "1"; - else if (unsigned_type) - increment = "uint(-1)"; - else - increment = "-1"; - - emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false); - } + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB"); - flush_all_atomic_capable_variables(); + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicIAdd: + case OpSubgroupAnyKHR: { - const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicAdd" : "atomicAdd"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicISub: + case OpSubgroupAllEqualKHR: { - const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; - forced_temporaries.insert(ops[1]); - auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); - emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); - flush_all_atomic_capable_variables(); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicSMin: - case OpAtomicUMin: + case OpGroupIAddNonUniformAMD: + case OpGroupFAddNonUniformAMD: { - const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicSMax: - case OpAtomicUMax: + case OpGroupFMinNonUniformAMD: + case OpGroupUMinNonUniformAMD: + case OpGroupSMinNonUniformAMD: { - const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicMax" : "atomicMax"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicAnd: + case OpGroupFMaxNonUniformAMD: + case OpGroupUMaxNonUniformAMD: + case OpGroupSMaxNonUniformAMD: { - const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicOr: + case OpFragmentMaskFetchAMD: { - const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.image.dim == spv::DimSubpassData) + { + emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD"); + } + else + { + emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD"); + } + + require_extension_internal("GL_AMD_shader_fragment_mask"); break; } - case OpAtomicXor: + case OpFragmentFetchAMD: { - const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicXor" : "atomicXor"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.image.dim == spv::DimSubpassData) + { + emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD"); + } + else + { + emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD"); + } + + require_extension_internal("GL_AMD_shader_fragment_mask"); break; } - // Geometry shaders - case OpEmitVertex: - statement("EmitVertex();"); + // Vulkan 1.1 sub-group stuff ... + case OpGroupNonUniformElect: + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + case OpGroupNonUniformBallot: + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformBallotBitCount: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + case OpGroupNonUniformFAdd: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformIMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformLogicalAnd: + case OpGroupNonUniformLogicalOr: + case OpGroupNonUniformLogicalXor: + case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadBroadcast: + emit_subgroup_op(instruction); break; - case OpEndPrimitive: - statement("EndPrimitive();"); - break; + case OpFUnordEqual: + case OpFUnordLessThan: + case 
OpFUnordGreaterThan: + case OpFUnordLessThanEqual: + case OpFUnordGreaterThanEqual: + { + // GLSL doesn't specify if floating point comparisons are ordered or unordered, + // but glslang always emits ordered floating point compares for GLSL. + // To get unordered compares, we can test the opposite thing and invert the result. + // This way, we force true when there is any NaN present. + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; - case OpEmitStreamVertex: - statement("EmitStreamVertex();"); - break; + string expr; + if (expression_type(op0).vecsize > 1) + { + const char *comp_op = nullptr; + switch (opcode) + { + case OpFUnordEqual: + comp_op = "notEqual"; + break; - case OpEndStreamPrimitive: - statement("EndStreamPrimitive();"); - break; + case OpFUnordLessThan: + comp_op = "greaterThanEqual"; + break; - // Textures - case OpImageSampleExplicitLod: - case OpImageSampleProjExplicitLod: - case OpImageSampleDrefExplicitLod: - case OpImageSampleProjDrefExplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageSampleDrefImplicitLod: - case OpImageSampleProjDrefImplicitLod: - case OpImageFetch: - case OpImageGather: - case OpImageDrefGather: - // Gets a bit hairy, so move this to a separate instruction. - emit_texture_op(instruction); - break; + case OpFUnordLessThanEqual: + comp_op = "greaterThan"; + break; - case OpImage: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + case OpFUnordGreaterThan: + comp_op = "lessThanEqual"; + break; - // Suppress usage tracking. - auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); + case OpFUnordGreaterThanEqual: + comp_op = "lessThan"; + break; - // When using the image, we need to know which variable it is actually loaded from. - auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? 
var->self : 0; + default: + assert(0); + break; + } + + expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))"); + } + else + { + const char *comp_op = nullptr; + switch (opcode) + { + case OpFUnordEqual: + comp_op = " != "; + break; + + case OpFUnordLessThan: + comp_op = " >= "; + break; + + case OpFUnordLessThanEqual: + comp_op = " > "; + break; + + case OpFUnordGreaterThan: + comp_op = " <= "; + break; + + case OpFUnordGreaterThanEqual: + comp_op = " < "; + break; + + default: + assert(0); + break; + } + + expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")"); + } + + emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(ops[1], op0); + inherit_expression_dependencies(ops[1], op1); break; } - case OpImageQueryLod: - { - if (!options.es && options.version < 400) - { - require_extension_internal("GL_ARB_texture_query_lod"); - // For some reason, the ARB spec is all-caps. - GLSL_BFOP(textureQueryLOD); + case OpReportIntersectionKHR: + // NV is same opcode. + forced_temporaries.insert(ops[1]); + if (ray_tracing_is_khr) + GLSL_BFOP(reportIntersectionEXT); + else + GLSL_BFOP(reportIntersectionNV); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpIgnoreIntersectionNV: + // KHR variant is a terminator. + statement("ignoreIntersectionNV();"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpTerminateRayNV: + // KHR variant is a terminator. 
+ statement("terminateRayNV();"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpTraceNV: + statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", + to_expression(ops[9]), ", ", to_expression(ops[10]), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpTraceRayKHR: + if (!has_decoration(ops[10], DecorationLocation)) + SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR."); + statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", + to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpExecuteCallableNV: + statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpExecuteCallableKHR: + if (!has_decoration(ops[1], DecorationLocation)) + SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR."); + statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + + // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects. 
+ case OpRayQueryInitializeKHR: + flush_variable_declaration(ops[0]); + statement("rayQueryInitializeEXT(", + to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", + to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", + to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), ");"); + break; + case OpRayQueryProceedKHR: + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join("rayQueryProceedEXT(", to_expression(ops[2]), ")"), false); + break; + case OpRayQueryTerminateKHR: + flush_variable_declaration(ops[0]); + statement("rayQueryTerminateEXT(", to_expression(ops[0]), ");"); + break; + case OpRayQueryGenerateIntersectionKHR: + flush_variable_declaration(ops[0]); + statement("rayQueryGenerateIntersectionEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); + break; + case OpRayQueryConfirmIntersectionKHR: + flush_variable_declaration(ops[0]); + statement("rayQueryConfirmIntersectionEXT(", to_expression(ops[0]), ");"); + break; +#define GLSL_RAY_QUERY_GET_OP(op) \ + case OpRayQueryGet##op##KHR: \ + flush_variable_declaration(ops[2]); \ + emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \ + break +#define GLSL_RAY_QUERY_GET_OP2(op) \ + case OpRayQueryGet##op##KHR: \ + flush_variable_declaration(ops[2]); \ + emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \ + break + GLSL_RAY_QUERY_GET_OP(RayTMin); + GLSL_RAY_QUERY_GET_OP(RayFlags); + GLSL_RAY_QUERY_GET_OP(WorldRayOrigin); + GLSL_RAY_QUERY_GET_OP(WorldRayDirection); + GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque); + GLSL_RAY_QUERY_GET_OP2(IntersectionType); + GLSL_RAY_QUERY_GET_OP2(IntersectionT); + GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex); + GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId); + GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset); + 
GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex); + GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex); + GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics); + GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace); + GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection); + GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin); + GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld); + GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject); +#undef GLSL_RAY_QUERY_GET_OP +#undef GLSL_RAY_QUERY_GET_OP2 + + case OpConvertUToAccelerationStructureKHR: + { + require_extension_internal("GL_EXT_ray_tracing"); + + bool elide_temporary = should_forward(ops[2]) && forced_temporaries.count(ops[1]) == 0 && + !hoisted_temporaries.count(ops[1]); + + if (elide_temporary) + { + GLSL_UFOP(accelerationStructureEXT); } - else if (options.es) - SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile."); else - GLSL_BFOP(textureQueryLod); - register_control_dependent_expression(ops[1]); + { + // Force this path in subsequent iterations. + forced_temporaries.insert(ops[1]); + + // We cannot declare a temporary acceleration structure in GLSL. + // If we get to this point, we'll have to emit a temporary uvec2, + // and cast to RTAS on demand. + statement(declare_temporary(expression_type_id(ops[2]), ops[1]), to_unpacked_expression(ops[2]), ";"); + // Use raw SPIRExpression interface to block all usage tracking. 
+ set(ops[1], join("accelerationStructureEXT(", to_name(ops[1]), ")"), ops[0], true); + } break; } - case OpImageQueryLevels: + case OpConvertUToPtr: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + auto &type = get(ops[0]); + if (type.storage != StorageClassPhysicalStorageBufferEXT) + SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr."); - if (!options.es && options.version < 430) - require_extension_internal("GL_ARB_texture_query_levels"); - if (options.es) - SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile."); + auto &in_type = expression_type(ops[2]); + if (in_type.vecsize == 2) + require_extension_internal("GL_EXT_buffer_reference_uvec2"); - auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")"); - auto &restype = get(ops[0]); - expr = bitcast_expression(restype, SPIRType::Int, expr); - emit_op(result_type, id, expr, true); + auto op = type_to_glsl(type); + emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); break; } - case OpImageQuerySamples: + case OpConvertPtrToU: { - auto &type = expression_type(ops[2]); - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + auto &type = get(ops[0]); + auto &ptr_type = expression_type(ops[2]); + if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT) + SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU."); - string expr; - if (type.image.sampled == 2) - expr = join("imageSamples(", to_expression(ops[2]), ")"); - else - expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")"); + if (type.vecsize == 2) + require_extension_internal("GL_EXT_buffer_reference_uvec2"); - auto &restype = get(ops[0]); - expr = bitcast_expression(restype, SPIRType::Int, expr); - emit_op(result_type, id, expr, true); + auto op = type_to_glsl(type); + emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); break; } - case OpSampledImage: - { - uint32_t result_type 
= ops[0]; - uint32_t id = ops[1]; - emit_sampled_image_op(result_type, id, ops[2], ops[3]); - inherit_expression_dependencies(id, ops[2]); - inherit_expression_dependencies(id, ops[3]); + case OpUndef: + // Undefined value has been declared. break; - } - case OpImageQuerySizeLod: + case OpLine: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - - auto expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ", ", - bitcast_expression(SPIRType::Int, ops[3]), ")"); - auto &restype = get(ops[0]); - expr = bitcast_expression(restype, SPIRType::Int, expr); - emit_op(result_type, id, expr, true); + emit_line_directive(ops[0], ops[1]); break; } - // Image load/store - case OpImageRead: - { - // We added Nonreadable speculatively to the OpImage variable due to glslangValidator - // not adding the proper qualifiers. - // If it turns out we need to read the image after all, remove the qualifier and recompile. - auto *var = maybe_get_backing_variable(ops[2]); - if (var) - { - auto &flags = ir.meta[var->self].decoration.decoration_flags; - if (flags.get(DecorationNonReadable)) - { - flags.clear(DecorationNonReadable); - force_recompile(); - } - } + case OpNoLine: + break; - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + case OpDemoteToHelperInvocationEXT: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); + require_extension_internal("GL_EXT_demote_to_helper_invocation"); + statement(backend.demote_literal, ";"); + break; - bool pure; - string imgexpr; - auto &type = expression_type(ops[2]); + case OpIsHelperInvocationEXT: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); + require_extension_internal("GL_EXT_demote_to_helper_invocation"); + // Helper lane state with demote is volatile by nature. + // Do not forward this. 
+ emit_op(ops[0], ops[1], "helperInvocationEXT()", false); + break; - if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code + case OpBeginInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) { - if (type.image.ms) - SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible."); - - auto itr = - find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; }); - - if (itr == end(pls_inputs)) - { - // For non-PLS inputs, we rely on subpass type remapping information to get it right - // since ImageRead always returns 4-component vectors and the backing type is opaque. - if (!var->remapped_components) - SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); - imgexpr = remap_swizzle(get(result_type), var->remapped_components, to_expression(ops[2])); - } - else - { - // PLS input could have different number of components than what the SPIR expects, swizzle to - // the appropriate vector size. - uint32_t components = pls_format_to_components(itr->format); - imgexpr = remap_swizzle(get(result_type), components, to_expression(ops[2])); - } - pure = true; + statement("SPIRV_Cross_beginInvocationInterlock();"); + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. } - else if (type.image.dim == DimSubpassData) + break; + + case OpEndInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) { - if (options.vulkan_semantics) - { - // With Vulkan semantics, use the proper Vulkan GLSL construct. 
- if (type.image.ms) - { - uint32_t operands = ops[4]; - if (operands != ImageOperandsSampleMask || length != 6) - SPIRV_CROSS_THROW( - "Multisampled image used in OpImageRead, but unexpected operand mask was used."); + statement("SPIRV_Cross_endInvocationInterlock();"); + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. + } + break; - uint32_t samples = ops[5]; - imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")"); - } - else - imgexpr = join("subpassLoad(", to_expression(ops[2]), ")"); - } - else - { - if (type.image.ms) - { - uint32_t operands = ops[4]; - if (operands != ImageOperandsSampleMask || length != 6) - SPIRV_CROSS_THROW( - "Multisampled image used in OpImageRead, but unexpected operand mask was used."); + case OpSetMeshOutputsEXT: + statement("SetMeshOutputsEXT(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");"); + break; - uint32_t samples = ops[5]; - imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", - to_expression(samples), ")"); - } - else - { - // Implement subpass loads via texture barrier style sampling. - imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); - } - } - imgexpr = remap_swizzle(get(result_type), 4, imgexpr); - pure = true; - } - else - { - // imageLoad only accepts int coords, not uint. - auto coord_expr = to_expression(ops[3]); - auto target_coord_type = expression_type(ops[3]); - target_coord_type.basetype = SPIRType::Int; - coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); + default: + statement("// unimplemented op ", instruction.op); + break; + } +} - // Plain image load/store. 
- if (type.image.ms) - { - uint32_t operands = ops[4]; - if (operands != ImageOperandsSampleMask || length != 6) - SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); +// Appends function arguments, mapped from global variables, beyond the specified arg index. +// This is used when a function call uses fewer arguments than the function defines. +// This situation may occur if the function signature has been dynamically modified to +// extract global variables referenced from within the function, and convert them to +// function arguments. This is necessary for shader languages that do not support global +// access to shader input content from within a function (eg. Metal). Each additional +// function args uses the name of the global variable. Function nesting will modify the +// functions and function calls all the way up the nesting chain. +void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist) +{ + auto &args = func.arguments; + uint32_t arg_cnt = uint32_t(args.size()); + for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++) + { + auto &arg = args[arg_idx]; + assert(arg.alias_global_variable); - uint32_t samples = ops[5]; - imgexpr = - join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); - } - else - imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")"); + // If the underlying variable needs to be declared + // (ie. a local variable with deferred declaration), do so now. 
+ uint32_t var_id = get(arg.id).basevariable; + if (var_id) + flush_variable_declaration(var_id); - imgexpr = remap_swizzle(get(result_type), 4, imgexpr); - pure = false; - } + arglist.push_back(to_func_call_arg(arg, arg.id)); + } +} - if (var && var->forwardable) - { - bool forward = forced_temporaries.find(id) == end(forced_temporaries); - auto &e = emit_op(result_type, id, imgexpr, forward); +string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) +{ + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + { + return to_member_name(get(type.type_alias), index); + } - // We only need to track dependencies if we're reading from image load/store. - if (!pure) - { - e.loaded_from = var->self; - if (forward) - var->dependees.push_back(id); - } - } - else - emit_op(result_type, id, imgexpr, false); + auto &memb = ir.meta[type.self].members; + if (index < memb.size() && !memb[index].alias.empty()) + return memb[index].alias; + else + return join("_m", index); +} - inherit_expression_dependencies(id, ops[2]); - if (type.image.ms) - inherit_expression_dependencies(id, ops[5]); - break; +string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool) +{ + return join(".", to_member_name(type, index)); +} + +string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector &indices) +{ + string ret; + auto *member_type = &type; + for (auto &index : indices) + { + ret += join(".", to_member_name(*member_type, index)); + member_type = &get(member_type->member_types[index]); } + return ret; +} - case OpImageTexelPointer: +void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index) +{ + auto &memb = ir.meta[type.self].members; + if (index < memb.size() && !memb[index].alias.empty()) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - auto &e = set(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, 
true); + auto &name = memb[index].alias; + if (name.empty()) + return; + + ParsedIR::sanitize_identifier(name, true, true); + update_name_cache(type.member_name_cache, name); + } +} + +// Checks whether the ID is a row_major matrix that requires conversion before use +bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id) +{ + // Natively supported row-major matrices do not need to be converted. + // Legacy targets do not support row major. + if (backend.native_row_major_matrix && !is_legacy()) + return false; + + auto *e = maybe_get(id); + if (e) + return e->need_transpose; + else + return has_decoration(id, DecorationRowMajor); +} + +// Checks whether the member is a row_major matrix that requires conversion before use +bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) +{ + // Natively supported row-major matrices do not need to be converted. + if (backend.native_row_major_matrix && !is_legacy()) + return false; + + // Non-matrix or column-major matrix types do not need to be converted. + if (!has_member_decoration(type.self, index, DecorationRowMajor)) + return false; + + // Only square row-major matrices can be converted at this time. + // Converting non-square matrices will require defining custom GLSL function that + // swaps matrix elements while retaining the original dimensional form of the matrix. + const auto mbr_type = get(type.member_types[index]); + if (mbr_type.columns != mbr_type.vecsize) + SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); + + return true; +} + +// Checks if we need to remap physical type IDs when declaring the type in a buffer. +bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const +{ + return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID); +} - // When using the pointer, we need to know which variable it is actually loaded from. 
- auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? var->self : 0; - break; - } +// Checks whether the member is in packed data type, that might need to be unpacked. +bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const +{ + return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked); +} - case OpImageWrite: +// Wraps the expression string in a function call that converts the +// row_major matrix result of the expression to a column_major matrix. +// Base implementation uses the standard library transpose() function. +// Subclasses may override to use a different function. +string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */, + bool /*is_packed*/) +{ + strip_enclosed_expression(exp_str); + if (!is_matrix(exp_type)) { - // We added Nonwritable speculatively to the OpImage variable due to glslangValidator - // not adding the proper qualifiers. - // If it turns out we need to write to the image after all, remove the qualifier and recompile. - auto *var = maybe_get_backing_variable(ops[0]); - if (var) - { - auto &flags = ir.meta[var->self].decoration.decoration_flags; - if (flags.get(DecorationNonWritable)) - { - flags.clear(DecorationNonWritable); - force_recompile(); - } - } + auto column_index = exp_str.find_last_of('['); + if (column_index == string::npos) + return exp_str; - auto &type = expression_type(ops[0]); - auto &value_type = expression_type(ops[2]); - auto store_type = value_type; - store_type.vecsize = 4; + auto column_expr = exp_str.substr(column_index); + exp_str.resize(column_index); - // imageStore only accepts int coords, not uint. 
- auto coord_expr = to_expression(ops[1]); - auto target_coord_type = expression_type(ops[1]); - target_coord_type.basetype = SPIRType::Int; - coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr); + auto transposed_expr = type_to_glsl_constructor(exp_type) + "("; - if (type.image.ms) + // Loading a column from a row-major matrix. Unroll the load. + for (uint32_t c = 0; c < exp_type.vecsize; c++) { - uint32_t operands = ops[3]; - if (operands != ImageOperandsSampleMask || length != 5) - SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); - uint32_t samples = ops[4]; - statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", - remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); + transposed_expr += join(exp_str, '[', c, ']', column_expr); + if (c + 1 < exp_type.vecsize) + transposed_expr += ", "; } - else - statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", - remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); - if (var && variable_storage_is_aliased(*var)) - flush_all_aliased_variables(); - break; + transposed_expr += ")"; + return transposed_expr; } - - case OpImageQuerySize: + else if (options.version < 120) { - auto &type = expression_type(ops[2]); - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - - if (type.basetype == SPIRType::Image) + // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that + // these GLSL versions do not support non-square matrices. + if (exp_type.vecsize == 2 && exp_type.columns == 2) { - string expr; - if (type.image.sampled == 2) + if (!requires_transpose_2x2) { - // The size of an image is always constant. 
- expr = join("imageSize(", to_expression(ops[2]), ")"); + requires_transpose_2x2 = true; + force_recompile(); } - else + } + else if (exp_type.vecsize == 3 && exp_type.columns == 3) + { + if (!requires_transpose_3x3) { - // This path is hit for samplerBuffers and multisampled images which do not have LOD. - expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ")"); + requires_transpose_3x3 = true; + force_recompile(); + } + } + else if (exp_type.vecsize == 4 && exp_type.columns == 4) + { + if (!requires_transpose_4x4) + { + requires_transpose_4x4 = true; + force_recompile(); } - - auto &restype = get(ops[0]); - expr = bitcast_expression(restype, SPIRType::Int, expr); - emit_op(result_type, id, expr, true); } else - SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); - break; + SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose."); + return join("spvTranspose(", exp_str, ")"); } + else + return join("transpose(", exp_str, ")"); +} - // Compute - case OpControlBarrier: - case OpMemoryBarrier: - { - uint32_t execution_scope = 0; - uint32_t memory; - uint32_t semantics; +string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) +{ + string type_name = type_to_glsl(type, id); + remap_variable_type_name(type, name, type_name); + return join(type_name, " ", name, type_to_array_glsl(type)); +} - if (opcode == OpMemoryBarrier) - { - memory = get(ops[0]).scalar(); - semantics = get(ops[1]).scalar(); - } - else - { - execution_scope = get(ops[0]).scalar(); - memory = get(ops[1]).scalar(); - semantics = get(ops[2]).scalar(); - } +bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const +{ + return var.storage == storage; +} - if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup) - { - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics."); - 
require_extension_internal("GL_KHR_shader_subgroup_basic"); - } +// Emit a structure member. Subclasses may override to modify output, +// or to dynamically add a padding member if needed. +void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const string &qualifier, uint32_t) +{ + auto &membertype = get(member_type_id); - if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl) - { - // Control shaders only have barriers, and it implies memory barriers. - if (opcode == OpControlBarrier) - statement("barrier();"); - break; - } + Bitset memberflags; + auto &memb = ir.meta[type.self].members; + if (index < memb.size()) + memberflags = memb[index].decoration_flags; - // We only care about these flags, acquire/release and friends are not relevant to GLSL. - semantics = mask_relevant_memory_semantics(semantics); + string qualifiers; + bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); - if (opcode == OpMemoryBarrier) - { - // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier - // does what we need, so we avoid redundant barriers. - const Instruction *next = get_next_instruction_in_block(instruction); - if (next && next->op == OpControlBarrier) - { - auto *next_ops = stream(*next); - uint32_t next_memory = get(next_ops[1]).scalar(); - uint32_t next_semantics = get(next_ops[2]).scalar(); - next_semantics = mask_relevant_memory_semantics(next_semantics); + if (is_block) + qualifiers = to_interpolation_qualifiers(memberflags); - bool memory_scope_covered = false; - if (next_memory == memory) - memory_scope_covered = true; - else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) - { - // If we only care about workgroup memory, either Device or Workgroup scope is fine, - // scope does not have to match. 
- if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && - (memory == ScopeDevice || memory == ScopeWorkgroup)) - { - memory_scope_covered = true; - } - } - else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) - { - // The control barrier has device scope, but the memory barrier just has workgroup scope. - memory_scope_covered = true; - } + statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags), + variable_decl(membertype, to_member_name(type, index)), ";"); +} - // If we have the same memory scope, and all memory types are covered, we're good. - if (memory_scope_covered && (semantics & next_semantics) == semantics) - break; - } - } +void CompilerGLSL::emit_struct_padding_target(const SPIRType &) +{ +} - // We are synchronizing some memory or syncing execution, - // so we cannot forward any loads beyond the memory barrier. - if (semantics || opcode == OpControlBarrier) - { - assert(current_emitting_block); - flush_control_dependent_expressions(current_emitting_block->self); - flush_all_active_variables(); - } +string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) +{ + // GL_EXT_buffer_reference variables can be marked as restrict. + if (flags.get(DecorationRestrictPointerEXT)) + return "restrict "; - if (memory == ScopeWorkgroup) // Only need to consider memory within a group + string qual; + + if (type_is_floating_point(type) && flags.get(DecorationNoContraction) && backend.support_precise_qualifier) + qual = "precise "; + + // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). 
+ bool type_supports_precision = + type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt || + type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || + type.basetype == SPIRType::Sampler; + + if (!type_supports_precision) + return qual; + + if (options.es) + { + auto &execution = get_entry_point(); + + if (flags.get(DecorationRelaxedPrecision)) { - if (semantics == MemorySemanticsWorkgroupMemoryMask) - statement("memoryBarrierShared();"); - else if (semantics != 0) - statement("groupMemoryBarrier();"); + bool implied_fmediump = type.basetype == SPIRType::Float && + options.fragment.default_float_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + options.fragment.default_int_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + qual += (implied_fmediump || implied_imediump) ? "" : "mediump "; } - else if (memory == ScopeSubgroup) + else { - const uint32_t all_barriers = - MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; + bool implied_fhighp = + type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && + execution.model == ExecutionModelFragment) || + (execution.model != ExecutionModelFragment)); - if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) - { - // These are not relevant for GLSL, but assume it means memoryBarrier(). - // memoryBarrier() does everything, so no need to test anything else. - statement("subgroupMemoryBarrier();"); - } - else if ((semantics & all_barriers) == all_barriers) - { - // Short-hand instead of emitting 3 barriers. - statement("subgroupMemoryBarrier();"); - } - else - { - // Pick out individual barriers. 
- if (semantics & MemorySemanticsWorkgroupMemoryMask) - statement("subgroupMemoryBarrierShared();"); - if (semantics & MemorySemanticsUniformMemoryMask) - statement("subgroupMemoryBarrierBuffer();"); - if (semantics & MemorySemanticsImageMemoryMask) - statement("subgroupMemoryBarrierImage();"); - } + bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + ((options.fragment.default_int_precision == Options::Highp && + execution.model == ExecutionModelFragment) || + (execution.model != ExecutionModelFragment)); + + qual += (implied_fhighp || implied_ihighp) ? "" : "highp "; } - else + } + else if (backend.allow_precision_qualifiers) + { + // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. + // The default is highp however, so only emit mediump in the rare case that a shader has these. + if (flags.get(DecorationRelaxedPrecision)) + qual += "mediump "; + } + + return qual; +} + +string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) +{ + auto &type = expression_type(id); + bool use_precision_qualifiers = backend.allow_precision_qualifiers; + if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) + { + // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types. + auto &result_type = get(type.image.type); + if (result_type.width < 32) + return "mediump "; + } + return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); +} + +void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var) +{ + // Works around weird behavior in glslangValidator where + // a patch out block is translated to just block members getting the decoration. + // To make glslang not complain when we compile again, we have to transform this back to a case where + // the variable itself has Patch decoration, and not members. + // Same for perprimitiveEXT. 
+ auto &type = get(var.basetype); + if (has_decoration(type.self, DecorationBlock)) + { + uint32_t member_count = uint32_t(type.member_types.size()); + Decoration promoted_decoration = {}; + bool do_promote_decoration = false; + for (uint32_t i = 0; i < member_count; i++) { - const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | - MemorySemanticsImageMemoryMask | MemorySemanticsAtomicCounterMemoryMask; - - if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) - { - // These are not relevant for GLSL, but assume it means memoryBarrier(). - // memoryBarrier() does everything, so no need to test anything else. - statement("memoryBarrier();"); - } - else if ((semantics & all_barriers) == all_barriers) + if (has_member_decoration(type.self, i, DecorationPatch)) { - // Short-hand instead of emitting 4 barriers. - statement("memoryBarrier();"); + promoted_decoration = DecorationPatch; + do_promote_decoration = true; + break; } - else + else if (has_member_decoration(type.self, i, DecorationPerPrimitiveEXT)) { - // Pick out individual barriers. 
- if (semantics & MemorySemanticsWorkgroupMemoryMask) - statement("memoryBarrierShared();"); - if (semantics & MemorySemanticsUniformMemoryMask) - statement("memoryBarrierBuffer();"); - if (semantics & MemorySemanticsImageMemoryMask) - statement("memoryBarrierImage();"); - if (semantics & MemorySemanticsAtomicCounterMemoryMask) - statement("memoryBarrierAtomicCounter();"); + promoted_decoration = DecorationPerPrimitiveEXT; + do_promote_decoration = true; + break; } } - if (opcode == OpControlBarrier) + if (do_promote_decoration) { - if (execution_scope == ScopeSubgroup) - statement("subgroupBarrier();"); - else - statement("barrier();"); + set_decoration(var.self, promoted_decoration); + for (uint32_t i = 0; i < member_count; i++) + unset_member_decoration(type.self, i, promoted_decoration); } - break; } +} - case OpExtInst: +string CompilerGLSL::to_qualifiers_glsl(uint32_t id) +{ + auto &flags = get_decoration_bitset(id); + string res; + + auto *var = maybe_get(id); + + if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) + res += "shared "; + else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT && !backend.shared_is_implied) + res += "taskPayloadSharedEXT "; + + res += to_interpolation_qualifiers(flags); + if (var) + res += to_storage_qualifiers_glsl(*var); + + auto &type = expression_type(id); + if (type.image.dim != DimSubpassData && type.image.sampled == 2) { - uint32_t extension_set = ops[2]; + if (flags.get(DecorationCoherent)) + res += "coherent "; + if (flags.get(DecorationRestrict)) + res += "restrict "; - if (get(extension_set).ext == SPIRExtension::GLSL) - { - emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4); - } - else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot) - { - emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4); - } - else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) + if (flags.get(DecorationNonWritable)) 
+ res += "readonly "; + + bool formatted_load = type.image.format == ImageFormatUnknown; + if (flags.get(DecorationNonReadable)) { - emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + res += "writeonly "; + formatted_load = false; } - else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax) + + if (formatted_load) { - emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + if (!options.es) + require_extension_internal("GL_EXT_shader_image_load_formatted"); + else + SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL."); } - else if (get(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader) + } + + res += to_precision_qualifiers_glsl(id); + + return res; +} + +string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) +{ + // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ... + auto &type = expression_type(arg.id); + const char *direction = ""; + + if (type.pointer) + { + if (arg.write_count && arg.read_count) + direction = "inout "; + else if (arg.write_count) + direction = "out "; + } + + return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id)); +} + +string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) +{ + return to_unpacked_expression(var.initializer); +} + +string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id) +{ +#ifndef NDEBUG + auto &type = get(type_id); + assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction || + type.storage == StorageClassGeneric); +#endif + uint32_t id = ir.increase_bound_by(1); + ir.make_constant_null(id, type_id, false); + return constant_expression(get(id)); +} + +bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const +{ + if (type.pointer) + return false; + + if (!type.array.empty() && options.flatten_multidimensional_arrays) + return 
false; + + for (auto &literal : type.array_size_literal) + if (!literal) + return false; + + for (auto &memb : type.member_types) + if (!type_can_zero_initialize(get(memb))) + return false; + + return true; +} + +string CompilerGLSL::variable_decl(const SPIRVariable &variable) +{ + // Ignore the pointer type since GLSL doesn't have pointers. + auto &type = get_variable_data_type(variable); + + if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer) + SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types."); + + auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self)); + + if (variable.loop_variable && variable.static_expression) + { + uint32_t expr = variable.static_expression; + if (ir.ids[expr].get_type() != TypeUndef) + res += join(" = ", to_unpacked_expression(variable.static_expression)); + else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); + } + else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup)) + { + uint32_t expr = variable.initializer; + if (ir.ids[expr].get_type() != TypeUndef) + res += join(" = ", to_initializer_expression(variable)); + else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); + } + + return res; +} + +const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) +{ + auto &flags = get_decoration_bitset(variable.self); + if (flags.get(DecorationRelaxedPrecision)) + return "mediump "; + else + return "highp "; +} + +string CompilerGLSL::pls_decl(const PlsRemap &var) +{ + auto &variable = get(var.id); + + SPIRType type; + type.vecsize = pls_format_to_components(var.format); + type.basetype = pls_format_to_basetype(var.format); + + return 
join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", + to_name(variable.self)); +} + +uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const +{ + return to_array_size_literal(type, uint32_t(type.array.size() - 1)); +} + +uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const +{ + assert(type.array.size() == type.array_size_literal.size()); + + if (type.array_size_literal[index]) + { + return type.array[index]; + } + else + { + // Use the default spec constant value. + // This is the best we can do. + return evaluate_constant_u32(type.array[index]); + } +} + +string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) +{ + assert(type.array.size() == type.array_size_literal.size()); + + auto &size = type.array[index]; + if (!type.array_size_literal[index]) + return to_expression(size); + else if (size) + return convert_to_string(size); + else if (!backend.unsized_array_supported) + { + // For runtime-sized arrays, we can work around + // lack of standard support for this by simply having + // a single element array. + // + // Runtime length arrays must always be the last element + // in an interface block. + return "1"; + } + else + return ""; +} + +string CompilerGLSL::type_to_array_glsl(const SPIRType &type) +{ + if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) + { + // We are using a wrapped pointer type, and we should not emit any array declarations here. 
+ return ""; + } + + if (type.array.empty()) + return ""; + + if (options.flatten_multidimensional_arrays) + { + string res; + res += "["; + for (auto i = uint32_t(type.array.size()); i; i--) { - emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + res += enclose_expression(to_array_size(type, i - 1)); + if (i > 1) + res += " * "; } - else if (get(extension_set).ext == SPIRExtension::SPV_debug_info) + res += "]"; + return res; + } + else + { + if (type.array.size() > 1) { - break; // Ignore SPIR-V debug information extended instructions. + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_arrays_of_arrays"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. " + "Try using --flatten-multidimensional-arrays or set " + "options.flatten_multidimensional_arrays to true."); } - else + + string res; + for (auto i = uint32_t(type.array.size()); i; i--) { - statement("// unimplemented ext op ", instruction.op); - break; + res += "["; + res += to_array_size(type, i - 1); + res += "]"; } + return res; + } +} +string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) +{ + auto &imagetype = get(type.image.type); + string res; + + switch (imagetype.basetype) + { + case SPIRType::Int: + case SPIRType::Short: + case SPIRType::SByte: + res = "i"; + break; + case SPIRType::UInt: + case SPIRType::UShort: + case SPIRType::UByte: + res = "u"; + break; + default: break; } - // Legacy sub-group stuff ... - case OpSubgroupBallotKHR: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - string expr; - expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)"); - emit_op(result_type, id, expr, should_forward(ops[2])); + // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation. + // We cannot express a true half texture type in GLSL. 
Neither for short integer formats for that matter. - require_extension_internal("GL_ARB_shader_ballot"); - inherit_expression_dependencies(id, ops[2]); - register_control_dependent_expression(ops[1]); - break; + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) + return res + "subpassInput" + (type.image.ms ? "MS" : ""); + else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && + subpass_input_is_framebuffer_fetch(id)) + { + SPIRType sampled_type = get(type.image.type); + sampled_type.vecsize = 4; + return type_to_glsl(sampled_type); } - case OpSubgroupFirstInvocationKHR: + // If we're emulating subpassInput with samplers, force sampler2D + // so we don't have to specify format. + if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB"); - - require_extension_internal("GL_ARB_shader_ballot"); - register_control_dependent_expression(ops[1]); - break; + // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. + if (type.image.dim == DimBuffer && type.image.sampled == 1) + res += "sampler"; + else + res += type.image.sampled == 2 ? "image" : "texture"; } + else + res += "sampler"; - case OpSubgroupReadInvocationKHR: + switch (type.image.dim) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB"); - - require_extension_internal("GL_ARB_shader_ballot"); - register_control_dependent_expression(ops[1]); + case Dim1D: + // ES doesn't support 1D. Fake it with 2D. + res += options.es ? 
"2D" : "1D"; break; - } + case Dim2D: + res += "2D"; + break; + case Dim3D: + res += "3D"; + break; + case DimCube: + res += "Cube"; + break; + case DimRect: + if (options.es) + SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES."); - case OpSubgroupAllKHR: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB"); + if (is_legacy_desktop()) + require_extension_internal("GL_ARB_texture_rectangle"); - require_extension_internal("GL_ARB_shader_group_vote"); - register_control_dependent_expression(ops[1]); + res += "2DRect"; break; - } - case OpSubgroupAnyKHR: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB"); + case DimBuffer: + if (options.es && options.version < 320) + require_extension_internal("GL_EXT_texture_buffer"); + else if (!options.es && options.version < 300) + require_extension_internal("GL_EXT_texture_buffer_object"); + res += "Buffer"; + break; - require_extension_internal("GL_ARB_shader_group_vote"); - register_control_dependent_expression(ops[1]); + case DimSubpassData: + res += "2D"; break; + default: + SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported."); } - case OpSubgroupAllEqualKHR: + if (type.image.ms) + res += "MS"; + if (type.image.arrayed) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB"); - - require_extension_internal("GL_ARB_shader_group_vote"); - register_control_dependent_expression(ops[1]); - break; + if (is_legacy_desktop()) + require_extension_internal("GL_EXT_texture_array"); + res += "Array"; } - case OpGroupIAddNonUniformAMD: - case OpGroupFAddNonUniformAMD: + // "Shadow" state in GLSL only exists for samplers and combined image samplers. 
+ if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && + is_depth_image(type, id)) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD"); + res += "Shadow"; + } - require_extension_internal("GL_AMD_shader_ballot"); - register_control_dependent_expression(ops[1]); - break; + return res; +} + +string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) +{ + if (backend.use_array_constructor && type.array.size() > 1) + { + if (options.flatten_multidimensional_arrays) + SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, " + "e.g. float[][]()."); + else if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_arrays_of_arrays"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310."); } - case OpGroupFMinNonUniformAMD: - case OpGroupUMinNonUniformAMD: - case OpGroupSMinNonUniformAMD: + auto e = type_to_glsl(type); + if (backend.use_array_constructor) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD"); + for (uint32_t i = 0; i < type.array.size(); i++) + e += "[]"; + } + return e; +} - require_extension_internal("GL_AMD_shader_ballot"); - register_control_dependent_expression(ops[1]); - break; +// The optional id parameter indicates the object whose type we are trying +// to find the description for. It is optional. Most type descriptions do not +// depend on a specific object's use of that type. +string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) +{ + if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) + { + // Need to create a magic type name which compacts the entire type information. 
+ string name = type_to_glsl(get_pointee_type(type)); + for (size_t i = 0; i < type.array.size(); i++) + { + if (type.array_size_literal[i]) + name += join(type.array[i], "_"); + else + name += join("id", type.array[i], "_"); + } + name += "Pointer"; + return name; } - case OpGroupFMaxNonUniformAMD: - case OpGroupUMaxNonUniformAMD: - case OpGroupSMaxNonUniformAMD: + switch (type.basetype) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD"); + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + if (backend.explicit_struct_type) + return join("struct ", to_name(type.self)); + else + return to_name(type.self); - require_extension_internal("GL_AMD_shader_ballot"); - register_control_dependent_expression(ops[1]); + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_glsl(type, id); + + case SPIRType::Sampler: + // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing + // this distinction into the type system. + return comparison_ids.count(id) ? "samplerShadow" : "sampler"; + + case SPIRType::AccelerationStructure: + return ray_tracing_is_khr ? 
"accelerationStructureEXT" : "accelerationStructureNV"; + + case SPIRType::RayQuery: + return "rayQueryEXT"; + + case SPIRType::Void: + return "void"; + + default: break; } - case OpFragmentMaskFetchAMD: - { - auto &type = expression_type(ops[2]); - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + if (type.basetype == SPIRType::UInt && is_legacy()) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); - if (type.image.dim == spv::DimSubpassData) + if (type.vecsize == 1 && type.columns == 1) // Scalar builtin + { + switch (type.basetype) { - emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD"); + case SPIRType::Boolean: + return "bool"; + case SPIRType::SByte: + return backend.basic_int8_type; + case SPIRType::UByte: + return backend.basic_uint8_type; + case SPIRType::Short: + return backend.basic_int16_type; + case SPIRType::UShort: + return backend.basic_uint16_type; + case SPIRType::Int: + return backend.basic_int_type; + case SPIRType::UInt: + return backend.basic_uint_type; + case SPIRType::AtomicCounter: + return "atomic_uint"; + case SPIRType::Half: + return "float16_t"; + case SPIRType::Float: + return "float"; + case SPIRType::Double: + return "double"; + case SPIRType::Int64: + return "int64_t"; + case SPIRType::UInt64: + return "uint64_t"; + default: + return "???"; } - else + } + else if (type.vecsize > 1 && type.columns == 1) // Vector builtin + { + switch (type.basetype) { - emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD"); + case SPIRType::Boolean: + return join("bvec", type.vecsize); + case SPIRType::SByte: + return join("i8vec", type.vecsize); + case SPIRType::UByte: + return join("u8vec", type.vecsize); + case SPIRType::Short: + return join("i16vec", type.vecsize); + case SPIRType::UShort: + return join("u16vec", type.vecsize); + case SPIRType::Int: + return join("ivec", type.vecsize); + case SPIRType::UInt: + return join("uvec", type.vecsize); + case SPIRType::Half: + 
return join("f16vec", type.vecsize); + case SPIRType::Float: + return join("vec", type.vecsize); + case SPIRType::Double: + return join("dvec", type.vecsize); + case SPIRType::Int64: + return join("i64vec", type.vecsize); + case SPIRType::UInt64: + return join("u64vec", type.vecsize); + default: + return "???"; } - - require_extension_internal("GL_AMD_shader_fragment_mask"); - break; } - - case OpFragmentFetchAMD: + else if (type.vecsize == type.columns) // Simple Matrix builtin { - auto &type = expression_type(ops[2]); - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - - if (type.image.dim == spv::DimSubpassData) + switch (type.basetype) { - emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD"); + case SPIRType::Boolean: + return join("bmat", type.vecsize); + case SPIRType::Int: + return join("imat", type.vecsize); + case SPIRType::UInt: + return join("umat", type.vecsize); + case SPIRType::Half: + return join("f16mat", type.vecsize); + case SPIRType::Float: + return join("mat", type.vecsize); + case SPIRType::Double: + return join("dmat", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; } - else + } + else + { + switch (type.basetype) { - emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD"); + case SPIRType::Boolean: + return join("bmat", type.columns, "x", type.vecsize); + case SPIRType::Int: + return join("imat", type.columns, "x", type.vecsize); + case SPIRType::UInt: + return join("umat", type.columns, "x", type.vecsize); + case SPIRType::Half: + return join("f16mat", type.columns, "x", type.vecsize); + case SPIRType::Float: + return join("mat", type.columns, "x", type.vecsize); + case SPIRType::Double: + return join("dmat", type.columns, "x", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; } - - require_extension_internal("GL_AMD_shader_fragment_mask"); - break; } +} - // Vulkan 1.1 sub-group stuff ... 
- case OpGroupNonUniformElect: - case OpGroupNonUniformBroadcast: - case OpGroupNonUniformBroadcastFirst: - case OpGroupNonUniformBallot: - case OpGroupNonUniformInverseBallot: - case OpGroupNonUniformBallotBitExtract: - case OpGroupNonUniformBallotBitCount: - case OpGroupNonUniformBallotFindLSB: - case OpGroupNonUniformBallotFindMSB: - case OpGroupNonUniformShuffle: - case OpGroupNonUniformShuffleXor: - case OpGroupNonUniformShuffleUp: - case OpGroupNonUniformShuffleDown: - case OpGroupNonUniformAll: - case OpGroupNonUniformAny: - case OpGroupNonUniformAllEqual: - case OpGroupNonUniformFAdd: - case OpGroupNonUniformIAdd: - case OpGroupNonUniformFMul: - case OpGroupNonUniformIMul: - case OpGroupNonUniformFMin: - case OpGroupNonUniformFMax: - case OpGroupNonUniformSMin: - case OpGroupNonUniformSMax: - case OpGroupNonUniformUMin: - case OpGroupNonUniformUMax: - case OpGroupNonUniformBitwiseAnd: - case OpGroupNonUniformBitwiseOr: - case OpGroupNonUniformBitwiseXor: - case OpGroupNonUniformQuadSwap: - case OpGroupNonUniformQuadBroadcast: - emit_subgroup_op(instruction); - break; - - case OpFUnordEqual: - GLSL_BFOP(unsupported_FUnordEqual); - break; - - case OpFUnordNotEqual: - GLSL_BFOP(unsupported_FUnordNotEqual); - break; - - case OpFUnordLessThan: - GLSL_BFOP(unsupported_FUnordLessThan); - break; - - case OpFUnordGreaterThan: - GLSL_BFOP(unsupported_FUnordGreaterThan); - break; - - case OpFUnordLessThanEqual: - GLSL_BFOP(unsupported_FUnordLessThanEqual); - break; - - case OpFUnordGreaterThanEqual: - GLSL_BFOP(unsupported_FUnordGreaterThanEqual); - break; - - case OpReportIntersectionNV: - statement("reportIntersectionNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); - break; - case OpIgnoreIntersectionNV: - statement("ignoreIntersectionNV();"); - break; - case OpTerminateRayNV: - statement("terminateRayNV();"); - break; - case OpTraceNV: - statement("traceNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", - 
to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", - to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", - to_expression(ops[9]), ", ", to_expression(ops[10]), ");"); - break; - case OpExecuteCallableNV: - statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); - break; +void CompilerGLSL::add_variable(unordered_set &variables_primary, + const unordered_set &variables_secondary, string &name) +{ + if (name.empty()) + return; - case OpConvertUToPtr: + ParsedIR::sanitize_underscores(name); + if (ParsedIR::is_globally_reserved_identifier(name, true)) { - auto &type = get(ops[0]); - if (type.storage != StorageClassPhysicalStorageBufferEXT) - SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr."); - - auto op = type_to_glsl(type); - emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); - break; + name.clear(); + return; } - case OpConvertPtrToU: - { - auto &type = get(ops[0]); - auto &ptr_type = expression_type(ops[2]); - if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT) - SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU."); + update_name_cache(variables_primary, variables_secondary, name); +} - auto op = type_to_glsl(type); - emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); - break; - } +void CompilerGLSL::add_local_variable_name(uint32_t id) +{ + add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias); +} - case OpUndef: - // Undefined value has been declared. 
- break; +void CompilerGLSL::add_resource_name(uint32_t id) +{ + add_variable(resource_names, block_names, ir.meta[id].decoration.alias); +} - case OpLine: - { - emit_line_directive(ops[0], ops[1]); - break; - } +void CompilerGLSL::add_header_line(const std::string &line) +{ + header_lines.push_back(line); +} - case OpNoLine: - break; +bool CompilerGLSL::has_extension(const std::string &ext) const +{ + auto itr = find(begin(forced_extensions), end(forced_extensions), ext); + return itr != end(forced_extensions); +} - default: - statement("// unimplemented op ", instruction.op); - break; - } +void CompilerGLSL::require_extension(const std::string &ext) +{ + if (!has_extension(ext)) + forced_extensions.push_back(ext); } -// Appends function arguments, mapped from global variables, beyond the specified arg index. -// This is used when a function call uses fewer arguments than the function defines. -// This situation may occur if the function signature has been dynamically modified to -// extract global variables referenced from within the function, and convert them to -// function arguments. This is necessary for shader languages that do not support global -// access to shader input content from within a function (eg. Metal). Each additional -// function args uses the name of the global variable. Function nesting will modify the -// functions and function calls all the way up the nesting chain. -void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist) +void CompilerGLSL::require_extension_internal(const string &ext) { - auto &args = func.arguments; - uint32_t arg_cnt = uint32_t(args.size()); - for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++) + if (backend.supports_extensions && !has_extension(ext)) { - auto &arg = args[arg_idx]; - assert(arg.alias_global_variable); + forced_extensions.push_back(ext); + force_recompile(); + } +} - // If the underlying variable needs to be declared - // (ie. 
a local variable with deferred declaration), do so now. - uint32_t var_id = get(arg.id).basevariable; - if (var_id) - flush_variable_declaration(var_id); +void CompilerGLSL::flatten_buffer_block(VariableID id) +{ + auto &var = get(id); + auto &type = get(var.basetype); + auto name = to_name(type.self, false); + auto &flags = get_decoration_bitset(type.self); - arglist.push_back(to_func_call_arg(arg.id)); - } + if (!type.array.empty()) + SPIRV_CROSS_THROW(name + " is an array of UBOs."); + if (type.basetype != SPIRType::Struct) + SPIRV_CROSS_THROW(name + " is not a struct."); + if (!flags.get(DecorationBlock)) + SPIRV_CROSS_THROW(name + " is not a block."); + if (type.member_types.empty()) + SPIRV_CROSS_THROW(name + " is an empty struct."); + + flattened_buffer_blocks.insert(id); } -string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) +bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const { - auto &memb = ir.meta[type.self].members; - if (index < memb.size() && !memb[index].alias.empty()) - return memb[index].alias; - else - return join("_m", index); + return false; // GLSL itself does not need to translate array builtin types to non-array builtin types } -string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool) +bool CompilerGLSL::check_atomic_image(uint32_t id) { - return join(".", to_member_name(type, index)); + auto &type = expression_type(id); + if (type.storage == StorageClassImage) + { + if (options.es && options.version < 320) + require_extension_internal("GL_OES_shader_image_atomic"); + + auto *var = maybe_get_backing_variable(id); + if (var) + { + if (has_decoration(var->self, DecorationNonWritable) || has_decoration(var->self, DecorationNonReadable)) + { + unset_decoration(var->self, DecorationNonWritable); + unset_decoration(var->self, DecorationNonReadable); + force_recompile(); + } + } + return true; + } + else + return false; } -void 
CompilerGLSL::add_member_name(SPIRType &type, uint32_t index) +void CompilerGLSL::add_function_overload(const SPIRFunction &func) { - auto &memb = ir.meta[type.self].members; - if (index < memb.size() && !memb[index].alias.empty()) + Hasher hasher; + for (auto &arg : func.arguments) { - auto &name = memb[index].alias; - if (name.empty()) - return; + // Parameters can vary with pointer type or not, + // but that will not change the signature in GLSL/HLSL, + // so strip the pointer type before hashing. + uint32_t type_id = get_pointee_type_id(arg.type); + auto &type = get(type_id); - // Reserved for temporaries. - if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) + if (!combined_image_samplers.empty()) { - name.clear(); - return; + // If we have combined image samplers, we cannot really trust the image and sampler arguments + // we pass down to callees, because they may be shuffled around. + // Ignore these arguments, to make sure that functions need to differ in some other way + // to be considered different overloads. + if (type.basetype == SPIRType::SampledImage || + (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler) + { + continue; + } } - update_name_cache(type.member_name_cache, name); + hasher.u32(type_id); + } + uint64_t types_hash = hasher.get(); + + auto function_name = to_name(func.self); + auto itr = function_overloads.find(function_name); + if (itr != end(function_overloads)) + { + // There exists a function with this name already. + auto &overloads = itr->second; + if (overloads.count(types_hash) != 0) + { + // Overload conflict, assign a new name. + add_resource_name(func.self); + function_overloads[to_name(func.self)].insert(types_hash); + } + else + { + // Can reuse the name. + overloads.insert(types_hash); + } + } + else + { + // First time we see this function name. 
+ add_resource_name(func.self); + function_overloads[to_name(func.self)].insert(types_hash); } } -// Checks whether the ID is a row_major matrix that requires conversion before use -bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id) +void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) { - // Natively supported row-major matrices do not need to be converted. - // Legacy targets do not support row major. - if (backend.native_row_major_matrix && !is_legacy()) - return false; + if (func.self != ir.default_entry_point) + add_function_overload(func); - // Non-matrix or column-major matrix types do not need to be converted. - if (!has_decoration(id, DecorationRowMajor)) - return false; + // Avoid shadow declarations. + local_variable_names = resource_names; - // Only square row-major matrices can be converted at this time. - // Converting non-square matrices will require defining custom GLSL function that - // swaps matrix elements while retaining the original dimensional form of the matrix. - const auto type = expression_type(id); - if (type.columns != type.vecsize) - SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); + string decl; - return true; -} + auto &type = get(func.return_type); + decl += flags_to_qualifiers_glsl(type, return_flags); + decl += type_to_glsl(type); + decl += type_to_array_glsl(type); + decl += " "; -// Checks whether the member is a row_major matrix that requires conversion before use -bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) -{ - // Natively supported row-major matrices do not need to be converted. - if (backend.native_row_major_matrix && !is_legacy()) - return false; + if (func.self == ir.default_entry_point) + { + // If we need complex fallback in GLSL, we just wrap main() in a function + // and interlock the entire shader ... 
+ if (interlocked_is_complex) + decl += "spvMainInterlockedBody"; + else + decl += "main"; - // Non-matrix or column-major matrix types do not need to be converted. - if (!has_member_decoration(type.self, index, DecorationRowMajor)) - return false; + processing_entry_point = true; + } + else + decl += to_name(func.self); - // Only square row-major matrices can be converted at this time. - // Converting non-square matrices will require defining custom GLSL function that - // swaps matrix elements while retaining the original dimensional form of the matrix. - const auto mbr_type = get(type.member_types[index]); - if (mbr_type.columns != mbr_type.vecsize) - SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); + decl += "("; + SmallVector arglist; + for (auto &arg : func.arguments) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. + if (skip_argument(arg.id)) + continue; - return true; -} + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); -// Checks whether the member is in packed data type, that might need to be unpacked. -// GLSL does not define packed data types, but certain subclasses do. -bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) const -{ - return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked); -} + arglist.push_back(argument_decl(arg)); -// Wraps the expression string in a function call that converts the -// row_major matrix result of the expression to a column_major matrix. -// Base implementation uses the standard library transpose() function. -// Subclasses may override to use a different function. 
-string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/, bool /*is_packed*/) -{ - strip_enclosed_expression(exp_str); - return join("transpose(", exp_str, ")"); -} + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } -string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) -{ - string type_name = type_to_glsl(type, id); - remap_variable_type_name(type, name, type_name); - return join(type_name, " ", name, type_to_array_glsl(type)); + for (auto &arg : func.shadow_arguments) + { + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); + + arglist.push_back(argument_decl(arg)); + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } + + decl += merge(arglist); + decl += ")"; + statement(decl); } -// Emit a structure member. Subclasses may override to modify output, -// or to dynamically add a padding member if needed. -void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, - const string &qualifier, uint32_t) +void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) { - auto &membertype = get(member_type_id); + // Avoid potential cycles. + if (func.active) + return; + func.active = true; - Bitset memberflags; - auto &memb = ir.meta[type.self].members; - if (index < memb.size()) - memberflags = memb[index].decoration_flags; + // If we depend on a function, emit that function before we emit our own function. 
+ for (auto block : func.blocks) + { + auto &b = get(block); + for (auto &i : b.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); - string qualifiers; - bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || - ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + if (op == OpFunctionCall) + { + // Recursively emit functions which are called. + uint32_t id = ops[2]; + emit_function(get(id), ir.meta[ops[1]].decoration.decoration_flags); + } + } + } - if (is_block) - qualifiers = to_interpolation_qualifiers(memberflags); + if (func.entry_line.file_id != 0) + emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal); + emit_function_prototype(func, return_flags); + begin_scope(); - statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags), - variable_decl(membertype, to_member_name(type, index)), ";"); -} + if (func.self == ir.default_entry_point) + emit_entry_point_declarations(); -const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) -{ - // GL_EXT_buffer_reference variables can be marked as restrict. - if (flags.get(DecorationRestrictPointerEXT)) - return "restrict "; + current_function = &func; + auto &entry_block = get(func.entry_block); - // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). 
- if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt && - type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage && - type.basetype != SPIRType::Sampler) - return ""; + sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack)); + for (auto &array : func.constant_arrays_needed_on_stack) + { + auto &c = get(array); + auto &type = get(c.constant_type); + statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";"); + } - if (options.es) + for (auto &v : func.local_variables) { - auto &execution = get_entry_point(); + auto &var = get(v); + var.deferred_declaration = false; - if (flags.get(DecorationRelaxedPrecision)) + if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup)) { - bool implied_fmediump = type.basetype == SPIRType::Float && - options.fragment.default_float_precision == Options::Mediump && - execution.model == ExecutionModelFragment; + // Special variable type which cannot have initializer, + // need to be declared as standalone variables. + // Comes from MSL which can push global variables as local variables in main function. + add_local_variable_name(var.self); + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } + else if (var.storage == StorageClassPrivate) + { + // These variables will not have had their CFG usage analyzed, so move it to the entry block. + // Comes from MSL which can push global variables as local variables in main function. + // We could just declare them right now, but we would miss out on an important initialization case which is + // LUT declaration in MSL. + // If we don't declare the variable when it is assigned we're forced to go through a helper function + // which copies elements one by one. 
+ add_local_variable_name(var.self); - bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && - options.fragment.default_int_precision == Options::Mediump && - execution.model == ExecutionModelFragment; + if (var.initializer) + { + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } + else + { + auto &dominated = entry_block.dominated_variables; + if (find(begin(dominated), end(dominated), var.self) == end(dominated)) + entry_block.dominated_variables.push_back(var.self); + var.deferred_declaration = true; + } + } + else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression) + { + // No need to declare this variable, it has a static expression. + var.deferred_declaration = false; + } + else if (expression_is_lvalue(v)) + { + add_local_variable_name(var.self); - return implied_fmediump || implied_imediump ? "" : "mediump "; + // Loop variables should never be declared early, they are explicitly emitted in a loop. + if (var.initializer && !var.loop_variable) + statement(variable_decl_function_local(var), ";"); + else + { + // Don't declare variable until first use to declutter the GLSL output quite a lot. + // If we don't touch the variable before first branch, + // declare it then since we need variable declaration to be in top scope. + var.deferred_declaration = true; + } } else { - bool implied_fhighp = - type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && - execution.model == ExecutionModelFragment) || - (execution.model != ExecutionModelFragment)); + // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this. + // For these types (non-lvalue), we enforce forwarding through a shadowed variable. + // This means that when we OpStore to these variables, we just write in the expression ID directly. 
+ // This breaks any kind of branching, since the variable must be statically assigned. + // Branching on samplers and images would be pretty much impossible to fake in GLSL. + var.statically_assigned = true; + } - bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && - ((options.fragment.default_int_precision == Options::Highp && - execution.model == ExecutionModelFragment) || - (execution.model != ExecutionModelFragment)); + var.loop_variable_enable = false; - return implied_fhighp || implied_ihighp ? "" : "highp "; + // Loop variables are never declared outside their for-loop, so block any implicit declaration. + if (var.loop_variable) + { + var.deferred_declaration = false; + // Need to reset the static expression so we can fallback to initializer if need be. + var.static_expression = 0; } } - else if (backend.allow_precision_qualifiers) + + // Enforce declaration order for regression testing purposes. + for (auto &block_id : func.blocks) { - // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. - // The default is highp however, so only emit mediump in the rare case that a shader has these. - if (flags.get(DecorationRelaxedPrecision)) - return "mediump "; - else - return ""; + auto &block = get(block_id); + sort(begin(block.dominated_variables), end(block.dominated_variables)); + } + + for (auto &line : current_function->fixup_hooks_in) + line(); + + emit_block_chain(entry_block); + + end_scope(); + processing_entry_point = false; + statement(""); + + // Make sure deferred declaration state for local variables is cleared when we are done with function. + // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise. 
+ for (auto &v : func.local_variables) + { + auto &var = get(v); + var.deferred_declaration = false; } - else - return ""; } -const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) +void CompilerGLSL::emit_fixup() { - auto &type = expression_type(id); - bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es; - if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) + if (is_vertex_like_shader()) { - // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types. - auto &result_type = get(type.image.type); - if (result_type.width < 32) - return "mediump "; + if (options.vertex.fixup_clipspace) + { + const char *suffix = backend.float_literal_suffix ? "f" : ""; + statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;"); + } + + if (options.vertex.flip_vert_y) + statement("gl_Position.y = -gl_Position.y;"); } - return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); } -string CompilerGLSL::to_qualifiers_glsl(uint32_t id) +void CompilerGLSL::flush_phi(BlockID from, BlockID to) { - auto &flags = ir.meta[id].decoration.decoration_flags; - string res; + auto &child = get(to); + if (child.ignore_phi_from_block == from) + return; - auto *var = maybe_get(id); + unordered_set temporary_phi_variables; - if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) - res += "shared "; + for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr) + { + auto &phi = *itr; - res += to_interpolation_qualifiers(flags); - if (var) - res += to_storage_qualifiers_glsl(*var); + if (phi.parent == from) + { + auto &var = get(phi.function_variable); - auto &type = expression_type(id); - if (type.image.dim != DimSubpassData && type.image.sampled == 2) - { - if (flags.get(DecorationCoherent)) - res += "coherent "; - if (flags.get(DecorationRestrict)) - res += "restrict "; - if 
(flags.get(DecorationNonWritable)) - res += "readonly "; - if (flags.get(DecorationNonReadable)) - res += "writeonly "; - } + // A Phi variable might be a loop variable, so flush to static expression. + if (var.loop_variable && !var.loop_variable_enable) + var.static_expression = phi.local_variable; + else + { + flush_variable_declaration(phi.function_variable); - res += to_precision_qualifiers_glsl(id); + // Check if we are going to write to a Phi variable that another statement will read from + // as part of another Phi node in our target block. + // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads. + // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. + bool need_saved_temporary = + find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool { + return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from; + }) != end(child.phi_variables); - return res; -} + if (need_saved_temporary) + { + // Need to make sure we declare the phi variable with a copy at the right scope. + // We cannot safely declare a temporary here since we might be inside a continue block. + if (!var.allocate_temporary_copy) + { + var.allocate_temporary_copy = true; + force_recompile(); + } + statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";"); + temporary_phi_variables.insert(phi.function_variable); + } -string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) -{ - // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ... - auto &type = expression_type(arg.id); - const char *direction = ""; + // This might be called in continue block, so make sure we + // use this to emit ESSL 1.0 compliant increments/decrements. 
+ auto lhs = to_expression(phi.function_variable); - if (type.pointer) - { - if (arg.write_count && arg.read_count) - direction = "inout "; - else if (arg.write_count) - direction = "out "; - } + string rhs; + if (temporary_phi_variables.count(phi.local_variable)) + rhs = join("_", phi.local_variable, "_copy"); + else + rhs = to_pointer_expression(phi.local_variable); - return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id)); -} + if (!optimize_read_modify_write(get(var.basetype), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + } -string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) -{ - return to_expression(var.initializer); + register_write(phi.function_variable); + } + } } -string CompilerGLSL::variable_decl(const SPIRVariable &variable) +void CompilerGLSL::branch_to_continue(BlockID from, BlockID to) { - // Ignore the pointer type since GLSL doesn't have pointers. - auto &type = get_variable_data_type(variable); + auto &to_block = get(to); + if (from == to) + return; - if (type.pointer_depth > 1) - SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types."); + assert(is_continue(to)); + if (to_block.complex_continue) + { + // Just emit the whole block chain as is. + auto usage_counts = expression_usage_counts; - auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self)); + emit_block_chain(to_block); - if (variable.loop_variable && variable.static_expression) - { - uint32_t expr = variable.static_expression; - if (ir.ids[expr].get_type() != TypeUndef) - res += join(" = ", to_expression(variable.static_expression)); + // Expression usage counts are moot after returning from the continue block. 
+ expression_usage_counts = usage_counts; } - else if (variable.initializer) + else { - uint32_t expr = variable.initializer; - if (ir.ids[expr].get_type() != TypeUndef) - res += join(" = ", to_initializer_expression(variable)); - } - return res; -} + auto &from_block = get(from); + bool outside_control_flow = false; + uint32_t loop_dominator = 0; -const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) -{ - auto &flags = ir.meta[variable.self].decoration.decoration_flags; - if (flags.get(DecorationRelaxedPrecision)) - return "mediump "; - else - return "highp "; -} + // FIXME: Refactor this to not use the old loop_dominator tracking. + if (from_block.merge_block) + { + // If we are a loop header, we don't set the loop dominator, + // so just use "self" here. + loop_dominator = from; + } + else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator)) + { + loop_dominator = from_block.loop_dominator; + } -string CompilerGLSL::pls_decl(const PlsRemap &var) -{ - auto &variable = get(var.id); + if (loop_dominator != 0) + { + auto &cfg = get_cfg_for_current_function(); - SPIRType type; - type.vecsize = pls_format_to_components(var.format); - type.basetype = pls_format_to_basetype(var.format); + // For non-complex continue blocks, we implicitly branch to the continue block + // by having the continue block be part of the loop header in for (; ; continue-block). + outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from); + } - return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", - to_name(variable.self)); + // Some simplification for for-loops. We always end up with a useless continue; + // statement since we branch to a loop block. + // Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block, + // we can avoid writing out an explicit continue statement. 
+ // Similar optimization to return statements if we know we're outside flow control. + if (!outside_control_flow) + statement("continue;"); + } } -uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const +void CompilerGLSL::branch(BlockID from, BlockID to) { - return to_array_size_literal(type, uint32_t(type.array.size() - 1)); -} + flush_phi(from, to); + flush_control_dependent_expressions(from); -uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const -{ - assert(type.array.size() == type.array_size_literal.size()); + bool to_is_continue = is_continue(to); - if (type.array_size_literal[index]) + // This is only a continue if we branch to our loop dominator. + if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get(from).loop_dominator == to) { - return type.array[index]; + // This can happen if we had a complex continue block which was emitted. + // Once the continue block tries to branch to the loop header, just emit continue; + // and end the chain here. + statement("continue;"); + } + else if (from != to && is_break(to)) + { + // We cannot break to ourselves, so check explicitly for from != to. + // This case can trigger if a loop header is all three of these things: + // - Continue block + // - Loop header + // - Break merge target all at once ... + + // Very dirty workaround. + // Switch constructs are able to break, but they cannot break out of a loop at the same time, + // yet SPIR-V allows it. + // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block, + // write to the ladder here, and defer the break. + // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case. 
+ if (is_loop_break(to)) + { + for (size_t n = current_emitting_switch_stack.size(); n; n--) + { + auto *current_emitting_switch = current_emitting_switch_stack[n - 1]; + + if (current_emitting_switch && + current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) && + get(current_emitting_switch->loop_dominator).merge_block == to) + { + if (!current_emitting_switch->need_ladder_break) + { + force_recompile(); + current_emitting_switch->need_ladder_break = true; + } + + statement("_", current_emitting_switch->self, "_ladder_break = true;"); + } + else + break; + } + } + statement("break;"); } - else + else if (to_is_continue || from == to) { - // Use the default spec constant value. - // This is the best we can do. - uint32_t array_size_id = type.array[index]; + // For from == to case can happen for a do-while loop which branches into itself. + // We don't mark these cases as continue blocks, but the only possible way to branch into + // ourselves is through means of continue blocks. - // Explicitly check for this case. The error message you would get (bad cast) makes no sense otherwise. - if (ir.ids[array_size_id].get_type() == TypeConstantOp) - SPIRV_CROSS_THROW("An array size was found to be an OpSpecConstantOp. This is not supported since " - "SPIRV-Cross cannot deduce the actual size here."); + // If we are merging to a continue block, there is no need to emit the block chain for continue here. + // We can branch to the continue block after we merge execution. - uint32_t array_size = get(array_size_id).scalar(); - return array_size; + // Here we make use of structured control flow rules from spec: + // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block + // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG + // If we are branching to a merge block, we must be inside a construct which dominates the merge block. 
+ auto &block_meta = ir.block_meta[to]; + bool branching_to_merge = + (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT | + ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0; + if (!to_is_continue || !branching_to_merge) + branch_to_continue(from, to); } + else if (!is_conditional(to)) + emit_block_chain(get(to)); + + // It is important that we check for break before continue. + // A block might serve two purposes, a break block for the inner scope, and + // a continue block in the outer scope. + // Inner scope always takes precedence. } -string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) +void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block) { - assert(type.array.size() == type.array_size_literal.size()); + auto &from_block = get(from); + BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0); - // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays. - // Opt for unsized as it's the more "correct" variant to use. - if (type.storage == StorageClassInput && (get_entry_point().model == ExecutionModelTessellationControl || - get_entry_point().model == ExecutionModelTessellationEvaluation)) - return ""; + // If we branch directly to our selection merge target, we don't need a code path. + bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block); + bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block); - auto &size = type.array[index]; - if (!type.array_size_literal[index]) - return to_expression(size); - else if (size) - return convert_to_string(size); - else if (!backend.unsized_array_supported) + if (!true_block_needs_code && !false_block_needs_code) + return; + + // We might have a loop merge here. Only consider selection flattening constructs. 
+ // Loop hints are handled explicitly elsewhere. + if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten) + emit_block_hints(from_block); + + if (true_block_needs_code) { - // For runtime-sized arrays, we can work around - // lack of standard support for this by simply having - // a single element array. - // - // Runtime length arrays must always be the last element - // in an interface block. - return "1"; + statement("if (", to_expression(cond), ")"); + begin_scope(); + branch(from, true_block); + end_scope(); + + if (false_block_needs_code) + { + statement("else"); + begin_scope(); + branch(from, false_block); + end_scope(); + } + } + else if (false_block_needs_code) + { + // Only need false path, use negative conditional. + statement("if (!", to_enclosed_expression(cond), ")"); + begin_scope(); + branch(from, false_block); + end_scope(); } - else - return ""; } -string CompilerGLSL::type_to_array_glsl(const SPIRType &type) +// FIXME: This currently cannot handle complex continue blocks +// as in do-while. +// This should be seen as a "trivial" continue block. +string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block) { - if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) - { - // We are using a wrapped pointer type, and we should not emit any array declarations here. - return ""; - } + auto *block = &get(continue_block); - if (type.array.empty()) - return ""; + // While emitting the continue block, declare_temporary will check this + // if we have to emit temporaries. + current_continue_block = block; - if (options.flatten_multidimensional_arrays) + SmallVector statements; + + // Capture all statements into our list. + auto *old = redirect_statement; + redirect_statement = &statements; + + // Stamp out all blocks one after each other. 
+ while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0) { - string res; - res += "["; - for (auto i = uint32_t(type.array.size()); i; i--) + // Write out all instructions we have in this block. + emit_block_instructions(*block); + + // For plain branchless for/while continue blocks. + if (block->next_block) { - res += enclose_expression(to_array_size(type, i - 1)); - if (i > 1) - res += " * "; + flush_phi(continue_block, block->next_block); + block = &get(block->next_block); } - res += "]"; - return res; - } - else - { - if (type.array.size() > 1) + // For do while blocks. The last block will be a select block. + else if (block->true_block && follow_true_block) { - if (!options.es && options.version < 430) - require_extension_internal("GL_ARB_arrays_of_arrays"); - else if (options.es && options.version < 310) - SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. " - "Try using --flatten-multidimensional-arrays or set " - "options.flatten_multidimensional_arrays to true."); + flush_phi(continue_block, block->true_block); + block = &get(block->true_block); } - - string res; - for (auto i = uint32_t(type.array.size()); i; i--) + else if (block->false_block && follow_false_block) { - res += "["; - res += to_array_size(type, i - 1); - res += "]"; + flush_phi(continue_block, block->false_block); + block = &get(block->false_block); + } + else + { + SPIRV_CROSS_THROW("Invalid continue block detected!"); } - return res; } -} -string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) -{ - auto &imagetype = get(type.image.type); - string res; + // Restore old pointer. + redirect_statement = old; - switch (imagetype.basetype) + // Somewhat ugly, strip off the last ';' since we use ',' instead. + // Ideally, we should select this behavior in statement(). 
+ for (auto &s : statements) { - case SPIRType::Int: - case SPIRType::Short: - case SPIRType::SByte: - res = "i"; - break; - case SPIRType::UInt: - case SPIRType::UShort: - case SPIRType::UByte: - res = "u"; - break; - default: - break; + if (!s.empty() && s.back() == ';') + s.erase(s.size() - 1, 1); } - // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation. - // We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter. - - if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) - return res + "subpassInput" + (type.image.ms ? "MS" : ""); + current_continue_block = nullptr; + return merge(statements); +} - // If we're emulating subpassInput with samplers, force sampler2D - // so we don't have to specify format. - if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) +void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block) +{ + // While loops do not take initializers, so declare all of them outside. + for (auto &loop_var : block.loop_variables) { - // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. - if (type.image.dim == DimBuffer && type.image.sampled == 1) - res += "sampler"; - else - res += type.image.sampled == 2 ? 
"image" : "texture"; + auto &var = get(loop_var); + statement(variable_decl(var), ";"); } - else - res += "sampler"; - - switch (type.image.dim) - { - case Dim1D: - res += "1D"; - break; - case Dim2D: - res += "2D"; - break; - case Dim3D: - res += "3D"; - break; - case DimCube: - res += "Cube"; - break; - case DimRect: - if (options.es) - SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES."); +} - if (is_legacy_desktop()) - require_extension_internal("GL_ARB_texture_rectangle"); +string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block) +{ + if (block.loop_variables.empty()) + return ""; - res += "2DRect"; - break; + bool same_types = for_loop_initializers_are_same_type(block); + // We can only declare for loop initializers if all variables are of same type. + // If we cannot do this, declare individual variables before the loop header. - case DimBuffer: - if (options.es && options.version < 320) - require_extension_internal("GL_OES_texture_buffer"); - else if (!options.es && options.version < 300) - require_extension_internal("GL_EXT_texture_buffer_object"); - res += "Buffer"; - break; + // We might have a loop variable candidate which was not assigned to for some reason. + uint32_t missing_initializers = 0; + for (auto &variable : block.loop_variables) + { + uint32_t expr = get(variable).static_expression; - case DimSubpassData: - res += "2D"; - break; - default: - SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported."); + // Sometimes loop variables are initialized with OpUndef, but we can just declare + // a plain variable without initializer in this case. 
+ if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) + missing_initializers++; } - if (type.image.ms) - res += "MS"; - if (type.image.arrayed) + if (block.loop_variables.size() == 1 && missing_initializers == 0) { - if (is_legacy_desktop()) - require_extension_internal("GL_EXT_texture_array"); - res += "Array"; + return variable_decl(get(block.loop_variables.front())); + } + else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size())) + { + for (auto &loop_var : block.loop_variables) + statement(variable_decl(get(loop_var)), ";"); + return ""; } + else + { + // We have a mix of loop variables, either ones with a clear initializer, or ones without. + // Separate the two streams. + string expr; + + for (auto &loop_var : block.loop_variables) + { + uint32_t static_expr = get(loop_var).static_expression; + if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef) + { + statement(variable_decl(get(loop_var)), ";"); + } + else + { + auto &var = get(loop_var); + auto &type = get_variable_data_type(var); + if (expr.empty()) + { + // For loop initializers are of the form (var).static_expression; + if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) + continue; + + if (expected == 0) + { + expected = get(var).basetype; + expected_flags = get_decoration_bitset(var); + } + else if (expected != get(var).basetype) + return false; + + // Precision flags and things like that must also match. + if (expected_flags != get_decoration_bitset(var)) + return false; } - auto e = type_to_glsl(type); - for (uint32_t i = 0; i < type.array.size(); i++) - e += "[]"; - return e; + return true; } -// The optional id parameter indicates the object whose type we are trying -// to find the description for. It is optional. Most type descriptions do not -// depend on a specific object's use of that type. 
-string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) +bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method) { - if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) + SPIRBlock::ContinueBlockType continue_type = continue_block_type(get(block.continue_block)); + + if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop) { - // Need to create a magic type name which compacts the entire type information. - string name = type_to_glsl(get_pointee_type(type)); - for (size_t i = 0; i < type.array.size(); i++) + uint32_t current_count = statement_count; + // If we're trying to create a true for loop, + // we need to make sure that all opcodes before branch statement do not actually emit any code. + // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. + emit_block_instructions(block); + + bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries); + + // This can work! We only did trivial things which could be forwarded in block body! + if (current_count == statement_count && condition_is_temporary) { - if (type.array_size_literal[i]) - name += join(type.array[i], "_"); - else - name += join("id", type.array[i], "_"); - } - name += "Pointer"; - return name; - } + switch (continue_type) + { + case SPIRBlock::ForLoop: + { + // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. + flush_undeclared_variables(block); - switch (type.basetype) - { - case SPIRType::Struct: - // Need OpName lookup here to get a "sensible" name for a struct. - if (backend.explicit_struct_type) - return join("struct ", to_name(type.self)); - else - return to_name(type.self); + // Important that we do this in this order because + // emitting the continue block can invalidate the condition expression. 
+ auto initializer = emit_for_loop_initializers(block); + auto condition = to_expression(block.condition); - case SPIRType::Image: - case SPIRType::SampledImage: - return image_type_glsl(type, id); + // Condition might have to be inverted. + if (execution_is_noop(get(block.true_block), get(block.merge_block))) + condition = join("!", enclose_expression(condition)); - case SPIRType::Sampler: - // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing - // this distinction into the type system. - return comparison_ids.count(id) ? "samplerShadow" : "sampler"; + emit_block_hints(block); + if (method != SPIRBlock::MergeToSelectContinueForLoop) + { + auto continue_block = emit_continue_block(block.continue_block, false, false); + statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); + } + else + statement("for (", initializer, "; ", condition, "; )"); + break; + } - case SPIRType::AccelerationStructureNV: - return "accelerationStructureNV"; + case SPIRBlock::WhileLoop: + { + // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header. + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emit_block_hints(block); - case SPIRType::Void: - return "void"; + auto condition = to_expression(block.condition); + // Condition might have to be inverted. + if (execution_is_noop(get(block.true_block), get(block.merge_block))) + condition = join("!", enclose_expression(condition)); - default: - break; - } + statement("while (", condition, ")"); + break; + } - if (type.basetype == SPIRType::UInt && is_legacy()) - SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); + default: + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. 
+ return false; + } - if (type.vecsize == 1 && type.columns == 1) // Scalar builtin - { - switch (type.basetype) - { - case SPIRType::Boolean: - return "bool"; - case SPIRType::SByte: - return backend.basic_int8_type; - case SPIRType::UByte: - return backend.basic_uint8_type; - case SPIRType::Short: - return backend.basic_int16_type; - case SPIRType::UShort: - return backend.basic_uint16_type; - case SPIRType::Int: - return backend.basic_int_type; - case SPIRType::UInt: - return backend.basic_uint_type; - case SPIRType::AtomicCounter: - return "atomic_uint"; - case SPIRType::Half: - return "float16_t"; - case SPIRType::Float: - return "float"; - case SPIRType::Double: - return "double"; - case SPIRType::Int64: - return "int64_t"; - case SPIRType::UInt64: - return "uint64_t"; - default: - return "???"; - } - } - else if (type.vecsize > 1 && type.columns == 1) // Vector builtin - { - switch (type.basetype) - { - case SPIRType::Boolean: - return join("bvec", type.vecsize); - case SPIRType::SByte: - return join("i8vec", type.vecsize); - case SPIRType::UByte: - return join("u8vec", type.vecsize); - case SPIRType::Short: - return join("i16vec", type.vecsize); - case SPIRType::UShort: - return join("u16vec", type.vecsize); - case SPIRType::Int: - return join("ivec", type.vecsize); - case SPIRType::UInt: - return join("uvec", type.vecsize); - case SPIRType::Half: - return join("f16vec", type.vecsize); - case SPIRType::Float: - return join("vec", type.vecsize); - case SPIRType::Double: - return join("dvec", type.vecsize); - case SPIRType::Int64: - return join("i64vec", type.vecsize); - case SPIRType::UInt64: - return join("u64vec", type.vecsize); - default: - return "???"; - } - } - else if (type.vecsize == type.columns) // Simple Matrix builtin - { - switch (type.basetype) - { - case SPIRType::Boolean: - return join("bmat", type.vecsize); - case SPIRType::Int: - return join("imat", type.vecsize); - case SPIRType::UInt: - return join("umat", type.vecsize); - case 
SPIRType::Half: - return join("f16mat", type.vecsize); - case SPIRType::Float: - return join("mat", type.vecsize); - case SPIRType::Double: - return join("dmat", type.vecsize); - // Matrix types not supported for int64/uint64. - default: - return "???"; + begin_scope(); + return true; } - } - else - { - switch (type.basetype) + else { - case SPIRType::Boolean: - return join("bmat", type.columns, "x", type.vecsize); - case SPIRType::Int: - return join("imat", type.columns, "x", type.vecsize); - case SPIRType::UInt: - return join("umat", type.columns, "x", type.vecsize); - case SPIRType::Half: - return join("f16mat", type.columns, "x", type.vecsize); - case SPIRType::Float: - return join("mat", type.columns, "x", type.vecsize); - case SPIRType::Double: - return join("dmat", type.columns, "x", type.vecsize); - // Matrix types not supported for int64/uint64. - default: - return "???"; + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; } } -} + else if (method == SPIRBlock::MergeToDirectForLoop) + { + auto &child = get(block.next_block); -void CompilerGLSL::add_variable(unordered_set &variables_primary, - const unordered_set &variables_secondary, string &name) -{ - if (name.empty()) - return; + // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. + flush_undeclared_variables(child); - // Reserved for temporaries. - if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) - { - name.clear(); - return; - } + uint32_t current_count = statement_count; - // Avoid double underscores. - name = sanitize_underscores(name); + // If we're trying to create a true for loop, + // we need to make sure that all opcodes before branch statement do not actually emit any code. + // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. 
+ emit_block_instructions(child); - update_name_cache(variables_primary, variables_secondary, name); -} + bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); -void CompilerGLSL::add_local_variable_name(uint32_t id) -{ - add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias); -} + if (current_count == statement_count && condition_is_temporary) + { + uint32_t target_block = child.true_block; -void CompilerGLSL::add_resource_name(uint32_t id) -{ - add_variable(resource_names, block_names, ir.meta[id].decoration.alias); -} + switch (continue_type) + { + case SPIRBlock::ForLoop: + { + // Important that we do this in this order because + // emitting the continue block can invalidate the condition expression. + auto initializer = emit_for_loop_initializers(block); + auto condition = to_expression(child.condition); -void CompilerGLSL::add_header_line(const std::string &line) -{ - header_lines.push_back(line); -} + // Condition might have to be inverted. + if (execution_is_noop(get(child.true_block), get(block.merge_block))) + { + condition = join("!", enclose_expression(condition)); + target_block = child.false_block; + } -bool CompilerGLSL::has_extension(const std::string &ext) const -{ - auto itr = find(begin(forced_extensions), end(forced_extensions), ext); - return itr != end(forced_extensions); + auto continue_block = emit_continue_block(block.continue_block, false, false); + emit_block_hints(block); + statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); + break; + } + + case SPIRBlock::WhileLoop: + { + emit_while_loop_initializers(block); + emit_block_hints(block); + + auto condition = to_expression(child.condition); + // Condition might have to be inverted. 
+ if (execution_is_noop(get(child.true_block), get(block.merge_block))) + { + condition = join("!", enclose_expression(condition)); + target_block = child.false_block; + } + + statement("while (", condition, ")"); + break; + } + + default: + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; + } + + begin_scope(); + branch(child.self, target_block); + return true; + } + else + { + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; + } + } + else + return false; } -void CompilerGLSL::require_extension(const std::string &ext) +void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) { - if (!has_extension(ext)) - forced_extensions.push_back(ext); + for (auto &v : block.dominated_variables) + flush_variable_declaration(v); } -void CompilerGLSL::require_extension_internal(const string &ext) +void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &temporaries) { - if (backend.supports_extensions && !has_extension(ext)) + // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. + // Need to sort these to ensure that reference output is stable. + sort(begin(temporaries), end(temporaries), + [](const pair &a, const pair &b) { return a.second < b.second; }); + + for (auto &tmp : temporaries) { - forced_extensions.push_back(ext); - force_recompile(); - } -} + auto &type = get(tmp.first); -void CompilerGLSL::flatten_buffer_block(uint32_t id) -{ - auto &var = get(id); - auto &type = get(var.basetype); - auto name = to_name(type.self, false); - auto &flags = ir.meta[type.self].decoration.decoration_flags; + // There are some rare scenarios where we are asked to declare pointer types as hoisted temporaries. + // This should be ignored unless we're doing actual variable pointers and backend supports it. 
+ // Access chains cannot normally be lowered to temporaries in GLSL and HLSL. + if (type.pointer && !backend.native_pointers) + continue; - if (!type.array.empty()) - SPIRV_CROSS_THROW(name + " is an array of UBOs."); - if (type.basetype != SPIRType::Struct) - SPIRV_CROSS_THROW(name + " is not a struct."); - if (!flags.get(DecorationBlock)) - SPIRV_CROSS_THROW(name + " is not a block."); - if (type.member_types.empty()) - SPIRV_CROSS_THROW(name + " is an empty struct."); + add_local_variable_name(tmp.second); + auto &flags = get_decoration_bitset(tmp.second); - flattened_buffer_blocks.insert(id); -} + // Not all targets support pointer literals, so don't bother with that case. + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(tmp.first)); -bool CompilerGLSL::check_atomic_image(uint32_t id) -{ - auto &type = expression_type(id); - if (type.storage == StorageClassImage) - { - if (options.es && options.version < 320) - require_extension_internal("GL_OES_shader_image_atomic"); + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";"); - auto *var = maybe_get_backing_variable(id); - if (var) + hoisted_temporaries.insert(tmp.second); + forced_temporaries.insert(tmp.second); + + // The temporary might be read from before it's assigned, set up the expression now. + set(tmp.second, to_name(tmp.second), tmp.first, true); + + // If we have hoisted temporaries in multi-precision contexts, emit that here too ... + // We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here. 
+ auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(tmp.second); + if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end()) { - auto &flags = ir.meta[var->self].decoration.decoration_flags; - if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable)) - { - flags.clear(DecorationNonWritable); - flags.clear(DecorationNonReadable); - force_recompile(); - } + uint32_t mirror_id = mirrored_precision_itr->second; + auto &mirror_flags = get_decoration_bitset(mirror_id); + statement(flags_to_qualifiers_glsl(type, mirror_flags), + variable_decl(type, to_name(mirror_id)), + initializer, ";"); + // The temporary might be read from before it's assigned, set up the expression now. + set(mirror_id, to_name(mirror_id), tmp.first, true); + hoisted_temporaries.insert(mirror_id); } - return true; } - else - return false; } -void CompilerGLSL::add_function_overload(const SPIRFunction &func) +void CompilerGLSL::emit_block_chain(SPIRBlock &block) { - Hasher hasher; - for (auto &arg : func.arguments) + bool select_branch_to_true_block = false; + bool select_branch_to_false_block = false; + bool skip_direct_branch = false; + bool emitted_loop_header_variables = false; + bool force_complex_continue_block = false; + ValueSaver loop_level_saver(current_loop_level); + + if (block.merge == SPIRBlock::MergeLoop) + add_loop_level(); + + emit_hoisted_temporaries(block.declare_temporary); + + SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone; + if (block.continue_block) { - // Parameters can vary with pointer type or not, - // but that will not change the signature in GLSL/HLSL, - // so strip the pointer type before hashing. - uint32_t type_id = get_pointee_type_id(arg.type); - auto &type = get(type_id); + continue_type = continue_block_type(get(block.continue_block)); + // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles. 
+ if (continue_type == SPIRBlock::ComplexLoop) + block.complex_continue = true; + } - if (!combined_image_samplers.empty()) - { - // If we have combined image samplers, we cannot really trust the image and sampler arguments - // we pass down to callees, because they may be shuffled around. - // Ignore these arguments, to make sure that functions need to differ in some other way - // to be considered different overloads. - if (type.basetype == SPIRType::SampledImage || - (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler) - { - continue; - } - } + // If we have loop variables, stop masking out access to the variable now. + for (auto var_id : block.loop_variables) + { + auto &var = get(var_id); + var.loop_variable_enable = true; + // We're not going to declare the variable directly, so emit a copy here. + emit_variable_temporary_copies(var); + } - hasher.u32(type_id); + // Remember deferred declaration state. We will restore it before returning. + SmallVector rearm_dominated_variables(block.dominated_variables.size()); + for (size_t i = 0; i < block.dominated_variables.size(); i++) + { + uint32_t var_id = block.dominated_variables[i]; + auto &var = get(var_id); + rearm_dominated_variables[i] = var.deferred_declaration; } - uint64_t types_hash = hasher.get(); - auto function_name = to_name(func.self); - auto itr = function_overloads.find(function_name); - if (itr != end(function_overloads)) + // This is the method often used by spirv-opt to implement loops. + // The loop header goes straight into the continue block. + // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block, + // it *MUST* be used in the continue block. This loop method will not work. + if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop)) { - // There exists a function with this name already. 
- auto &overloads = itr->second; - if (overloads.count(types_hash) != 0) + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop)) { - // Overload conflict, assign a new name. - add_resource_name(func.self); - function_overloads[to_name(func.self)].insert(types_hash); + if (execution_is_noop(get(block.true_block), get(block.merge_block))) + select_branch_to_false_block = true; + else + select_branch_to_true_block = true; + + emitted_loop_header_variables = true; + force_complex_continue_block = true; } - else + } + // This is the older loop behavior in glslang which branches to loop body directly from the loop header. + else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop)) + { + // The body of while, is actually just the true (or false) block, so always branch there unconditionally. + if (execution_is_noop(get(block.true_block), get(block.merge_block))) + select_branch_to_false_block = true; + else + select_branch_to_true_block = true; + + emitted_loop_header_variables = true; + } + } + // This is the newer loop behavior in glslang which branches from Loop header directly to + // a new block, which in turn has a OpBranchSelection without a selection merge. + else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop)) { - // Can reuse the name. - overloads.insert(types_hash); + skip_direct_branch = true; + emitted_loop_header_variables = true; } } - else + else if (continue_type == SPIRBlock::DoWhileLoop) { - // First time we see this function name. 
- add_resource_name(func.self); - function_overloads[to_name(func.self)].insert(types_hash); - } -} - -void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) -{ - if (func.self != ir.default_entry_point) - add_function_overload(func); + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emitted_loop_header_variables = true; + // We have some temporaries where the loop header is the dominator. + // We risk a case where we have code like: + // for (;;) { create-temporary; break; } consume-temporary; + // so force-declare temporaries here. + emit_hoisted_temporaries(block.potential_declare_temporary); + statement("do"); + begin_scope(); - // Avoid shadow declarations. - local_variable_names = resource_names; + emit_block_instructions(block); + } + else if (block.merge == SPIRBlock::MergeLoop) + { + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emitted_loop_header_variables = true; - string decl; + // We have a generic loop without any distinguishable pattern like for, while or do while. + get(block.continue_block).complex_continue = true; + continue_type = SPIRBlock::ComplexLoop; - auto &type = get(func.return_type); - decl += flags_to_qualifiers_glsl(type, return_flags); - decl += type_to_glsl(type); - decl += type_to_array_glsl(type); - decl += " "; + // We have some temporaries where the loop header is the dominator. + // We risk a case where we have code like: + // for (;;) { create-temporary; break; } consume-temporary; + // so force-declare temporaries here. 
+ emit_hoisted_temporaries(block.potential_declare_temporary); + emit_block_hints(block); + statement("for (;;)"); + begin_scope(); - if (func.self == ir.default_entry_point) - { - decl += "main"; - processing_entry_point = true; + emit_block_instructions(block); } else - decl += to_name(func.self); - - decl += "("; - SmallVector arglist; - for (auto &arg : func.arguments) { - // Do not pass in separate images or samplers if we're remapping - // to combined image samplers. - if (skip_argument(arg.id)) - continue; - - // Might change the variable name if it already exists in this function. - // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation - // to use same name for variables. - // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. - add_local_variable_name(arg.id); - - arglist.push_back(argument_decl(arg)); - - // Hold a pointer to the parameter so we can invalidate the readonly field if needed. - auto *var = maybe_get(arg.id); - if (var) - var->parameter = &arg; + emit_block_instructions(block); } - for (auto &arg : func.shadow_arguments) + // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem + // as writes to said loop variables might have been masked out, we need a recompile. + if (!emitted_loop_header_variables && !block.loop_variables.empty()) { - // Might change the variable name if it already exists in this function. - // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation - // to use same name for variables. - // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. - add_local_variable_name(arg.id); - - arglist.push_back(argument_decl(arg)); - - // Hold a pointer to the parameter so we can invalidate the readonly field if needed. 
- auto *var = maybe_get(arg.id); - if (var) - var->parameter = &arg; + force_recompile_guarantee_forward_progress(); + for (auto var : block.loop_variables) + get(var).loop_variable = false; + block.loop_variables.clear(); } - decl += merge(arglist); - decl += ")"; - statement(decl); -} - -void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) -{ - // Avoid potential cycles. - if (func.active) - return; - func.active = true; + flush_undeclared_variables(block); + bool emit_next_block = true; - // If we depend on a function, emit that function before we emit our own function. - for (auto block : func.blocks) + // Handle end of block. + switch (block.terminator) { - auto &b = get(block); - for (auto &i : b.ops) + case SPIRBlock::Direct: + // True when emitting complex continue block. + if (block.loop_dominator == block.next_block) { - auto ops = stream(i); - auto op = static_cast(i.op); + branch(block.self, block.next_block); + emit_next_block = false; + } + // True if MergeToDirectForLoop succeeded. + else if (skip_direct_branch) + emit_next_block = false; + else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block)) + { + branch(block.self, block.next_block); + emit_next_block = false; + } + break; - if (op == OpFunctionCall) + case SPIRBlock::Select: + // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded. + if (select_branch_to_true_block) + { + if (force_complex_continue_block) { - // Recursively emit functions which are called. - uint32_t id = ops[2]; - emit_function(get(id), ir.meta[ops[1]].decoration.decoration_flags); + assert(block.true_block == block.continue_block); + + // We're going to emit a continue block directly here, so make sure it's marked as complex. 
+ auto &complex_continue = get(block.continue_block).complex_continue; + bool old_complex = complex_continue; + complex_continue = true; + branch(block.self, block.true_block); + complex_continue = old_complex; } + else + branch(block.self, block.true_block); } - } - - if (func.entry_line.file_id != 0) - emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal); - emit_function_prototype(func, return_flags); - begin_scope(); - - if (func.self == ir.default_entry_point) - emit_entry_point_declarations(); + else if (select_branch_to_false_block) + { + if (force_complex_continue_block) + { + assert(block.false_block == block.continue_block); - current_function = &func; - auto &entry_block = get(func.entry_block); + // We're going to emit a continue block directly here, so make sure it's marked as complex. + auto &complex_continue = get(block.continue_block).complex_continue; + bool old_complex = complex_continue; + complex_continue = true; + branch(block.self, block.false_block); + complex_continue = old_complex; + } + else + branch(block.self, block.false_block); + } + else + branch(block.self, block.condition, block.true_block, block.false_block); + break; - sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack)); - for (auto &array : func.constant_arrays_needed_on_stack) + case SPIRBlock::MultiSelect: { - auto &c = get(array); - auto &type = get(c.constant_type); - statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";"); - } + auto &type = expression_type(block.condition); + bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || + type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64; - for (auto &v : func.local_variables) - { - auto &var = get(v); - var.deferred_declaration = false; + if (block.merge == SPIRBlock::MergeNone) + SPIRV_CROSS_THROW("Switch statement is not structured"); - if (var.storage == 
StorageClassWorkgroup) - { - // Special variable type which cannot have initializer, - // need to be declared as standalone variables. - // Comes from MSL which can push global variables as local variables in main function. - add_local_variable_name(var.self); - statement(variable_decl(var), ";"); - var.deferred_declaration = false; - } - else if (var.storage == StorageClassPrivate) - { - // These variables will not have had their CFG usage analyzed, so move it to the entry block. - // Comes from MSL which can push global variables as local variables in main function. - // We could just declare them right now, but we would miss out on an important initialization case which is - // LUT declaration in MSL. - // If we don't declare the variable when it is assigned we're forced to go through a helper function - // which copies elements one by one. - add_local_variable_name(var.self); - auto &dominated = entry_block.dominated_variables; - if (find(begin(dominated), end(dominated), var.self) == end(dominated)) - entry_block.dominated_variables.push_back(var.self); - var.deferred_declaration = true; - } - else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression) + if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)) { - // No need to declare this variable, it has a static expression. - var.deferred_declaration = false; + // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages. 
+ SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors."); } - else if (expression_is_lvalue(v)) - { - add_local_variable_name(var.self); - if (var.initializer) - statement(variable_decl_function_local(var), ";"); - else + const char *label_suffix = ""; + if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix) + label_suffix = "u"; + else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch) + label_suffix = "l"; + else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch) + label_suffix = "ul"; + else if (type.basetype == SPIRType::UShort) + label_suffix = backend.uint16_t_literal_suffix; + else if (type.basetype == SPIRType::Short) + label_suffix = backend.int16_t_literal_suffix; + + current_emitting_switch_stack.push_back(&block); + + if (block.need_ladder_break) + statement("bool _", block.self, "_ladder_break = false;"); + + // Find all unique case constructs. + unordered_map> case_constructs; + SmallVector block_declaration_order; + SmallVector literals_to_merge; + + // If a switch case branches to the default block for some reason, we can just remove that literal from consideration + // and let the default: block handle it. + // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here. + // We only need to consider possible fallthrough if order[i] branches to order[i + 1]. + auto &cases = get_case_list(block); + for (auto &c : cases) + { + if (c.block != block.next_block && c.block != block.default_block) { - // Don't declare variable until first use to declutter the GLSL output quite a lot. - // If we don't touch the variable before first branch, - // declare it then since we need variable declaration to be in top scope. 
- var.deferred_declaration = true; + if (!case_constructs.count(c.block)) + block_declaration_order.push_back(c.block); + case_constructs[c.block].push_back(c.value); + } + else if (c.block == block.next_block && block.default_block != block.next_block) + { + // We might have to flush phi inside specific case labels. + // If we can piggyback on default:, do so instead. + literals_to_merge.push_back(c.value); } } - else - { - // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this. - // For these types (non-lvalue), we enforce forwarding through a shadowed variable. - // This means that when we OpStore to these variables, we just write in the expression ID directly. - // This breaks any kind of branching, since the variable must be statically assigned. - // Branching on samplers and images would be pretty much impossible to fake in GLSL. - var.statically_assigned = true; - } - - var.loop_variable_enable = false; - - // Loop variables are never declared outside their for-loop, so block any implicit declaration. - if (var.loop_variable) - var.deferred_declaration = false; - } - - // Enforce declaration order for regression testing purposes. - for (auto &block_id : func.blocks) - { - auto &block = get(block_id); - sort(begin(block.dominated_variables), end(block.dominated_variables)); - } - for (auto &line : current_function->fixup_hooks_in) - line(); + // Empty literal array -> default. + if (block.default_block != block.next_block) + { + auto &default_block = get(block.default_block); - emit_block_chain(entry_block); + // We need to slide in the default block somewhere in this chain + // if there are fall-through scenarios since the default is declared separately in OpSwitch. + // Only consider trivial fall-through cases here. 
+ size_t num_blocks = block_declaration_order.size(); + bool injected_block = false; - end_scope(); - processing_entry_point = false; - statement(""); + for (size_t i = 0; i < num_blocks; i++) + { + auto &case_block = get(block_declaration_order[i]); + if (execution_is_direct_branch(case_block, default_block)) + { + // Fallthrough to default block, we must inject the default block here. + block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block); + injected_block = true; + break; + } + else if (execution_is_direct_branch(default_block, case_block)) + { + // Default case is falling through to another case label, we must inject the default block here. + block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block); + injected_block = true; + break; + } + } - // Make sure deferred declaration state for local variables is cleared when we are done with function. - // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise. - for (auto &v : func.local_variables) - { - auto &var = get(v); - var.deferred_declaration = false; - } -} + // Order does not matter. + if (!injected_block) + block_declaration_order.push_back(block.default_block); + else if (is_legacy_es()) + SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0."); -void CompilerGLSL::emit_fixup() -{ - auto &execution = get_entry_point(); - if (execution.model == ExecutionModelVertex) - { - if (options.vertex.fixup_clipspace) - { - const char *suffix = backend.float_literal_suffix ? 
"f" : ""; - statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;"); + case_constructs[block.default_block] = {}; } - if (options.vertex.flip_vert_y) - statement("gl_Position.y = -gl_Position.y;"); - } -} + size_t num_blocks = block_declaration_order.size(); -bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to) -{ - auto &child = get(to); - for (auto &phi : child.phi_variables) - if (phi.parent == from) - return true; - return false; -} + const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string + { + if (is_unsigned_case) + return convert_to_string(literal); -void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) -{ - auto &child = get(to); - if (child.ignore_phi_from_block == from) - return; + // For smaller cases, the literals are compiled as 32 bit wide + // literals so we don't need to care for all sizes specifically. + if (width <= 32) + { + return convert_to_string(int64_t(int32_t(literal))); + } - unordered_set temporary_phi_variables; + return convert_to_string(int64_t(literal)); + }; - for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr) - { - auto &phi = *itr; + const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector &labels, + const char *suffix) -> string { + string ret; + size_t count = labels.size(); + for (size_t i = 0; i < count; i++) + { + if (i) + ret += " || "; + ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix, + count > 1 ? ")" : ""); + } + return ret; + }; - if (phi.parent == from) + // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture, + // we need to flush phi nodes outside the switch block in a branch, + // and skip any Phi handling inside the case label to make fall-through work as expected. + // This kind of code-gen is super awkward and it's a last resort. 
Normally we would want to handle this + // inside the case label if at all possible. + for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++) { - auto &var = get(phi.function_variable); - - // A Phi variable might be a loop variable, so flush to static expression. - if (var.loop_variable && !var.loop_variable_enable) - var.static_expression = phi.local_variable; - else + if (flush_phi_required(block.self, block_declaration_order[i]) && + flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i])) { - flush_variable_declaration(phi.function_variable); + uint32_t target_block = block_declaration_order[i]; - // Check if we are going to write to a Phi variable that another statement will read from - // as part of another Phi node in our target block. - // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads. - // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. - bool need_saved_temporary = - find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool { - return future_phi.local_variable == phi.function_variable && future_phi.parent == from; - }) != end(child.phi_variables); + // Make sure we flush Phi, it might have been marked to be ignored earlier. + get(target_block).ignore_phi_from_block = 0; - if (need_saved_temporary) + auto &literals = case_constructs[target_block]; + + if (literals.empty()) { - // Need to make sure we declare the phi variable with a copy at the right scope. - // We cannot safely declare a temporary here since we might be inside a continue block. - if (!var.allocate_temporary_copy) + // Oh boy, gotta make a complete negative test instead! o.o + // Find all possible literals that would *not* make us enter the default block. + // If none of those literals match, we flush Phi ... 
+ SmallVector conditions; + for (size_t j = 0; j < num_blocks; j++) { - var.allocate_temporary_copy = true; - force_recompile(); + auto &negative_literals = case_constructs[block_declaration_order[j]]; + for (auto &case_label : negative_literals) + conditions.push_back(join(to_enclosed_expression(block.condition), + " != ", to_case_label(case_label, type.width, unsigned_case))); } - statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";"); - temporary_phi_variables.insert(phi.function_variable); - } - - // This might be called in continue block, so make sure we - // use this to emit ESSL 1.0 compliant increments/decrements. - auto lhs = to_expression(phi.function_variable); - string rhs; - if (temporary_phi_variables.count(phi.local_variable)) - rhs = join("_", phi.local_variable, "_copy"); + statement("if (", merge(conditions, " && "), ")"); + begin_scope(); + flush_phi(block.self, target_block); + end_scope(); + } else - rhs = to_pointer_expression(phi.local_variable); + { + SmallVector conditions; + conditions.reserve(literals.size()); + for (auto &case_label : literals) + conditions.push_back(join(to_enclosed_expression(block.condition), + " == ", to_case_label(case_label, type.width, unsigned_case))); + statement("if (", merge(conditions, " || "), ")"); + begin_scope(); + flush_phi(block.self, target_block); + end_scope(); + } - if (!optimize_read_modify_write(get(var.basetype), lhs, rhs)) - statement(lhs, " = ", rhs, ";"); + // Mark the block so that we don't flush Phi from header to case label. + get(target_block).ignore_phi_from_block = block.self; } - - register_write(phi.function_variable); } - } -} - -void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) -{ - auto &to_block = get(to); - if (from == to) - return; - - assert(is_continue(to)); - if (to_block.complex_continue) - { - // Just emit the whole block chain as is. 
- auto usage_counts = expression_usage_counts; - auto invalid = invalid_expressions; - emit_block_chain(to_block); + // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate + // non-structured exits with the help of a switch block. + // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic. + bool block_like_switch = cases.empty(); - // Expression usage counts and invalid expressions - // are moot after returning from the continue block. - // Since we emit the same block multiple times, - // we don't want to invalidate ourselves. - expression_usage_counts = usage_counts; - invalid_expressions = invalid; - } - else - { - auto &from_block = get(from); - bool outside_control_flow = false; - uint32_t loop_dominator = 0; + // If this is true, the switch is completely meaningless, and we should just avoid it. + bool collapsed_switch = block_like_switch && block.default_block == block.next_block; - // FIXME: Refactor this to not use the old loop_dominator tracking. - if (from_block.merge_block) - { - // If we are a loop header, we don't set the loop dominator, - // so just use "self" here. - loop_dominator = from; - } - else if (from_block.loop_dominator != SPIRBlock::NoDominator) + if (!collapsed_switch) { - loop_dominator = from_block.loop_dominator; + if (block_like_switch || is_legacy_es()) + { + // ESSL 1.0 is not guaranteed to support do/while. 
+ if (is_legacy_es()) + { + uint32_t counter = statement_count; + statement("for (int spvDummy", counter, " = 0; spvDummy", counter, " < 1; spvDummy", counter, + "++)"); + } + else + statement("do"); + } + else + { + emit_block_hints(block); + statement("switch (", to_unpacked_expression(block.condition), ")"); + } + begin_scope(); } - if (loop_dominator != 0) + for (size_t i = 0; i < num_blocks; i++) { - auto &dominator = get(loop_dominator); - - // For non-complex continue blocks, we implicitly branch to the continue block - // by having the continue block be part of the loop header in for (; ; continue-block). - outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block); - } - - // Some simplification for for-loops. We always end up with a useless continue; - // statement since we branch to a loop block. - // Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block, - // we can avoid writing out an explicit continue statement. - // Similar optimization to return statements if we know we're outside flow control. - if (!outside_control_flow) - statement("continue;"); - } -} - -void CompilerGLSL::branch(uint32_t from, uint32_t to) -{ - flush_phi(from, to); - flush_control_dependent_expressions(from); - flush_all_active_variables(); + uint32_t target_block = block_declaration_order[i]; + auto &literals = case_constructs[target_block]; - // This is only a continue if we branch to our loop dominator. - if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get(from).loop_dominator == to) - { - // This can happen if we had a complex continue block which was emitted. - // Once the continue block tries to branch to the loop header, just emit continue; - // and end the chain here. - statement("continue;"); - } - else if (is_break(to)) - { - // Very dirty workaround. - // Switch constructs are able to break, but they cannot break out of a loop at the same time. 
- // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block, - // write to the ladder here, and defer the break. - // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case. - if (current_emitting_switch && is_loop_break(to) && current_emitting_switch->loop_dominator != ~0u && - get(current_emitting_switch->loop_dominator).merge_block == to) - { - if (!current_emitting_switch->need_ladder_break) + if (literals.empty()) + { + // Default case. + if (!block_like_switch) + { + if (is_legacy_es()) + statement("else"); + else + statement("default:"); + } + } + else { - force_recompile(); - current_emitting_switch->need_ladder_break = true; + if (is_legacy_es()) + { + statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix), + ")"); + } + else + { + for (auto &case_literal : literals) + { + // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here. + statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":"); + } + } } - statement("_", current_emitting_switch->self, "_ladder_break = true;"); - } - statement("break;"); - } - else if (is_continue(to) || (from == to)) - { - // For from == to case can happen for a do-while loop which branches into itself. - // We don't mark these cases as continue blocks, but the only possible way to branch into - // ourselves is through means of continue blocks. - branch_to_continue(from, to); - } - else if (!is_conditional(to)) - emit_block_chain(get(to)); + auto &case_block = get(target_block); + if (backend.support_case_fallthrough && i + 1 < num_blocks && + execution_is_direct_branch(case_block, get(block_declaration_order[i + 1]))) + { + // We will fall through here, so just terminate the block chain early. + // We still need to deal with Phi potentially. 
+ // No need for a stack-like thing here since we only do fall-through when there is a + // single trivial branch to fall-through target.. + current_emitting_switch_fallthrough = true; + } + else + current_emitting_switch_fallthrough = false; - // It is important that we check for break before continue. - // A block might serve two purposes, a break block for the inner scope, and - // a continue block in the outer scope. - // Inner scope always takes precedence. -} + if (!block_like_switch) + begin_scope(); + branch(block.self, target_block); + if (!block_like_switch) + end_scope(); -void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block) -{ - // If we branch directly to a selection merge target, we don't really need a code path. - bool true_sub = !is_conditional(true_block); - bool false_sub = !is_conditional(false_block); + current_emitting_switch_fallthrough = false; + } - if (true_sub) - { - emit_block_hints(get(from)); - statement("if (", to_expression(cond), ")"); - begin_scope(); - branch(from, true_block); - end_scope(); + // Might still have to flush phi variables if we branch from loop header directly to merge target. + // This is supposed to emit all cases where we branch from header to merge block directly. + // There are two main scenarios where cannot rely on default fallthrough. + // - There is an explicit default: label already. + // In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block. + // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there. 
+ bool header_merge_requires_phi = flush_phi_required(block.self, block.next_block); + bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty(); + if (!collapsed_switch && ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty())) + { + for (auto &case_literal : literals_to_merge) + statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":"); + + if (block.default_block == block.next_block) + { + if (is_legacy_es()) + statement("else"); + else + statement("default:"); + } - if (false_sub || is_continue(false_block) || is_break(false_block)) - { - statement("else"); - begin_scope(); - branch(from, false_block); - end_scope(); - } - else if (flush_phi_required(from, false_block)) - { - statement("else"); begin_scope(); - flush_phi(from, false_block); + flush_phi(block.self, block.next_block); + statement("break;"); end_scope(); } - } - else if (false_sub && !true_sub) - { - // Only need false path, use negative conditional. - emit_block_hints(get(from)); - statement("if (!", to_enclosed_expression(cond), ")"); - begin_scope(); - branch(from, false_block); - end_scope(); - if (is_continue(true_block) || is_break(true_block)) + if (!collapsed_switch) { - statement("else"); - begin_scope(); - branch(from, true_block); - end_scope(); + if (block_like_switch && !is_legacy_es()) + end_scope_decl("while(false)"); + else + end_scope(); } - else if (flush_phi_required(from, true_block)) + else + flush_phi(block.self, block.next_block); + + if (block.need_ladder_break) { - statement("else"); + statement("if (_", block.self, "_ladder_break)"); begin_scope(); - flush_phi(from, true_block); + statement("break;"); end_scope(); } - } -} -// FIXME: This currently cannot handle complex continue blocks -// as in do-while. -// This should be seen as a "trivial" continue block. 
-string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block) -{ - auto *block = &get(continue_block); - - // While emitting the continue block, declare_temporary will check this - // if we have to emit temporaries. - current_continue_block = block; + current_emitting_switch_stack.pop_back(); + break; + } - SmallVector statements; + case SPIRBlock::Return: + { + for (auto &line : current_function->fixup_hooks_out) + line(); - // Capture all statements into our list. - auto *old = redirect_statement; - redirect_statement = &statements; + if (processing_entry_point) + emit_fixup(); - // Stamp out all blocks one after each other. - while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0) - { - // Write out all instructions we have in this block. - emit_block_instructions(*block); + auto &cfg = get_cfg_for_current_function(); - // For plain branchless for/while continue blocks. - if (block->next_block) - { - flush_phi(continue_block, block->next_block); - block = &get(block->next_block); - } - // For do while blocks. The last block will be a select block. - else if (block->true_block && follow_true_block) - { - flush_phi(continue_block, block->true_block); - block = &get(block->true_block); - } - else if (block->false_block && follow_false_block) + if (block.return_value) { - flush_phi(continue_block, block->false_block); - block = &get(block->false_block); + auto &type = expression_type(block.return_value); + if (!type.array.empty() && !backend.can_return_array) + { + // If we cannot return arrays, we will have a special out argument we can write to instead. + // The backend is responsible for setting this up, and redirection the return values as appropriate. 
+ if (ir.ids[block.return_value].get_type() != TypeUndef) + { + emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction, + get_expression_effective_storage_class(block.return_value)); + } + + if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || + block.loop_dominator != BlockID(SPIRBlock::NoDominator)) + { + statement("return;"); + } + } + else + { + // OpReturnValue can return Undef, so don't emit anything for this case. + if (ir.ids[block.return_value].get_type() != TypeUndef) + statement("return ", to_unpacked_expression(block.return_value), ";"); + } } - else + else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || + block.loop_dominator != BlockID(SPIRBlock::NoDominator)) { - SPIRV_CROSS_THROW("Invalid continue block detected!"); + // If this block is the very final block and not called from control flow, + // we do not need an explicit return which looks out of place. Just end the function here. + // In the very weird case of for(;;) { return; } executing return is unconditional, + // but we actually need a return here ... + statement("return;"); } + break; } - // Restore old pointer. - redirect_statement = old; + // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement. + case SPIRBlock::Kill: + statement(backend.discard_literal, ";"); + if (block.return_value) + statement("return ", to_unpacked_expression(block.return_value), ";"); + break; - // Somewhat ugly, strip off the last ';' since we use ',' instead. - // Ideally, we should select this behavior in statement(). - for (auto &s : statements) + case SPIRBlock::Unreachable: { - if (!s.empty() && s.back() == ';') - s.erase(s.size() - 1, 1); - } + // Avoid emitting false fallthrough, which can happen for + // if (cond) break; else discard; inside a case label. + // Discard is not always implementable as a terminator. 
- current_continue_block = nullptr; - return merge(statements); -} + auto &cfg = get_cfg_for_current_function(); + bool inner_dominator_is_switch = false; + ID id = block.self; -void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block) -{ - // While loops do not take initializers, so declare all of them outside. - for (auto &loop_var : block.loop_variables) - { - auto &var = get(loop_var); - statement(variable_decl(var), ";"); - } -} + while (id) + { + auto &iter_block = get(id); + if (iter_block.terminator == SPIRBlock::MultiSelect || + iter_block.merge == SPIRBlock::MergeLoop) + { + ID next_block = iter_block.merge == SPIRBlock::MergeLoop ? + iter_block.merge_block : iter_block.next_block; + bool outside_construct = next_block && cfg.find_common_dominator(next_block, block.self) == next_block; + if (!outside_construct) + { + inner_dominator_is_switch = iter_block.terminator == SPIRBlock::MultiSelect; + break; + } + } -string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block) -{ - if (block.loop_variables.empty()) - return ""; + if (cfg.get_preceding_edges(id).empty()) + break; - bool same_types = for_loop_initializers_are_same_type(block); - // We can only declare for loop initializers if all variables are of same type. - // If we cannot do this, declare individual variables before the loop header. + id = cfg.get_immediate_dominator(id); + } - // We might have a loop variable candidate which was not assigned to for some reason. - uint32_t missing_initializers = 0; - for (auto &variable : block.loop_variables) - { - uint32_t expr = get(variable).static_expression; + if (inner_dominator_is_switch) + statement("break; // unreachable workaround"); - // Sometimes loop variables are initialized with OpUndef, but we can just declare - // a plain variable without initializer in this case. 
- if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) - missing_initializers++; + emit_next_block = false; + break; } - if (block.loop_variables.size() == 1 && missing_initializers == 0) - { - return variable_decl(get(block.loop_variables.front())); - } - else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size())) - { - for (auto &loop_var : block.loop_variables) - statement(variable_decl(get(loop_var)), ";"); - return ""; + case SPIRBlock::IgnoreIntersection: + statement("ignoreIntersectionEXT;"); + break; + + case SPIRBlock::TerminateRay: + statement("terminateRayEXT;"); + break; + + case SPIRBlock::EmitMeshTasks: + statement("EmitMeshTasksEXT(", + to_unpacked_expression(block.mesh.groups[0]), ", ", + to_unpacked_expression(block.mesh.groups[1]), ", ", + to_unpacked_expression(block.mesh.groups[2]), ");"); + break; + + default: + SPIRV_CROSS_THROW("Unimplemented block terminator."); } - else - { - // We have a mix of loop variables, either ones with a clear initializer, or ones without. - // Separate the two streams. - string expr; - for (auto &loop_var : block.loop_variables) + if (block.next_block && emit_next_block) + { + // If we hit this case, we're dealing with an unconditional branch, which means we will output + // that block after this. If we had selection merge, we already flushed phi variables. 
+ if (block.merge != SPIRBlock::MergeSelection) { - uint32_t static_expr = get(loop_var).static_expression; - if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef) - { - statement(variable_decl(get(loop_var)), ";"); - } - else - { - auto &var = get(loop_var); - auto &type = get_variable_data_type(var); - if (expr.empty()) - { - // For loop initializers are of the form (block.next_block).invalidate_expressions = block.invalidate_expressions; + } - expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression)); + // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. + if (!current_emitting_switch_fallthrough) + { + // For merge selects we might have ignored the fact that a merge target + // could have been a break; or continue; + // We will need to deal with it here. + if (is_loop_break(block.next_block)) + { + // Cannot check for just break, because switch statements will also use break. + assert(block.merge == SPIRBlock::MergeSelection); + statement("break;"); + } + else if (is_continue(block.next_block)) + { + assert(block.merge == SPIRBlock::MergeSelection); + branch_to_continue(block.self, block.next_block); } + else if (BlockID(block.self) != block.next_block) + emit_block_chain(get(block.next_block)); } - return expr; } -} - -bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block) -{ - if (block.loop_variables.size() <= 1) - return true; - uint32_t expected = 0; - Bitset expected_flags; - for (auto &var : block.loop_variables) + if (block.merge == SPIRBlock::MergeLoop) { - // Don't care about uninitialized variables as they will not be part of the initializers. 
- uint32_t expr = get(var).static_expression; - if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) - continue; - - if (expected == 0) + if (continue_type == SPIRBlock::DoWhileLoop) { - expected = get(var).basetype; - expected_flags = get_decoration_bitset(var); - } - else if (expected != get(var).basetype) - return false; + // Make sure that we run the continue block to get the expressions set, but this + // should become an empty string. + // We have no fallbacks if we cannot forward everything to temporaries ... + const auto &continue_block = get(block.continue_block); + bool positive_test = execution_is_noop(get(continue_block.true_block), + get(continue_block.loop_dominator)); - // Precision flags and things like that must also match. - if (expected_flags != get_decoration_bitset(var)) - return false; - } + uint32_t current_count = statement_count; + auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test); + if (statement_count != current_count) + { + // The DoWhile block has side effects, force ComplexLoop pattern next pass. + get(block.continue_block).complex_continue = true; + force_recompile(); + } - return true; -} + // Might have to invert the do-while test here. + auto condition = to_expression(continue_block.condition); + if (!positive_test) + condition = join("!", enclose_expression(condition)); -bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method) -{ - SPIRBlock::ContinueBlockType continue_type = continue_block_type(get(block.continue_block)); + end_scope_decl(join("while (", condition, ")")); + } + else + end_scope(); - if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop) - { - uint32_t current_count = statement_count; - // If we're trying to create a true for loop, - // we need to make sure that all opcodes before branch statement do not actually emit any code. 
- // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. - emit_block_instructions(block); + loop_level_saver.release(); - bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries); + // We cannot break out of two loops at once, so don't check for break; here. + // Using block.self as the "from" block isn't quite right, but it has the same scope + // and dominance structure, so it's fine. + if (is_continue(block.merge_block)) + branch_to_continue(block.self, block.merge_block); + else + emit_block_chain(get(block.merge_block)); + } - // This can work! We only did trivial things which could be forwarded in block body! - if (current_count == statement_count && condition_is_temporary) - { - switch (continue_type) - { - case SPIRBlock::ForLoop: - { - // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. - flush_undeclared_variables(block); + // Forget about control dependent expressions now. + block.invalidate_expressions.clear(); - // Important that we do this in this order because - // emitting the continue block can invalidate the condition expression. - auto initializer = emit_for_loop_initializers(block); - auto condition = to_expression(block.condition); + // After we return, we must be out of scope, so if we somehow have to re-emit this function, + // re-declare variables if necessary. + assert(rearm_dominated_variables.size() == block.dominated_variables.size()); + for (size_t i = 0; i < block.dominated_variables.size(); i++) + { + uint32_t var = block.dominated_variables[i]; + get(var).deferred_declaration = rearm_dominated_variables[i]; + } - // Condition might have to be inverted. 
- if (execution_is_noop(get(block.true_block), get(block.merge_block))) - condition = join("!", enclose_expression(condition)); + // Just like for deferred declaration, we need to forget about loop variable enable + // if our block chain is reinstantiated later. + for (auto &var_id : block.loop_variables) + get(var_id).loop_variable_enable = false; +} - emit_block_hints(block); - if (method != SPIRBlock::MergeToSelectContinueForLoop) - { - auto continue_block = emit_continue_block(block.continue_block, false, false); - statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); - } - else - statement("for (", initializer, "; ", condition, "; )"); - break; - } +void CompilerGLSL::begin_scope() +{ + statement("{"); + indent++; +} - case SPIRBlock::WhileLoop: - { - // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header. - flush_undeclared_variables(block); - emit_while_loop_initializers(block); - emit_block_hints(block); +void CompilerGLSL::end_scope() +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("}"); +} - auto condition = to_expression(block.condition); - // Condition might have to be inverted. - if (execution_is_noop(get(block.true_block), get(block.merge_block))) - condition = join("!", enclose_expression(condition)); +void CompilerGLSL::end_scope(const string &trailer) +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("}", trailer); +} - statement("while (", condition, ")"); - break; - } +void CompilerGLSL::end_scope_decl() +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("};"); +} - default: - block.disable_block_optimization = true; - force_recompile(); - begin_scope(); // We'll see an end_scope() later. 
- return false; - } +void CompilerGLSL::end_scope_decl(const string &decl) +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("} ", decl, ";"); +} - begin_scope(); - return true; - } - else +void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length) +{ + // If our variable is remapped, and we rely on type-remapping information as + // well, then we cannot pass the variable as a function parameter. + // Fixing this is non-trivial without stamping out variants of the same function, + // so for now warn about this and suggest workarounds instead. + for (uint32_t i = 0; i < length; i++) + { + auto *var = maybe_get(args[i]); + if (!var || !var->remapped_variable) + continue; + + auto &type = get(var->basetype); + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) { - block.disable_block_optimization = true; - force_recompile(); - begin_scope(); // We'll see an end_scope() later. - return false; + SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. " + "This will not work correctly because type-remapping information is lost. " + "To workaround, please consider not passing the subpass input as a function parameter, " + "or use in/out variables instead which do not need type remapping information."); } } - else if (method == SPIRBlock::MergeToDirectForLoop) - { - auto &child = get(block.next_block); +} - // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. - flush_undeclared_variables(child); +const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) +{ + // FIXME: This is kind of hacky. There should be a cleaner way. 
+ auto offset = uint32_t(&instr - current_emitting_block->ops.data()); + if ((offset + 1) < current_emitting_block->ops.size()) + return ¤t_emitting_block->ops[offset + 1]; + else + return nullptr; +} - uint32_t current_count = statement_count; +uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) +{ + return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask | + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); +} - // If we're trying to create a true for loop, - // we need to make sure that all opcodes before branch statement do not actually emit any code. - // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. - emit_block_instructions(child); +void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass) +{ + statement(lhs, " = ", to_expression(rhs_id), ";"); +} - bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); +bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id) +{ + if (!backend.force_gl_in_out_block) + return false; + // This path is only relevant for GL backends. - if (current_count == statement_count && condition_is_temporary) - { - uint32_t target_block = child.true_block; + auto *var = maybe_get(target_id); + if (!var || var->storage != StorageClassOutput) + return false; - switch (continue_type) - { - case SPIRBlock::ForLoop: - { - // Important that we do this in this order because - // emitting the continue block can invalidate the condition expression. 
- auto initializer = emit_for_loop_initializers(block); - auto condition = to_expression(child.condition); + if (!is_builtin_variable(*var) || BuiltIn(get_decoration(var->self, DecorationBuiltIn)) != BuiltInSampleMask) + return false; + + auto &type = expression_type(source_id); + string array_expr; + if (type.array_size_literal.back()) + { + array_expr = convert_to_string(type.array.back()); + if (type.array.back() == 0) + SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); + } + else + array_expr = to_expression(type.array.back()); - // Condition might have to be inverted. - if (execution_is_noop(get(child.true_block), get(block.merge_block))) - { - condition = join("!", enclose_expression(condition)); - target_block = child.false_block; - } + SPIRType target_type; + target_type.basetype = SPIRType::Int; - auto continue_block = emit_continue_block(block.continue_block, false, false); - emit_block_hints(block); - statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); - break; - } + statement("for (int i = 0; i < int(", array_expr, "); i++)"); + begin_scope(); + statement(to_expression(target_id), "[i] = ", + bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")), + ";"); + end_scope(); - case SPIRBlock::WhileLoop: - { - emit_while_loop_initializers(block); - emit_block_hints(block); + return true; +} - auto condition = to_expression(child.condition); - // Condition might have to be inverted. - if (execution_is_noop(get(child.true_block), get(block.merge_block))) - { - condition = join("!", enclose_expression(condition)); - target_block = child.false_block; - } +void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr) +{ + if (!backend.force_gl_in_out_block) + return; + // This path is only relevant for GL backends. 
- statement("while (", condition, ")"); - break; - } + auto *var = maybe_get(source_id); + if (!var) + return; - default: - block.disable_block_optimization = true; - force_recompile(); - begin_scope(); // We'll see an end_scope() later. - return false; - } + if (var->storage != StorageClassInput && var->storage != StorageClassOutput) + return; - begin_scope(); - branch(child.self, target_block); - return true; + auto &type = get_variable_data_type(*var); + if (type.array.empty()) + return; + + auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); + bool is_builtin = is_builtin_variable(*var) && + (builtin == BuiltInPointSize || + builtin == BuiltInPosition || + builtin == BuiltInSampleMask); + bool is_tess = is_tessellation_shader(); + bool is_patch = has_decoration(var->self, DecorationPatch); + bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask; + + // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it. + // We must unroll the array load. + // For builtins, we couldn't catch this case normally, + // because this is resolved in the OpAccessChain in most cases. + // If we load the entire array, we have no choice but to unroll here. + if (!is_patch && (is_builtin || is_tess)) + { + auto new_expr = join("_", target_id, "_unrolled"); + statement(variable_decl(type, new_expr, target_id), ";"); + string array_expr; + if (type.array_size_literal.back()) + { + array_expr = convert_to_string(type.array.back()); + if (type.array.back() == 0) + SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); } else + array_expr = to_expression(type.array.back()); + + // The array size might be a specialization constant, so use a for-loop instead. 
+ statement("for (int i = 0; i < int(", array_expr, "); i++)"); + begin_scope(); + if (is_builtin && !is_sample_mask) + statement(new_expr, "[i] = gl_in[i].", expr, ";"); + else if (is_sample_mask) { - block.disable_block_optimization = true; - force_recompile(); - begin_scope(); // We'll see an end_scope() later. - return false; + SPIRType target_type; + target_type.basetype = SPIRType::Int; + statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";"); } + else + statement(new_expr, "[i] = ", expr, "[i];"); + end_scope(); + + expr = std::move(new_expr); } - else - return false; } -void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) +void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) { - for (auto &v : block.dominated_variables) - flush_variable_declaration(v); -} + // We will handle array cases elsewhere. + if (!expr_type.array.empty()) + return; -void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &temporaries) -{ - // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. - // Need to sort these to ensure that reference output is stable. - sort(begin(temporaries), end(temporaries), - [](const pair &a, const pair &b) { return a.second < b.second; }); + auto *var = maybe_get_backing_variable(source_id); + if (var) + source_id = var->self; - for (auto &tmp : temporaries) + // Only interested in standalone builtin variables. + if (!has_decoration(source_id, DecorationBuiltIn)) + return; + + auto builtin = static_cast(get_decoration(source_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; + + // TODO: Fill in for more builtins. 
+ switch (builtin) { - add_local_variable_name(tmp.second); - auto &flags = ir.meta[tmp.second].decoration.decoration_flags; - auto &type = get(tmp.first); - statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";"); + case BuiltInLayer: + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInInstanceId: + case BuiltInInstanceIndex: + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInSampleId: + case BuiltInBaseVertex: + case BuiltInBaseInstance: + case BuiltInDrawIndex: + case BuiltInFragStencilRefEXT: + case BuiltInInstanceCustomIndexNV: + case BuiltInSampleMask: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInShadingRateKHR: + expected_type = SPIRType::Int; + break; - hoisted_temporaries.insert(tmp.second); - forced_temporaries.insert(tmp.second); + case BuiltInGlobalInvocationId: + case BuiltInLocalInvocationId: + case BuiltInWorkgroupId: + case BuiltInLocalInvocationIndex: + case BuiltInWorkgroupSize: + case BuiltInNumWorkgroups: + case BuiltInIncomingRayFlagsNV: + case BuiltInLaunchIdNV: + case BuiltInLaunchSizeNV: + case BuiltInPrimitiveTriangleIndicesEXT: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInPrimitivePointIndicesEXT: + expected_type = SPIRType::UInt; + break; - // The temporary might be read from before it's assigned, set up the expression now. 
- set(tmp.second, to_name(tmp.second), tmp.first, true); + default: + break; } + + if (expected_type != expr_type.basetype) + expr = bitcast_expression(expr_type, expected_type, expr); } -void CompilerGLSL::emit_block_chain(SPIRBlock &block) +void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) { - bool select_branch_to_true_block = false; - bool select_branch_to_false_block = false; - bool skip_direct_branch = false; - bool emitted_loop_header_variables = false; - bool force_complex_continue_block = false; + auto *var = maybe_get_backing_variable(target_id); + if (var) + target_id = var->self; - emit_hoisted_temporaries(block.declare_temporary); + // Only interested in standalone builtin variables. + if (!has_decoration(target_id, DecorationBuiltIn)) + return; - SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone; - if (block.continue_block) - continue_type = continue_block_type(get(block.continue_block)); + auto builtin = static_cast(get_decoration(target_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; - // If we have loop variables, stop masking out access to the variable now. - for (auto var_id : block.loop_variables) + // TODO: Fill in for more builtins. + switch (builtin) { - auto &var = get(var_id); - var.loop_variable_enable = true; - // We're not going to declare the variable directly, so emit a copy here. - emit_variable_temporary_copies(var); - } + case BuiltInLayer: + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInFragStencilRefEXT: + case BuiltInSampleMask: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInShadingRateKHR: + expected_type = SPIRType::Int; + break; - // Remember deferred declaration state. We will restore it before returning. 
- SmallVector rearm_dominated_variables(block.dominated_variables.size()); - for (size_t i = 0; i < block.dominated_variables.size(); i++) - { - uint32_t var_id = block.dominated_variables[i]; - auto &var = get(var_id); - rearm_dominated_variables[i] = var.deferred_declaration; + default: + break; } - // This is the method often used by spirv-opt to implement loops. - // The loop header goes straight into the continue block. - // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block, - // it *MUST* be used in the continue block. This loop method will not work. - if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop)) + if (expected_type != expr_type.basetype) { - flush_undeclared_variables(block); - if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop)) - { - if (execution_is_noop(get(block.true_block), get(block.merge_block))) - select_branch_to_false_block = true; - else - select_branch_to_true_block = true; - - emitted_loop_header_variables = true; - force_complex_continue_block = true; - } + auto type = expr_type; + type.basetype = expected_type; + expr = bitcast_expression(type, expr_type.basetype, expr); } - // This is the older loop behavior in glslang which branches to loop body directly from the loop header. - else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop)) - { - flush_undeclared_variables(block); - if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop)) - { - // The body of while, is actually just the true (or false) block, so always branch there unconditionally. 
- if (execution_is_noop(get(block.true_block), get(block.merge_block))) - select_branch_to_false_block = true; - else - select_branch_to_true_block = true; +} - emitted_loop_header_variables = true; - } - } - // This is the newer loop behavior in glslang which branches from Loop header directly to - // a new block, which in turn has a OpBranchSelection without a selection merge. - else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop)) - { - flush_undeclared_variables(block); - if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop)) - { - skip_direct_branch = true; - emitted_loop_header_variables = true; - } - } - else if (continue_type == SPIRBlock::DoWhileLoop) - { - flush_undeclared_variables(block); - emit_while_loop_initializers(block); - emitted_loop_header_variables = true; - // We have some temporaries where the loop header is the dominator. - // We risk a case where we have code like: - // for (;;) { create-temporary; break; } consume-temporary; - // so force-declare temporaries here. - emit_hoisted_temporaries(block.potential_declare_temporary); - statement("do"); - begin_scope(); +void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id) +{ + if (*backend.nonuniform_qualifier == '\0') + return; - emit_block_instructions(block); - } - else if (block.merge == SPIRBlock::MergeLoop) - { - flush_undeclared_variables(block); - emit_while_loop_initializers(block); - emitted_loop_header_variables = true; + auto *var = maybe_get_backing_variable(ptr_id); + if (!var) + return; - // We have a generic loop without any distinguishable pattern like for, while or do while. - get(block.continue_block).complex_continue = true; - continue_type = SPIRBlock::ComplexLoop; + if (var->storage != StorageClassUniformConstant && + var->storage != StorageClassStorageBuffer && + var->storage != StorageClassUniform) + return; - // We have some temporaries where the loop header is the dominator. 
- // We risk a case where we have code like: - // for (;;) { create-temporary; break; } consume-temporary; - // so force-declare temporaries here. - emit_hoisted_temporaries(block.potential_declare_temporary); - statement("for (;;)"); - begin_scope(); + auto &backing_type = get(var->basetype); + if (backing_type.array.empty()) + return; - emit_block_instructions(block); - } - else - { - emit_block_instructions(block); - } + // If we get here, we know we're accessing an arrayed resource which + // might require nonuniform qualifier. - // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem - // as writes to said loop variables might have been masked out, we need a recompile. - if (!emitted_loop_header_variables && !block.loop_variables.empty()) - { - force_recompile(); - for (auto var : block.loop_variables) - get(var).loop_variable = false; - block.loop_variables.clear(); - } + auto start_array_index = expr.find_first_of('['); - flush_undeclared_variables(block); - bool emit_next_block = true; + if (start_array_index == string::npos) + return; - // Handle end of block. - switch (block.terminator) + // We've opened a bracket, track expressions until we can close the bracket. + // This must be our resource index. + size_t end_array_index = string::npos; + unsigned bracket_count = 1; + for (size_t index = start_array_index + 1; index < expr.size(); index++) { - case SPIRBlock::Direct: - // True when emitting complex continue block. - if (block.loop_dominator == block.next_block) - { - branch(block.self, block.next_block); - emit_next_block = false; - } - // True if MergeToDirectForLoop succeeded. 
- else if (skip_direct_branch) - emit_next_block = false; - else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block)) - { - branch(block.self, block.next_block); - emit_next_block = false; - } - break; - - case SPIRBlock::Select: - // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded. - if (select_branch_to_true_block) + if (expr[index] == ']') { - if (force_complex_continue_block) + if (--bracket_count == 0) { - assert(block.true_block == block.continue_block); - - // We're going to emit a continue block directly here, so make sure it's marked as complex. - auto &complex_continue = get(block.continue_block).complex_continue; - bool old_complex = complex_continue; - complex_continue = true; - branch(block.self, block.true_block); - complex_continue = old_complex; + end_array_index = index; + break; } - else - branch(block.self, block.true_block); } - else if (select_branch_to_false_block) - { - if (force_complex_continue_block) - { - assert(block.false_block == block.continue_block); + else if (expr[index] == '[') + bracket_count++; + } - // We're going to emit a continue block directly here, so make sure it's marked as complex. - auto &complex_continue = get(block.continue_block).complex_continue; - bool old_complex = complex_continue; - complex_continue = true; - branch(block.self, block.false_block); - complex_continue = old_complex; - } - else - branch(block.self, block.false_block); - } - else - branch(block.self, block.condition, block.true_block, block.false_block); - break; + assert(bracket_count == 0); - case SPIRBlock::MultiSelect: - { - auto &type = expression_type(block.condition); - bool unsigned_case = - type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte; + // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's + // nothing we can do here to express that. 
+ if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index) + return; - if (block.merge == SPIRBlock::MergeNone) - SPIRV_CROSS_THROW("Switch statement is not structured"); + start_array_index++; - if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64) - { - // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages. - SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors."); - } + expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(", + expr.substr(start_array_index, end_array_index - start_array_index), ")", + expr.substr(end_array_index, string::npos)); +} - const char *label_suffix = ""; - if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix) - label_suffix = "u"; - else if (type.basetype == SPIRType::UShort) - label_suffix = backend.uint16_t_literal_suffix; - else if (type.basetype == SPIRType::Short) - label_suffix = backend.int16_t_literal_suffix; +void CompilerGLSL::emit_block_hints(const SPIRBlock &block) +{ + if ((options.es && options.version < 310) || (!options.es && options.version < 140)) + return; - SPIRBlock *old_emitting_switch = current_emitting_switch; - current_emitting_switch = █ + switch (block.hint) + { + case SPIRBlock::HintFlatten: + require_extension_internal("GL_EXT_control_flow_attributes"); + statement("SPIRV_CROSS_FLATTEN"); + break; + case SPIRBlock::HintDontFlatten: + require_extension_internal("GL_EXT_control_flow_attributes"); + statement("SPIRV_CROSS_BRANCH"); + break; + case SPIRBlock::HintUnroll: + require_extension_internal("GL_EXT_control_flow_attributes"); + statement("SPIRV_CROSS_UNROLL"); + break; + case SPIRBlock::HintDontUnroll: + require_extension_internal("GL_EXT_control_flow_attributes"); + statement("SPIRV_CROSS_LOOP"); + break; + default: + break; + } +} - if (block.need_ladder_break) - statement("bool _", block.self, "_ladder_break = false;"); +void 
CompilerGLSL::preserve_alias_on_reset(uint32_t id) +{ + preserved_aliases[id] = get_name(id); +} - // Find all unique case constructs. - unordered_map> case_constructs; - SmallVector block_declaration_order; - SmallVector literals_to_merge; +void CompilerGLSL::reset_name_caches() +{ + for (auto &preserved : preserved_aliases) + set_name(preserved.first, preserved.second); - // If a switch case branches to the default block for some reason, we can just remove that literal from consideration - // and let the default: block handle it. - // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here. - // We only need to consider possible fallthrough if order[i] branches to order[i + 1]. - for (auto &c : block.cases) - { - if (c.block != block.next_block && c.block != block.default_block) - { - if (!case_constructs.count(c.block)) - block_declaration_order.push_back(c.block); - case_constructs[c.block].push_back(c.value); - } - else if (c.block == block.next_block && block.default_block != block.next_block) - { - // We might have to flush phi inside specific case labels. - // If we can piggyback on default:, do so instead. - literals_to_merge.push_back(c.value); - } - } + preserved_aliases.clear(); + resource_names.clear(); + block_input_names.clear(); + block_output_names.clear(); + block_ubo_names.clear(); + block_ssbo_names.clear(); + block_names.clear(); + function_overloads.clear(); +} - // Empty literal array -> default. - if (block.default_block != block.next_block) - { - auto &default_block = get(block.default_block); +void CompilerGLSL::fixup_anonymous_struct_names(std::unordered_set &visited, const SPIRType &type) +{ + if (visited.count(type.self)) + return; + visited.insert(type.self); - // We need to slide in the default block somewhere in this chain - // if there are fall-through scenarios since the default is declared separately in OpSwitch. 
- // Only consider trivial fall-through cases here. - size_t num_blocks = block_declaration_order.size(); - bool injected_block = false; + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + auto &mbr_type = get(type.member_types[i]); - for (size_t i = 0; i < num_blocks; i++) + if (mbr_type.basetype == SPIRType::Struct) + { + // If there are multiple aliases, the output might be somewhat unpredictable, + // but the only real alternative in that case is to do nothing, which isn't any better. + // This check should be fine in practice. + if (get_name(mbr_type.self).empty() && !get_member_name(type.self, i).empty()) { - auto &case_block = get(block_declaration_order[i]); - if (execution_is_direct_branch(case_block, default_block)) - { - // Fallthrough to default block, we must inject the default block here. - block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block); - injected_block = true; - break; - } - else if (execution_is_direct_branch(default_block, case_block)) - { - // Default case is falling through to another case label, we must inject the default block here. - block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block); - injected_block = true; - break; - } + auto anon_name = join("anon_", get_member_name(type.self, i)); + ParsedIR::sanitize_underscores(anon_name); + set_name(mbr_type.self, anon_name); } - // Order does not matter. - if (!injected_block) - block_declaration_order.push_back(block.default_block); - - case_constructs[block.default_block] = {}; + fixup_anonymous_struct_names(visited, mbr_type); } + } +} - size_t num_blocks = block_declaration_order.size(); +void CompilerGLSL::fixup_anonymous_struct_names() +{ + // HLSL codegen can often end up emitting anonymous structs inside blocks, which + // breaks GL linking since all names must match ... + // Try to emit sensible code, so attempt to find such structs and emit anon_$member. 
- const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string { - return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal)); - }; + // Breaks exponential explosion with weird type trees. + std::unordered_set visited; - // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture, - // we need to flush phi nodes outside the switch block in a branch, - // and skip any Phi handling inside the case label to make fall-through work as expected. - // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this - // inside the case label if at all possible. - for (size_t i = 1; i < num_blocks; i++) + ir.for_each_typed_id([&](uint32_t, SPIRType &type) { + if (type.basetype == SPIRType::Struct && + (has_decoration(type.self, DecorationBlock) || + has_decoration(type.self, DecorationBufferBlock))) { - if (flush_phi_required(block.self, block_declaration_order[i]) && - flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i])) - { - uint32_t target_block = block_declaration_order[i]; - - // Make sure we flush Phi, it might have been marked to be ignored earlier. - get(target_block).ignore_phi_from_block = 0; + fixup_anonymous_struct_names(visited, type); + } + }); +} - auto &literals = case_constructs[target_block]; +void CompilerGLSL::fixup_type_alias() +{ + // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. + ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { + if (!type.type_alias) + return; - if (literals.empty()) - { - // Oh boy, gotta make a complete negative test instead! o.o - // Find all possible literals that would *not* make us enter the default block. - // If none of those literals match, we flush Phi ... 
- SmallVector conditions; - for (size_t j = 0; j < num_blocks; j++) - { - auto &negative_literals = case_constructs[block_declaration_order[j]]; - for (auto &case_label : negative_literals) - conditions.push_back(join(to_enclosed_expression(block.condition), - " != ", to_case_label(case_label, unsigned_case))); - } + if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) + { + // Top-level block types should never alias anything else. + type.type_alias = 0; + } + else if (type_is_block_like(type) && type.self == ID(self)) + { + // A block-like type is any type which contains Offset decoration, but not top-level blocks, + // i.e. blocks which are placed inside buffers. + // Become the master. + ir.for_each_typed_id([&](uint32_t other_id, SPIRType &other_type) { + if (other_id == self) + return; - statement("if (", merge(conditions, " && "), ")"); - begin_scope(); - flush_phi(block.self, target_block); - end_scope(); - } - else - { - SmallVector conditions; - conditions.reserve(literals.size()); - for (auto &case_label : literals) - conditions.push_back(join(to_enclosed_expression(block.condition), - " == ", to_case_label(case_label, unsigned_case))); - statement("if (", merge(conditions, " || "), ")"); - begin_scope(); - flush_phi(block.self, target_block); - end_scope(); - } + if (other_type.type_alias == type.type_alias) + other_type.type_alias = self; + }); - // Mark the block so that we don't flush Phi from header to case label. - get(target_block).ignore_phi_from_block = block.self; - } + this->get(type.type_alias).type_alias = self; + type.type_alias = 0; } + }); +} - emit_block_hints(block); - statement("switch (", to_expression(block.condition), ")"); - begin_scope(); +void CompilerGLSL::reorder_type_alias() +{ + // Reorder declaration of types so that the master of the type alias is always emitted first. 
+ // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which + // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here. + auto loop_lock = ir.create_loop_hard_lock(); - for (size_t i = 0; i < num_blocks; i++) + auto &type_ids = ir.ids_for_type[TypeType]; + for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr) + { + auto &type = get(*alias_itr); + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) { - uint32_t target_block = block_declaration_order[i]; - auto &literals = case_constructs[target_block]; + // We will skip declaring this type, so make sure the type_alias type comes before. + auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias)); + assert(master_itr != end(type_ids)); - if (literals.empty()) - { - // Default case. - statement("default:"); - } - else + if (alias_itr < master_itr) { - for (auto &case_literal : literals) - { - // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here. - statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":"); - } - } + // Must also swap the type order for the constant-type joined array. + auto &joined_types = ir.ids_for_constant_undef_or_type; + auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr); + auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr); + assert(alt_alias_itr != end(joined_types)); + assert(alt_master_itr != end(joined_types)); - auto &case_block = get(target_block); - if (backend.support_case_fallthrough && i + 1 < num_blocks && - execution_is_direct_branch(case_block, get(block_declaration_order[i + 1]))) - { - // We will fall through here, so just terminate the block chain early. - // We still need to deal with Phi potentially. 
- // No need for a stack-like thing here since we only do fall-through when there is a - // single trivial branch to fall-through target.. - current_emitting_switch_fallthrough = true; + swap(*alias_itr, *master_itr); + swap(*alt_alias_itr, *alt_master_itr); } - else - current_emitting_switch_fallthrough = false; - - begin_scope(); - branch(block.self, target_block); - end_scope(); - - current_emitting_switch_fallthrough = false; } + } +} - // Might still have to flush phi variables if we branch from loop header directly to merge target. - if (flush_phi_required(block.self, block.next_block)) - { - if (block.default_block == block.next_block || !literals_to_merge.empty()) - { - for (auto &case_literal : literals_to_merge) - statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":"); +void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) +{ + // If we are redirecting statements, ignore the line directive. + // Common case here is continue blocks. + if (redirect_statement) + return; - if (block.default_block == block.next_block) - statement("default:"); + if (options.emit_line_directives) + { + require_extension_internal("GL_GOOGLE_cpp_style_line_directive"); + statement_no_indent("#line ", line_literal, " \"", get(file_id).str, "\""); + } +} - begin_scope(); - flush_phi(block.self, block.next_block); - statement("break;"); - end_scope(); - } - } +void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id, + SmallVector chain) +{ + // Fully unroll all member/array indices one by one. - end_scope(); + auto &lhs_type = get(lhs_type_id); + auto &rhs_type = get(rhs_type_id); - if (block.need_ladder_break) + if (!lhs_type.array.empty()) + { + // Could use a loop here to support specialization constants, but it gets rather complicated with nested array types, + // and this is a rather obscure opcode anyways, keep it simple unless we are forced to. 
+ uint32_t array_size = to_array_size_literal(lhs_type); + chain.push_back(0); + + for (uint32_t i = 0; i < array_size; i++) { - statement("if (_", block.self, "_ladder_break)"); - begin_scope(); - statement("break;"); - end_scope(); + chain.back() = i; + emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain); + } + } + else if (lhs_type.basetype == SPIRType::Struct) + { + chain.push_back(0); + uint32_t member_count = uint32_t(lhs_type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + chain.back() = i; + emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain); } - - current_emitting_switch = old_emitting_switch; - break; } + else + { + // Need to handle unpack/packing fixups since this can differ wildly between the logical types, + // particularly in MSL. + // To deal with this, we emit access chains and go through emit_store_statement + // to deal with all the special cases we can encounter. - case SPIRBlock::Return: - for (auto &line : current_function->fixup_hooks_out) - line(); + AccessChainMeta lhs_meta, rhs_meta; + auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta); + auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta); - if (processing_entry_point) - emit_fixup(); + uint32_t id = ir.increase_bound_by(2); + lhs_id = id; + rhs_id = id + 1; - if (block.return_value) { - auto &type = expression_type(block.return_value); - if (!type.array.empty() && !backend.can_return_array) - { - // If we cannot return arrays, we will have a special out argument we can write to instead. - // The backend is responsible for setting this up, and redirection the return values as appropriate. 
- if (ir.ids[block.return_value].get_type() != TypeUndef) - emit_array_copy("SPIRV_Cross_return_value", block.return_value); + auto &lhs_expr = set(lhs_id, std::move(lhs), lhs_type_id, true); + lhs_expr.need_transpose = lhs_meta.need_transpose; - if (!block_is_outside_flow_control_from_block(get(current_function->entry_block), block) || - block.loop_dominator != SPIRBlock::NoDominator) - { - statement("return;"); - } - } - else - { - // OpReturnValue can return Undef, so don't emit anything for this case. - if (ir.ids[block.return_value].get_type() != TypeUndef) - statement("return ", to_expression(block.return_value), ";"); - } + if (lhs_meta.storage_is_packed) + set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked); + if (lhs_meta.storage_physical_type != 0) + set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type); + + forwarded_temporaries.insert(lhs_id); + suppressed_usage_tracking.insert(lhs_id); } - // If this block is the very final block and not called from control flow, - // we do not need an explicit return which looks out of place. Just end the function here. - // In the very weird case of for(;;) { return; } executing return is unconditional, - // but we actually need a return here ... 
- else if (!block_is_outside_flow_control_from_block(get(current_function->entry_block), block) || - block.loop_dominator != SPIRBlock::NoDominator) + { - statement("return;"); - } - break; + auto &rhs_expr = set(rhs_id, std::move(rhs), rhs_type_id, true); + rhs_expr.need_transpose = rhs_meta.need_transpose; - case SPIRBlock::Kill: - statement(backend.discard_literal, ";"); - break; + if (rhs_meta.storage_is_packed) + set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked); + if (rhs_meta.storage_physical_type != 0) + set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type); - case SPIRBlock::Unreachable: - emit_next_block = false; - break; + forwarded_temporaries.insert(rhs_id); + suppressed_usage_tracking.insert(rhs_id); + } - default: - SPIRV_CROSS_THROW("Unimplemented block terminator."); + emit_store_statement(lhs_id, rhs_id); } +} - if (block.next_block && emit_next_block) - { - // If we hit this case, we're dealing with an unconditional branch, which means we will output - // that block after this. If we had selection merge, we already flushed phi variables. - if (block.merge != SPIRBlock::MergeSelection) - flush_phi(block.self, block.next_block); +bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const +{ + if (!has_decoration(id, DecorationInputAttachmentIndex)) + return false; - // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. 
- if (!current_emitting_switch_fallthrough) + uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex); + for (auto &remap : subpass_to_framebuffer_fetch_attachment) + if (remap.first == input_attachment_index) + return true; + + return false; +} + +const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const +{ + const SPIRVariable *ret = nullptr; + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + if (has_decoration(var.self, DecorationInputAttachmentIndex) && + get_decoration(var.self, DecorationInputAttachmentIndex) == index) { - // For merge selects we might have ignored the fact that a merge target - // could have been a break; or continue; - // We will need to deal with it here. - if (is_loop_break(block.next_block)) - { - // Cannot check for just break, because switch statements will also use break. - assert(block.merge == SPIRBlock::MergeSelection); - statement("break;"); - } - else if (is_continue(block.next_block)) - { - assert(block.merge == SPIRBlock::MergeSelection); - branch_to_continue(block.self, block.next_block); - } - else if (block.self != block.next_block) - emit_block_chain(get(block.next_block)); + ret = &var; } - } + }); + return ret; +} - if (block.merge == SPIRBlock::MergeLoop) - { - if (continue_type == SPIRBlock::DoWhileLoop) - { - // Make sure that we run the continue block to get the expressions set, but this - // should become an empty string. - // We have no fallbacks if we cannot forward everything to temporaries ... 
- const auto &continue_block = get(block.continue_block); - bool positive_test = execution_is_noop(get(continue_block.true_block), - get(continue_block.loop_dominator)); +const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const +{ + const SPIRVariable *ret = nullptr; + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location) + ret = &var; + }); + return ret; +} - uint32_t current_count = statement_count; - auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test); - if (statement_count != current_count) +void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs() +{ + for (auto &remap : subpass_to_framebuffer_fetch_attachment) + { + auto *subpass_var = find_subpass_input_by_attachment_index(remap.first); + auto *output_var = find_color_output_by_location(remap.second); + if (!subpass_var) + continue; + if (!output_var) + SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able " + "to read from it."); + if (is_array(get(output_var->basetype))) + SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs."); + + auto &func = get(get_entry_point().self); + func.fixup_hooks_in.push_back([=]() { + if (is_legacy()) { - // The DoWhile block has side effects, force ComplexLoop pattern next pass. - get(block.continue_block).complex_continue = true; - force_recompile(); + statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[", + get_decoration(output_var->self, DecorationLocation), "];"); } + else + { + uint32_t num_rt_components = this->get(output_var->basetype).vecsize; + statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ", + to_expression(output_var->self), ";"); + } + }); + } +} - // Might have to invert the do-while test here. 
- auto condition = to_expression(continue_block.condition); - if (!positive_test) - condition = join("!", enclose_expression(condition)); +bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const +{ + return is_depth_image(get(get(id).basetype), id); +} - end_scope_decl(join("while (", condition, ")")); - } - else - end_scope(); +const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c) +{ + static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot", + "GL_KHR_shader_subgroup_basic", + "GL_KHR_shader_subgroup_vote", + "GL_NV_gpu_shader_5", + "GL_NV_shader_thread_group", + "GL_NV_shader_thread_shuffle", + "GL_ARB_shader_ballot", + "GL_ARB_shader_group_vote", + "GL_AMD_gcn_shader" }; + return retval[c]; +} - // We cannot break out of two loops at once, so don't check for break; here. - // Using block.self as the "from" block isn't quite right, but it has the same scope - // and dominance structure, so it's fine. - if (is_continue(block.merge_block)) - branch_to_continue(block.self, block.merge_block); - else - emit_block_chain(get(block.merge_block)); +SmallVector CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c) +{ + switch (c) + { + case ARB_shader_ballot: + return { "GL_ARB_shader_int64" }; + case AMD_gcn_shader: + return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" }; + default: + return {}; } +} - // Forget about control dependent expressions now. - block.invalidate_expressions.clear(); - - // After we return, we must be out of scope, so if we somehow have to re-emit this function, - // re-declare variables if necessary. 
- assert(rearm_dominated_variables.size() == block.dominated_variables.size()); - for (size_t i = 0; i < block.dominated_variables.size(); i++) +const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c) +{ + switch (c) { - uint32_t var = block.dominated_variables[i]; - get(var).deferred_declaration = rearm_dominated_variables[i]; + case ARB_shader_ballot: + return "defined(GL_ARB_shader_int64)"; + case AMD_gcn_shader: + return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))"; + default: + return ""; } +} - // Just like for deferred declaration, we need to forget about loop variable enable - // if our block chain is reinstantiated later. - for (auto &var_id : block.loop_variables) - get(var_id).loop_variable_enable = false; +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_feature_dependencies(Feature feature) +{ + switch (feature) + { + case SubgroupAllEqualT: + return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool }; + case SubgroupElect: + return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID }; + case SubgroupInverseBallot_InclBitCount_ExclBitCout: + return { SubgroupMask }; + case SubgroupBallotBitCount: + return { SubgroupBallot }; + default: + return {}; + } } -void CompilerGLSL::begin_scope() +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper:: + get_feature_dependency_mask(Feature feature) { - statement("{"); - indent++; + return build_mask(get_feature_dependencies(feature)); } -void CompilerGLSL::end_scope() +bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature) { - if (!indent) - SPIRV_CROSS_THROW("Popping empty indent stack."); - indent--; - statement("}"); + static const bool retval[FeatureCount] = { false, false, false, false, false, false, + true, // SubgroupBalloFindLSB_MSB + false, false, false, 
false, + true, // SubgroupMemBarrier - replaced with workgroup memory barriers + false, false, true, false }; + + return retval[feature]; } -void CompilerGLSL::end_scope_decl() +CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper:: + get_KHR_extension_for_feature(Feature feature) { - if (!indent) - SPIRV_CROSS_THROW("Popping empty indent stack."); - indent--; - statement("};"); + static const Candidate extensions[FeatureCount] = { + KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, + KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote, + KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, + KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot + }; + + return extensions[feature]; } -void CompilerGLSL::end_scope_decl(const string &decl) +void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature) { - if (!indent) - SPIRV_CROSS_THROW("Popping empty indent stack."); - indent--; - statement("} ", decl, ";"); + feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature); +} + +bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const +{ + return (feature_mask & (1u << feature)) != 0; } -void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length) +CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const { - // If our variable is remapped, and we rely on type-remapping information as - // well, then we cannot pass the variable as a function parameter. - // Fixing this is non-trivial without stamping out variants of the same function, - // so for now warn about this and suggest workarounds instead. 
- for (uint32_t i = 0; i < length; i++) - { - auto *var = maybe_get(args[i]); - if (!var || !var->remapped_variable) - continue; + Result res; - auto &type = get(var->basetype); - if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) + for (uint32_t i = 0u; i < FeatureCount; ++i) + { + if (feature_mask & (1u << i)) { - SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. " - "This will not work correctly because type-remapping information is lost. " - "To workaround, please consider not passing the subpass input as a function parameter, " - "or use in/out variables instead which do not need type remapping information."); + auto feature = static_cast(i); + std::unordered_set unique_candidates; + + auto candidates = get_candidates_for_feature(feature); + unique_candidates.insert(candidates.begin(), candidates.end()); + + auto deps = get_feature_dependencies(feature); + for (Feature d : deps) + { + candidates = get_candidates_for_feature(d); + if (!candidates.empty()) + unique_candidates.insert(candidates.begin(), candidates.end()); + } + + for (uint32_t c : unique_candidates) + ++res.weights[static_cast(c)]; } } + + return res; } -const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) +CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_candidates_for_feature(Feature ft, const Result &r) { - // FIXME: This is kind of hacky. There should be a cleaner way. 
- auto offset = uint32_t(&instr - current_emitting_block->ops.data()); - if ((offset + 1) < current_emitting_block->ops.size()) - return ¤t_emitting_block->ops[offset + 1]; - else - return nullptr; + auto c = get_candidates_for_feature(ft); + auto cmp = [&r](Candidate a, Candidate b) { + if (r.weights[a] == r.weights[b]) + return a < b; // Prefer candidates with lower enum value + return r.weights[a] > r.weights[b]; + }; + std::sort(c.begin(), c.end(), cmp); + return c; } -uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) +CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_candidates_for_feature(Feature feature) { - return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask | - MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | - MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); + switch (feature) + { + case SubgroupMask: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupSize: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot }; + case SubgroupInvocationID: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupID: + return { KHR_shader_subgroup_basic, NV_shader_thread_group }; + case NumSubgroups: + return { KHR_shader_subgroup_basic, NV_shader_thread_group }; + case SubgroupBroadcast_First: + return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot }; + case SubgroupBallotFindLSB_MSB: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group }; + case SubgroupAll_Any_AllEqualBool: + return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader }; + case SubgroupAllEqualT: + return {}; // depends on other features only + case SubgroupElect: + return {}; // depends on other features only + case SubgroupBallot: + 
return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupBarrier: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader }; + case SubgroupMemBarrier: + return { KHR_shader_subgroup_basic }; + case SubgroupInverseBallot_InclBitCount_ExclBitCout: + return {}; + case SubgroupBallotBitExtract: + return { NV_shader_thread_group }; + case SubgroupBallotBitCount: + return {}; + default: + return {}; + } } -void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id) +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask( + const SmallVector &features) { - statement(lhs, " = ", to_expression(rhs_id), ";"); + FeatureMask mask = 0; + for (Feature f : features) + mask |= FeatureMask(1) << f; + return mask; } -void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr) +CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result() { - if (!backend.force_gl_in_out_block) - return; - // This path is only relevant for GL backends. + for (auto &weight : weights) + weight = 0; + + // Make sure KHR_shader_subgroup extensions are always prefered. + const uint32_t big_num = FeatureCount; + weights[KHR_shader_subgroup_ballot] = big_num; + weights[KHR_shader_subgroup_basic] = big_num; + weights[KHR_shader_subgroup_vote] = big_num; +} - auto *var = maybe_get(source_id); +void CompilerGLSL::request_workaround_wrapper_overload(TypeID id) +{ + // Must be ordered to maintain deterministic output, so vector is appropriate. 
+ if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) == + end(workaround_ubo_load_overload_types)) + { + force_recompile(); + workaround_ubo_load_overload_types.push_back(id); + } +} + +void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr) +{ + // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic. + // To load these types correctly, we must first wrap them in a dummy function which only purpose is to + // ensure row_major decoration is actually respected. + auto *var = maybe_get_backing_variable(ptr); if (!var) return; - if (var->storage != StorageClassInput) + auto &backing_type = get(var->basetype); + bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform && + has_decoration(backing_type.self, DecorationBlock); + if (!is_ubo) return; - auto &type = get_variable_data_type(*var); - if (type.array.empty()) - return; + auto *type = &get(loaded_type); + bool rewrite = false; + bool relaxed = options.es; - auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); - bool is_builtin = is_builtin_variable(*var) && (builtin == BuiltInPointSize || builtin == BuiltInPosition); - bool is_tess = is_tessellation_shader(); + if (is_matrix(*type)) + { + // To avoid adding a lot of unnecessary meta tracking to forward the row_major state, + // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state. + // If there is any row-major action going on, we apply the workaround. + // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution. + // If an access chain occurred, the workaround is not required, so loading vectors or scalars don't need workaround. + type = &backing_type; + } + else + { + // If we're loading a composite, we don't have overloads like these. 
+ relaxed = false; + } - // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it. - // We must unroll the array load. - // For builtins, we couldn't catch this case normally, - // because this is resolved in the OpAccessChain in most cases. - // If we load the entire array, we have no choice but to unroll here. - if (is_builtin || is_tess) + if (type->basetype == SPIRType::Struct) { - auto new_expr = join("_", target_id, "_unrolled"); - statement(variable_decl(type, new_expr, target_id), ";"); - string array_expr; - if (type.array_size_literal.front()) + // If we're loading a struct where any member is a row-major matrix, apply the workaround. + for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++) { - array_expr = convert_to_string(type.array.front()); - if (type.array.front() == 0) - SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); - } - else - array_expr = to_expression(type.array.front()); + auto decorations = combined_decoration_for_member(*type, i); + if (decorations.get(DecorationRowMajor)) + rewrite = true; - // The array size might be a specialization constant, so use a for-loop instead. - statement("for (int i = 0; i < int(", array_expr, "); i++)"); - begin_scope(); - if (is_builtin) - statement(new_expr, "[i] = gl_in[i].", expr, ";"); - else - statement(new_expr, "[i] = ", expr, "[i];"); - end_scope(); + // Since we decide on a per-struct basis, only use mediump wrapper if all candidates are mediump. + if (!decorations.get(DecorationRelaxedPrecision)) + relaxed = false; + } + } - expr = move(new_expr); + if (rewrite) + { + request_workaround_wrapper_overload(loaded_type); + expr = join("spvWorkaroundRowMajor", (relaxed ? 
"MP" : ""), "(", expr, ")"); } } -void CompilerGLSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) +void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component) { - auto *var = maybe_get_backing_variable(source_id); - if (var) - source_id = var->self; - - // Only interested in standalone builtin variables. - if (!has_decoration(source_id, DecorationBuiltIn)) - return; + masked_output_locations.insert({ location, component }); +} - auto builtin = static_cast(get_decoration(source_id, DecorationBuiltIn)); - auto expected_type = expr_type.basetype; +void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin) +{ + masked_output_builtins.insert(builtin); +} - // TODO: Fill in for more builtins. - switch (builtin) - { - case BuiltInLayer: - case BuiltInPrimitiveId: - case BuiltInViewportIndex: - case BuiltInInstanceId: - case BuiltInInstanceIndex: - case BuiltInVertexId: - case BuiltInVertexIndex: - case BuiltInSampleId: - case BuiltInBaseVertex: - case BuiltInBaseInstance: - case BuiltInDrawIndex: - case BuiltInFragStencilRefEXT: - expected_type = SPIRType::Int; - break; +bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const +{ + auto &type = get(var.basetype); + bool is_block = has_decoration(type.self, DecorationBlock); + // Blocks by themselves are never masked. Must be masked per-member. 
+ if (is_block) + return false; - case BuiltInGlobalInvocationId: - case BuiltInLocalInvocationId: - case BuiltInWorkgroupId: - case BuiltInLocalInvocationIndex: - case BuiltInWorkgroupSize: - case BuiltInNumWorkgroups: - expected_type = SPIRType::UInt; - break; + bool is_builtin = has_decoration(var.self, DecorationBuiltIn); - default: - break; + if (is_builtin) + { + return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn))); } + else + { + if (!has_decoration(var.self, DecorationLocation)) + return false; - if (expected_type != expr_type.basetype) - expr = bitcast_expression(expr_type, expected_type, expr); + return is_stage_output_location_masked( + get_decoration(var.self, DecorationLocation), + get_decoration(var.self, DecorationComponent)); + } } -void CompilerGLSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) +bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const { - // Only interested in standalone builtin variables. - if (!has_decoration(target_id, DecorationBuiltIn)) - return; - - auto builtin = static_cast(get_decoration(target_id, DecorationBuiltIn)); - auto expected_type = expr_type.basetype; + auto &type = get(var.basetype); + bool is_block = has_decoration(type.self, DecorationBlock); + if (!is_block) + return false; - // TODO: Fill in for more builtins. 
- switch (builtin) + BuiltIn builtin = BuiltInMax; + if (is_member_builtin(type, index, &builtin)) { - case BuiltInLayer: - case BuiltInPrimitiveId: - case BuiltInViewportIndex: - case BuiltInFragStencilRefEXT: - expected_type = SPIRType::Int; - break; - - default: - break; + return is_stage_output_builtin_masked(builtin); } - - if (expected_type != expr_type.basetype) + else { - auto type = expr_type; - type.basetype = expected_type; - expr = bitcast_expression(type, expr_type.basetype, expr); + uint32_t location = get_declared_member_location(var, index, strip_array); + uint32_t component = get_member_decoration(type.self, index, DecorationComponent); + return is_stage_output_location_masked(location, component); } } -void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr) +bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const { - if (*backend.nonuniform_qualifier == '\0') - return; + if (has_decoration(var.self, DecorationPerPrimitiveEXT)) + return true; - // Handle SPV_EXT_descriptor_indexing. - if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage || - type.basetype == SPIRType::Image) - { - // The image/sampler ID must be declared as non-uniform. - // However, it is not legal GLSL to have - // nonuniformEXT(samplers[index]), so we must move the nonuniform qualifier - // to the array indexing, like - // samplers[nonuniformEXT(index)]. - // While the access chain will generally be nonuniformEXT, it's not necessarily so, - // so we might have to fixup the OpLoad-ed expression late. - - auto start_array_index = expr.find_first_of('['); - auto end_array_index = expr.find_last_of(']'); - // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's - // nothing we can do here to express that. 
- if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index) - return; + auto &type = get(var.basetype); + if (!has_decoration(type.self, DecorationBlock)) + return false; - start_array_index++; + for (uint32_t i = 0, n = uint32_t(type.member_types.size()); i < n; i++) + if (!has_member_decoration(type.self, i, DecorationPerPrimitiveEXT)) + return false; - expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(", - expr.substr(start_array_index, end_array_index - start_array_index), ")", - expr.substr(end_array_index, string::npos)); - } + return true; } -void CompilerGLSL::emit_block_hints(const SPIRBlock &) +bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const { + return masked_output_locations.count({ location, component }) != 0; } -void CompilerGLSL::preserve_alias_on_reset(uint32_t id) +bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const { - preserved_aliases[id] = get_name(id); + return masked_output_builtins.count(builtin) != 0; } -void CompilerGLSL::reset_name_caches() +uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const { - for (auto &preserved : preserved_aliases) - set_name(preserved.first, preserved.second); - - preserved_aliases.clear(); - resource_names.clear(); - block_input_names.clear(); - block_output_names.clear(); - block_ubo_names.clear(); - block_ssbo_names.clear(); - block_names.clear(); - function_overloads.clear(); + auto &block_type = get(var.basetype); + if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation)) + return get_member_decoration(block_type.self, mbr_idx, DecorationLocation); + else + return get_accumulated_member_location(var, mbr_idx, strip_array); } -void CompilerGLSL::fixup_type_alias() +uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool 
strip_array) const { - // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. - // FIXME: Multiple alias types which are both block-like will be awkward, for now, it's best to just drop the type - // alias if the slave type is a block type. - ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { - if (type.type_alias && type_is_block_like(type)) - { - // Become the master. - ir.for_each_typed_id([&](uint32_t other_id, SPIRType &other_type) { - if (other_id == type.self) - return; - - if (other_type.type_alias == type.type_alias) - other_type.type_alias = type.self; - }); - - this->get(type.type_alias).type_alias = self; - type.type_alias = 0; - } - }); - - ir.for_each_typed_id([&](uint32_t, SPIRType &type) { - if (type.type_alias && type_is_block_like(type)) - { - // This is not allowed, drop the type_alias. - type.type_alias = 0; - } - }); -} + auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + uint32_t location = get_decoration(var.self, DecorationLocation); -void CompilerGLSL::reorder_type_alias() -{ - // Reorder declaration of types so that the master of the type alias is always emitted first. - // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which - // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here. - auto &type_ids = ir.ids_for_type[TypeType]; - for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr) + for (uint32_t i = 0; i < mbr_idx; i++) { - auto &type = get(*alias_itr); - if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked)) - { - // We will skip declaring this type, so make sure the type_alias type comes before. 
- auto master_itr = find(begin(type_ids), end(type_ids), type.type_alias); - assert(master_itr != end(type_ids)); + auto &mbr_type = get(type.member_types[i]); - if (alias_itr < master_itr) - { - // Must also swap the type order for the constant-type joined array. - auto &joined_types = ir.ids_for_constant_or_type; - auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr); - auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr); - assert(alt_alias_itr != end(joined_types)); - assert(alt_master_itr != end(joined_types)); + // Start counting from any place we have a new location decoration. + if (has_member_decoration(type.self, mbr_idx, DecorationLocation)) + location = get_member_decoration(type.self, mbr_idx, DecorationLocation); - swap(*alias_itr, *master_itr); - swap(*alt_alias_itr, *alt_master_itr); - } - } + uint32_t location_count = type_to_location_count(mbr_type); + location += location_count; } + + return location; } -void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) +StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr) { - // If we are redirecting statements, ignore the line directive. - // Common case here is continue blocks. - if (redirect_statement) - return; - - if (options.emit_line_directives) - { - require_extension_internal("GL_GOOGLE_cpp_style_line_directive"); - statement_no_indent("#line ", line_literal, " \"", get(file_id).str, "\""); + auto *var = maybe_get_backing_variable(ptr); + + // If the expression has been lowered to a temporary, we need to use the Generic storage class. + // We're looking for the effective storage class of a given expression. + // An access chain or forwarded OpLoads from such access chains + // will generally have the storage class of the underlying variable, but if the load was not forwarded + // we have lost any address space qualifiers. 
+ bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get(ptr).access_chain && + (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0); + + if (var && !forced_temporary) + { + if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup)) + return StorageClassWorkgroup; + if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer)) + return StorageClassStorageBuffer; + + // Normalize SSBOs to StorageBuffer here. + if (var->storage == StorageClassUniform && + has_decoration(get(var->basetype).self, DecorationBufferBlock)) + return StorageClassStorageBuffer; + else + return var->storage; } + else + return expression_type(ptr).storage; } -void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id) +uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const { - // SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen, - // we need to know NonUniformEXT a little earlier, when the resource is actually loaded. - // Back-propagate the qualifier based on the expression dependency chain. 
- - if (!has_decoration(id, DecorationNonUniformEXT)) - { - set_decoration(id, DecorationNonUniformEXT); - force_recompile(); - } - - auto *e = maybe_get(id); - auto *combined = maybe_get(id); - auto *chain = maybe_get(id); - if (e) - { - for (auto &expr : e->expression_dependencies) - propagate_nonuniform_qualifier(expr); - for (auto &expr : e->implied_read_expressions) - propagate_nonuniform_qualifier(expr); - } - else if (combined) + uint32_t count; + if (type.basetype == SPIRType::Struct) { - propagate_nonuniform_qualifier(combined->image); - propagate_nonuniform_qualifier(combined->sampler); + uint32_t mbr_count = uint32_t(type.member_types.size()); + count = 0; + for (uint32_t i = 0; i < mbr_count; i++) + count += type_to_location_count(get(type.member_types[i])); } - else if (chain) + else { - for (auto &expr : chain->implied_read_expressions) - propagate_nonuniform_qualifier(expr); + count = type.columns > 1 ? type.columns : 1; } + + uint32_t dim_count = uint32_t(type.array.size()); + for (uint32_t i = 0; i < dim_count; i++) + count *= to_array_size_literal(type, i); + + return count; } diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index e1eb39bf7a2..4dcde5540f4 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #ifndef SPIRV_CROSS_GLSL_HPP #define SPIRV_CROSS_GLSL_HPP @@ -56,7 +63,10 @@ enum AccessChainFlagBits ACCESS_CHAIN_INDEX_IS_LITERAL_BIT = 1 << 0, ACCESS_CHAIN_CHAIN_ONLY_BIT = 1 << 1, ACCESS_CHAIN_PTR_CHAIN_BIT = 1 << 2, - ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT = 1 << 3 + ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT = 1 << 3, + ACCESS_CHAIN_LITERAL_MSB_FORCE_ID = 1 << 4, + ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT = 1 << 5, + ACCESS_CHAIN_FORCE_COMPOSITE_BIT = 1 << 6 }; typedef uint32_t AccessChainFlags; @@ -73,6 +83,11 @@ class CompilerGLSL : public Compiler // Debug option to always emit temporary variables for all expressions. bool force_temporary = false; + // Debug option, can be increased in an attempt to workaround SPIRV-Cross bugs temporarily. + // If this limit has to be increased, it points to an implementation bug. + // In certain scenarios, the maximum number of debug iterations may increase beyond this limit + // as long as we can prove we're making certain kinds of forward progress. + uint32_t force_recompile_max_debug_iterations = 3; // If true, Vulkan GLSL features are used instead of GL-compatible features. // Mostly useful for debugging SPIR-V files. @@ -107,6 +122,38 @@ class CompilerGLSL : public Compiler // May not correspond exactly to original source, but should be a good approximation. bool emit_line_directives = false; + // In cases where readonly/writeonly decoration are not used at all, + // we try to deduce which qualifier(s) we should actually used, since actually emitting + // read-write decoration is very rare, and older glslang/HLSL compilers tend to just emit readwrite as a matter of fact. + // The default (true) is to enable automatic deduction for these cases, but if you trust the decorations set + // by the SPIR-V, it's recommended to set this to false. + bool enable_storage_image_qualifier_deduction = true; + + // On some targets (WebGPU), uninitialized variables are banned. 
+ // If this is enabled, all variables (temporaries, Private, Function) + // which would otherwise be uninitialized will now be initialized to 0 instead. + bool force_zero_initialized_variables = false; + + // In GLSL, force use of I/O block flattening, similar to + // what happens on legacy GLSL targets for blocks and structs. + bool force_flattened_io_blocks = false; + + // For opcodes where we have to perform explicit additional nan checks, very ugly code is generated. + // If we opt-in, ignore these requirements. + // In opcodes like NClamp/NMin/NMax and FP compare, ignore NaN behavior. + // Use FClamp/FMin/FMax semantics for clamps and lets implementation choose ordered or unordered + // compares. + bool relax_nan_checks = false; + + // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic. + // To load these types correctly, we must generate a wrapper. them in a dummy function which only purpose is to + // ensure row_major decoration is actually respected. + // This workaround may cause significant performance degeneration on some Android devices. + bool enable_row_major_load_workaround = true; + + // If non-zero, controls layout(num_views = N) in; in GL_OVR_multiview2. + uint32_t ovr_multiview_view_count = 0; + enum Precision { DontCare, @@ -115,14 +162,16 @@ class CompilerGLSL : public Compiler Highp }; - struct + struct VertexOptions { - // GLSL: In vertex shaders, rewrite [0, w] depth (Vulkan/D3D style) to [-w, w] depth (GL style). - // MSL: In vertex shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. - // HLSL: In vertex shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. + // "Vertex-like shader" here is any shader stage that can write BuiltInPosition. + + // GLSL: In vertex-like shaders, rewrite [0, w] depth (Vulkan/D3D style) to [-w, w] depth (GL style). + // MSL: In vertex-like shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. 
+ // HLSL: In vertex-like shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. bool fixup_clipspace = false; - // Inverts gl_Position.y or equivalent. + // In vertex-like shaders, inverts gl_Position.y or equivalent. bool flip_vert_y = false; // GLSL only, for HLSL version of this option, see CompilerHLSL. @@ -132,7 +181,7 @@ class CompilerGLSL : public Compiler bool support_nonzero_base_instance = true; } vertex; - struct + struct FragmentOptions { // Add precision mediump float in ES targets when emitting GLES source. // Add precision highp int in ES targets when emitting GLES source. @@ -148,6 +197,11 @@ class CompilerGLSL : public Compiler remap_pls_variables(); } + // Redirect a subpassInput reading from input_attachment_index to instead load its value from + // the color attachment at location = color_location. Requires ESSL. + // If coherent, uses GL_EXT_shader_framebuffer_fetch, if not, uses noncoherent variant. + void remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent); + explicit CompilerGLSL(std::vector spirv_) : Compiler(std::move(spirv_)) { @@ -209,10 +263,104 @@ class CompilerGLSL : public Compiler // For this to work, all types in the block must be the same basic type, e.g. mixing vec2 and vec4 is fine, but // mixing int and float is not. // The name of the uniform array will be the same as the interface block name. - void flatten_buffer_block(uint32_t id); + void flatten_buffer_block(VariableID id); + + // After compilation, query if a variable ID was used as a depth resource. + // This is meaningful for MSL since descriptor types depend on this knowledge. + // Cases which return true: + // - Images which are declared with depth = 1 image type. + // - Samplers which are statically used at least once with Dref opcodes. + // - Images which are statically used at least once with Dref opcodes. 
+ bool variable_is_depth_or_compare(VariableID id) const; + + // If a shader output is active in this stage, but inactive in a subsequent stage, + // this can be signalled here. This can be used to work around certain cross-stage matching problems + // which plagues MSL and HLSL in certain scenarios. + // An output which matches one of these will not be emitted in stage output interfaces, but rather treated as a private + // variable. + // This option is only meaningful for MSL and HLSL, since GLSL matches by location directly. + // Masking builtins only takes effect if the builtin in question is part of the stage output interface. + void mask_stage_output_by_location(uint32_t location, uint32_t component); + void mask_stage_output_by_builtin(spv::BuiltIn builtin); protected: - void reset(); + struct ShaderSubgroupSupportHelper + { + // lower enum value = greater priority + enum Candidate + { + KHR_shader_subgroup_ballot, + KHR_shader_subgroup_basic, + KHR_shader_subgroup_vote, + NV_gpu_shader_5, + NV_shader_thread_group, + NV_shader_thread_shuffle, + ARB_shader_ballot, + ARB_shader_group_vote, + AMD_gcn_shader, + + CandidateCount + }; + + static const char *get_extension_name(Candidate c); + static SmallVector get_extra_required_extension_names(Candidate c); + static const char *get_extra_required_extension_predicate(Candidate c); + + enum Feature + { + SubgroupMask = 0, + SubgroupSize = 1, + SubgroupInvocationID = 2, + SubgroupID = 3, + NumSubgroups = 4, + SubgroupBroadcast_First = 5, + SubgroupBallotFindLSB_MSB = 6, + SubgroupAll_Any_AllEqualBool = 7, + SubgroupAllEqualT = 8, + SubgroupElect = 9, + SubgroupBarrier = 10, + SubgroupMemBarrier = 11, + SubgroupBallot = 12, + SubgroupInverseBallot_InclBitCount_ExclBitCout = 13, + SubgroupBallotBitExtract = 14, + SubgroupBallotBitCount = 15, + + FeatureCount + }; + + using FeatureMask = uint32_t; + static_assert(sizeof(FeatureMask) * 8u >= FeatureCount, "Mask type needs more bits."); + + using CandidateVector = 
SmallVector; + using FeatureVector = SmallVector; + + static FeatureVector get_feature_dependencies(Feature feature); + static FeatureMask get_feature_dependency_mask(Feature feature); + static bool can_feature_be_implemented_without_extensions(Feature feature); + static Candidate get_KHR_extension_for_feature(Feature feature); + + struct Result + { + Result(); + uint32_t weights[CandidateCount]; + }; + + void request_feature(Feature feature); + bool is_feature_requested(Feature feature) const; + Result resolve() const; + + static CandidateVector get_candidates_for_feature(Feature ft, const Result &r); + + private: + static CandidateVector get_candidates_for_feature(Feature ft); + static FeatureMask build_mask(const SmallVector &features); + FeatureMask feature_mask = 0; + }; + + // TODO remove this function when all subgroup ops are supported (or make it always return true) + static bool is_supported_subgroup_op_in_opengl(spv::Op op); + + void reset(uint32_t iteration_count); void emit_function(SPIRFunction &func, const Bitset &return_flags); bool has_extension(const std::string &ext) const; @@ -222,11 +370,22 @@ class CompilerGLSL : public Compiler virtual void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags); SPIRBlock *current_emitting_block = nullptr; - SPIRBlock *current_emitting_switch = nullptr; + SmallVector current_emitting_switch_stack; bool current_emitting_switch_fallthrough = false; virtual void emit_instruction(const Instruction &instr); + struct TemporaryCopy + { + uint32_t dst_id; + uint32_t src_id; + }; + TemporaryCopy handle_instruction_precision(const Instruction &instr); void emit_block_instructions(SPIRBlock &block); + + // For relax_nan_checks. 
+ GLSLstd450 get_remapped_glsl_op(GLSLstd450 std450_op) const; + spv::Op get_remapped_spirv_op(spv::Op op) const; + virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count); virtual void emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t result_id, uint32_t op, @@ -242,32 +401,72 @@ class CompilerGLSL : public Compiler void build_workgroup_size(SmallVector &arguments, const SpecializationConstant &x, const SpecializationConstant &y, const SpecializationConstant &z); + void request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature); + virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id); - virtual void emit_texture_op(const Instruction &i); + virtual void emit_texture_op(const Instruction &i, bool sparse); + virtual std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, + SmallVector &inherited_expressions); virtual void emit_subgroup_op(const Instruction &i); virtual std::string type_to_glsl(const SPIRType &type, uint32_t id = 0); virtual std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage); virtual void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = "", uint32_t base_offset = 0); + virtual void emit_struct_padding_target(const SPIRType &type); virtual std::string image_type_glsl(const SPIRType &type, uint32_t id = 0); - std::string constant_expression(const SPIRConstant &c); - std::string constant_op_expression(const SPIRConstantOp &cop); + std::string constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope = false); + virtual std::string constant_op_expression(const SPIRConstantOp &cop); virtual std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector); virtual void emit_fixup(); virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t 
id = 0); - virtual std::string to_func_call_arg(uint32_t id); - virtual std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, - bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, - bool has_dref, uint32_t lod, uint32_t minlod); - virtual std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, - bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, - uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, - uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, - uint32_t minlod, bool *p_forward); + virtual bool variable_decl_is_remapped_storage(const SPIRVariable &var, spv::StorageClass storage) const; + virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id); + + struct TextureFunctionBaseArguments + { + // GCC 4.8 workarounds, it doesn't understand '{}' constructor here, use explicit default constructor. + TextureFunctionBaseArguments() = default; + VariableID img = 0; + const SPIRType *imgtype = nullptr; + bool is_fetch = false, is_gather = false, is_proj = false; + }; + + struct TextureFunctionNameArguments + { + // GCC 4.8 workarounds, it doesn't understand '{}' constructor here, use explicit default constructor. + TextureFunctionNameArguments() = default; + TextureFunctionBaseArguments base; + bool has_array_offsets = false, has_offset = false, has_grad = false; + bool has_dref = false, is_sparse_feedback = false, has_min_lod = false; + uint32_t lod = 0; + }; + virtual std::string to_function_name(const TextureFunctionNameArguments &args); + + struct TextureFunctionArguments + { + // GCC 4.8 workarounds, it doesn't understand '{}' constructor here, use explicit default constructor. 
+ TextureFunctionArguments() = default; + TextureFunctionBaseArguments base; + uint32_t coord = 0, coord_components = 0, dref = 0; + uint32_t grad_x = 0, grad_y = 0, lod = 0, offset = 0; + uint32_t bias = 0, component = 0, sample = 0, sparse_texel = 0, min_lod = 0; + bool nonuniform_expression = false; + }; + virtual std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward); + + void emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id, + uint32_t &texel_id); + uint32_t get_sparse_feedback_texel_id(uint32_t id) const; virtual void emit_buffer_block(const SPIRVariable &type); virtual void emit_push_constant_block(const SPIRVariable &var); virtual void emit_uniform(const SPIRVariable &var); - virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id); + virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, + bool packed_type, bool row_major); + + virtual bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const; + + void emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id, + SmallVector chain); StringStream<> buffer; @@ -325,19 +524,22 @@ class CompilerGLSL : public Compiler // on a single line separated by comma. 
SmallVector *redirect_statement = nullptr; const SPIRBlock *current_continue_block = nullptr; + bool block_temporary_hoisting = false; void begin_scope(); void end_scope(); + void end_scope(const std::string &trailer); void end_scope_decl(); void end_scope_decl(const std::string &decl); Options options; - std::string type_to_array_glsl(const SPIRType &type); + virtual std::string type_to_array_glsl( + const SPIRType &type); // Allow Metal to use the array template to make arrays a value type std::string to_array_size(const SPIRType &type, uint32_t index); uint32_t to_array_size_literal(const SPIRType &type, uint32_t index) const; uint32_t to_array_size_literal(const SPIRType &type) const; - std::string variable_decl(const SPIRVariable &variable); + virtual std::string variable_decl(const SPIRVariable &variable); // Threadgroup arrays can't have a wrapper type std::string variable_decl_function_local(SPIRVariable &variable); void add_local_variable_name(uint32_t id); @@ -347,8 +549,10 @@ class CompilerGLSL : public Compiler virtual bool is_non_native_row_major_matrix(uint32_t id); virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index); - bool member_is_packed_type(const SPIRType &type, uint32_t index) const; - virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed); + bool member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const; + bool member_is_packed_physical_type(const SPIRType &type, uint32_t index) const; + virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, + uint32_t physical_type_id, bool is_packed); std::unordered_set local_variable_names; std::unordered_set resource_names; @@ -369,6 +573,7 @@ class CompilerGLSL : public Compiler struct BackendVariations { std::string discard_literal = "discard"; + std::string demote_literal = "demote"; std::string null_pointer_literal = ""; bool float_literal_suffix = false; 
bool double_literal_suffix = true; @@ -383,6 +588,7 @@ class CompilerGLSL : public Compiler const char *int16_t_literal_suffix = "s"; const char *uint16_t_literal_suffix = "us"; const char *nonuniform_qualifier = "nonuniformEXT"; + const char *boolean_mix_function = "mix"; bool swizzle_is_function = false; bool shared_is_implied = false; bool unsized_array_supported = true; @@ -393,51 +599,68 @@ class CompilerGLSL : public Compiler bool can_declare_arrays_inline = true; bool native_row_major_matrix = true; bool use_constructor_splatting = true; - bool boolean_mix_support = true; bool allow_precision_qualifiers = false; bool can_swizzle_scalar = false; bool force_gl_in_out_block = false; + bool force_merged_mesh_block = false; bool can_return_array = true; bool allow_truncated_access_chain = false; bool supports_extensions = false; bool supports_empty_struct = false; bool array_is_value_type = true; + bool array_is_value_type_in_buffer_blocks = true; bool comparison_image_samples_scalar = false; bool native_pointers = false; bool support_small_type_sampling_result = false; bool support_case_fallthrough = true; + bool use_array_constructor = false; + bool needs_row_major_load_workaround = false; + bool support_pointer_to_pointer = false; + bool support_precise_qualifier = false; + bool support_64bit_switch = false; + bool workgroup_size_is_hidden = false; + bool requires_relaxed_precision_analysis = false; + bool implicit_c_integer_promotion_rules = false; } backend; void emit_struct(SPIRType &type); void emit_resources(); + void emit_extension_workarounds(spv::ExecutionModel model); void emit_buffer_block_native(const SPIRVariable &var); - void emit_buffer_reference_block(SPIRType &type, bool forward_declaration); + void emit_buffer_reference_block(uint32_t type_id, bool forward_declaration); void emit_buffer_block_legacy(const SPIRVariable &var); void emit_buffer_block_flattened(const SPIRVariable &type); + void 
fixup_implicit_builtin_block_names(spv::ExecutionModel model); void emit_declared_builtin_block(spv::StorageClass storage, spv::ExecutionModel model); + bool should_force_emit_builtin_block(spv::StorageClass storage); void emit_push_constant_block_vulkan(const SPIRVariable &var); void emit_push_constant_block_glsl(const SPIRVariable &var); void emit_interface_block(const SPIRVariable &type); void emit_flattened_io_block(const SPIRVariable &var, const char *qual); + void emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices); + void emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices); void emit_block_chain(SPIRBlock &block); - void emit_hoisted_temporaries(SmallVector> &temporaries); + void emit_hoisted_temporaries(SmallVector> &temporaries); std::string constant_value_macro_name(uint32_t id); + int get_constant_mapping_to_workgroup_component(const SPIRConstant &constant) const; void emit_constant(const SPIRConstant &constant); void emit_specialization_constant_op(const SPIRConstantOp &constant); std::string emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block); bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method); - void branch(uint32_t from, uint32_t to); - void branch_to_continue(uint32_t from, uint32_t to); - void branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block); - void flush_phi(uint32_t from, uint32_t to); - bool flush_phi_required(uint32_t from, uint32_t to); + void branch(BlockID from, BlockID to); + void branch_to_continue(BlockID from, BlockID to); + void branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block); + void flush_phi(BlockID from, BlockID to); void flush_variable_declaration(uint32_t id); void flush_undeclared_variables(SPIRBlock &block); void emit_variable_temporary_copies(const 
SPIRVariable &var); bool should_dereference(uint32_t id); - bool should_forward(uint32_t id); + bool should_forward(uint32_t id) const; + bool should_suppress_usage_tracking(uint32_t id) const; void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp); void emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op); bool to_trivial_mix_op(const SPIRType &type, std::string &op, uint32_t left, uint32_t right, uint32_t lerp); @@ -446,41 +669,67 @@ class CompilerGLSL : public Compiler void emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op); void emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op); void emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type); void emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type); + void emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type); void emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op, SPIRType::BaseType input_type); + void emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, SPIRType::BaseType expected_result_type, + SPIRType::BaseType input_type0, SPIRType::BaseType input_type1, + SPIRType::BaseType 
input_type2); + void emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, + uint32_t op3, const char *op, SPIRType::BaseType offset_count_type); void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); void emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op); void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); - void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, + bool negate, SPIRType::BaseType expected_type); void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, - SPIRType::BaseType input_type, bool skip_cast_if_equal_type); + SPIRType::BaseType input_type, bool skip_cast_if_equal_type, bool implicit_integer_promotion); SPIRType binary_op_bitcast_helper(std::string &cast_op0, std::string &cast_op1, SPIRType::BaseType &input_type, uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type); + virtual bool emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0); + std::string to_ternary_expression(const SPIRType &result_type, uint32_t select, uint32_t true_value, uint32_t false_value); void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); - bool expression_is_forwarded(uint32_t id); + void emit_unary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); + bool expression_is_forwarded(uint32_t id) const; + bool expression_suppresses_usage_tracking(uint32_t id) const; + bool expression_read_implies_multiple_reads(uint32_t id) const; SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs, bool 
suppress_usage_tracking = false); + void access_chain_internal_append_index(std::string &expr, uint32_t base, const SPIRType *type, + AccessChainFlags flags, bool &access_chain_is_arrayed, uint32_t index); + std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, AccessChainMeta *meta); + spv::StorageClass get_expression_effective_storage_class(uint32_t ptr); + virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base); + + virtual void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type); + virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, + spv::StorageClass storage, bool &is_packed); + std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, AccessChainMeta *meta = nullptr, bool ptr_chain = false); std::string flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, - bool need_transpose); + uint32_t array_stride, bool need_transpose); std::string flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset); std::string flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, @@ -493,6 +742,7 @@ class CompilerGLSL : public Compiler uint32_t count, uint32_t offset, uint32_t word_stride, bool *need_transpose = nullptr, uint32_t *matrix_stride = nullptr, + uint32_t *array_stride = nullptr, bool ptr_chain = false); const char *index_to_swizzle(uint32_t index); @@ -501,39 +751,50 @@ class CompilerGLSL : public Compiler void emit_uninitialized_temporary(uint32_t type, uint32_t id); SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id); void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist); + std::string 
to_non_uniform_aware_expression(uint32_t id); std::string to_expression(uint32_t id, bool register_expression_read = true); - std::string to_composite_constructor_expression(uint32_t id); + std::string to_composite_constructor_expression(uint32_t id, bool block_like_type); std::string to_rerolled_array_expression(const std::string &expr, const SPIRType &type); std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true); std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true); + std::string to_unpacked_row_major_matrix_expression(uint32_t id); std::string to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read = true); std::string to_dereferenced_expression(uint32_t id, bool register_expression_read = true); std::string to_pointer_expression(uint32_t id, bool register_expression_read = true); std::string to_enclosed_pointer_expression(uint32_t id, bool register_expression_read = true); std::string to_extract_component_expression(uint32_t id, uint32_t index); + std::string to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c, + const uint32_t *chain, uint32_t length); std::string enclose_expression(const std::string &expr); std::string dereference_expression(const SPIRType &expression_type, const std::string &expr); std::string address_of_expression(const std::string &expr); void strip_enclosed_expression(std::string &expr); std::string to_member_name(const SPIRType &type, uint32_t index); - virtual std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain); + virtual std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved); + std::string to_multi_member_reference(const SPIRType &type, const SmallVector &indices); std::string type_to_glsl_constructor(const SPIRType &type); std::string argument_decl(const SPIRFunction::Parameter &arg); virtual std::string 
to_qualifiers_glsl(uint32_t id); - const char *to_precision_qualifiers_glsl(uint32_t id); + void fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var); + void emit_output_variable_initializer(const SPIRVariable &var); + std::string to_precision_qualifiers_glsl(uint32_t id); virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var); - const char *flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags); + std::string flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags); const char *format_to_glsl(spv::ImageFormat format); virtual std::string layout_for_member(const SPIRType &type, uint32_t index); virtual std::string to_interpolation_qualifiers(const Bitset &flags); std::string layout_for_variable(const SPIRVariable &variable); - std::string to_combined_image_sampler(uint32_t image_id, uint32_t samp_id); + std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id); virtual bool skip_argument(uint32_t id) const; - virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id); + virtual void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id, + spv::StorageClass lhs_storage, spv::StorageClass rhs_storage); virtual void emit_block_hints(const SPIRBlock &block); virtual std::string to_initializer_expression(const SPIRVariable &var); + virtual std::string to_zero_initialized_expression(uint32_t type_id); + bool type_can_zero_initialize(const SPIRType &type) const; - bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t start_offset = 0, + bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, + uint32_t *failed_index = nullptr, uint32_t start_offset = 0, uint32_t end_offset = ~(0u)); std::string buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout); @@ -541,6 +802,7 @@ class CompilerGLSL : public Compiler uint32_t type_to_packed_alignment(const SPIRType &type, 
const Bitset &flags, BufferPackingStandard packing); uint32_t type_to_packed_array_stride(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); uint32_t type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); + uint32_t type_to_location_count(const SPIRType &type) const; std::string bitcast_glsl(const SPIRType &result_type, uint32_t arg); virtual std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type); @@ -557,23 +819,35 @@ class CompilerGLSL : public Compiler bool check_atomic_image(uint32_t id); virtual void replace_illegal_names(); + void replace_illegal_names(const std::unordered_set &keywords); virtual void emit_entry_point_declarations(); void replace_fragment_output(SPIRVariable &var); void replace_fragment_outputs(); - bool check_explicit_lod_allowed(uint32_t lod); - std::string legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod, uint32_t id); + std::string legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t id); + + void forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length); + void analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length); + Options::Precision analyze_expression_precision(const uint32_t *args, uint32_t length) const; uint32_t indent = 0; std::unordered_set emitted_functions; + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + std::unordered_set flushed_phi_variables; + std::unordered_set flattened_buffer_blocks; - std::unordered_set flattened_structs; + std::unordered_map flattened_structs; + + ShaderSubgroupSupportHelper shader_subgroup_supporter; - std::string load_flattened_struct(SPIRVariable &var); - std::string to_flattened_struct_member(const SPIRVariable &var, uint32_t index); - void store_flattened_struct(SPIRVariable &var, uint32_t value); + std::string load_flattened_struct(const 
std::string &basename, const SPIRType &type); + std::string to_flattened_struct_member(const std::string &basename, const SPIRType &type, uint32_t index); + void store_flattened_struct(uint32_t lhs_id, uint32_t value); + void store_flattened_struct(const std::string &basename, uint32_t rhs, const SPIRType &type, + const SmallVector &indices); + std::string to_flattened_access_chain_expression(uint32_t id); // Usage tracking. If a temporary is used more than once, use the temporary instead to // avoid AST explosion when SPIRV is generated with pure SSA and doesn't write stuff to variables. @@ -588,6 +862,10 @@ class CompilerGLSL : public Compiler // Currently used by NMin/Max/Clamp implementations. std::unordered_map extra_sub_expressions; + SmallVector workaround_ubo_load_overload_types; + void request_workaround_wrapper_overload(TypeID id); + void rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr); + uint32_t statement_count = 0; inline bool is_legacy() const @@ -605,6 +883,13 @@ class CompilerGLSL : public Compiler return !options.es && options.version < 130; } + bool requires_transpose_2x2 = false; + bool requires_transpose_3x3 = false; + bool requires_transpose_4x4 = false; + bool ray_tracing_is_khr = false; + bool barycentric_is_nv = false; + void ray_tracing_khr_fixup_locations(); + bool args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure); void register_call_out_argument(uint32_t id); void register_impure_function_call(); @@ -618,6 +903,16 @@ class CompilerGLSL : public Compiler void emit_pls(); void remap_pls_variables(); + // GL_EXT_shader_framebuffer_fetch support. 
+ std::vector> subpass_to_framebuffer_fetch_attachment; + std::vector> inout_color_attachments; + bool location_is_framebuffer_fetch(uint32_t location) const; + bool location_is_non_coherent_framebuffer_fetch(uint32_t location) const; + bool subpass_input_is_framebuffer_fetch(uint32_t id) const; + void emit_inout_fragment_outputs_copy_to_subpass_inputs(); + const SPIRVariable *find_subpass_input_by_attachment_index(uint32_t index) const; + const SPIRVariable *find_color_output_by_location(uint32_t location) const; + // A variant which takes two sets of name. The secondary is only used to verify there are no collisions, // but the set is not updated when we have found a new name. // Used primarily when adding block interface names. @@ -626,8 +921,14 @@ class CompilerGLSL : public Compiler void check_function_call_constraints(const uint32_t *args, uint32_t length); void handle_invalid_expression(uint32_t id); + void force_temporary_and_recompile(uint32_t id); void find_static_extensions(); + uint32_t consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision); + std::unordered_map temporary_to_mirror_precision_alias; + std::unordered_set composite_insert_overwritten; + std::unordered_set block_composite_insert_overwrite; + std::string emit_for_loop_initializers(const SPIRBlock &block); void emit_while_loop_initializers(const SPIRBlock &block); bool for_loop_initializers_are_same_type(const SPIRBlock &block); @@ -636,10 +937,6 @@ class CompilerGLSL : public Compiler bool type_is_empty(const SPIRType &type); - virtual void declare_undefined_values(); - - static std::string sanitize_underscores(const std::string &str); - bool can_use_io_location(spv::StorageClass storage, bool block); const Instruction *get_next_instruction_in_block(const Instruction &instr); static uint32_t mask_relevant_memory_semantics(uint32_t semantics); @@ -652,16 +949,18 @@ class CompilerGLSL : public Compiler // Builtins in GLSL are always specific 
signedness, but the SPIR-V can declare them // as either unsigned or signed. - // Sometimes we will need to automatically perform bitcasts on load and store to make this work. - virtual void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type); - virtual void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type); + // Sometimes we will need to automatically perform casts on load and store to make this work. + virtual void cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type); + virtual void cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type); void unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr); - void convert_non_uniform_expression(const SPIRType &type, std::string &expr); + bool unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id); + void convert_non_uniform_expression(std::string &expr, uint32_t ptr_id); void handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id); void disallow_forwarding_in_expression_chain(const SPIRExpression &expr); bool expression_is_constant_null(uint32_t id) const; + bool expression_is_non_value_type_array(uint32_t ptr); virtual void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression); uint32_t get_integer_width_for_instruction(const Instruction &instr) const; @@ -673,11 +972,29 @@ class CompilerGLSL : public Compiler void fixup_type_alias(); void reorder_type_alias(); + void fixup_anonymous_struct_names(); + void fixup_anonymous_struct_names(std::unordered_set &visited, const SPIRType &type); + + static const char *vector_swizzle(int vecsize, int index); - void propagate_nonuniform_qualifier(uint32_t id); + bool is_stage_output_location_masked(uint32_t location, uint32_t component) const; + bool is_stage_output_builtin_masked(spv::BuiltIn builtin) const; + bool 
is_stage_output_variable_masked(const SPIRVariable &var) const; + bool is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const; + bool is_per_primitive_variable(const SPIRVariable &var) const; + uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const; + uint32_t get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const; + std::unordered_set masked_output_locations; + std::unordered_set masked_output_builtins; private: void init(); + + SmallVector get_composite_constant_ids(ConstantID const_id); + void fill_composite_constant(SPIRConstant &constant, TypeID type_id, const SmallVector &initializers); + void set_composite_constant(ConstantID const_id, TypeID type_id, const SmallVector &initializers); + TypeID get_composite_member_type(TypeID type_id, uint32_t member_idx); + std::unordered_map> const_composite_insert_ids; }; } // namespace SPIRV_CROSS_NAMESPACE diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 57bbef8b818..b3ba58041ae 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2016-2019 Robert Konrad + * Copyright 2016-2021 Robert Konrad + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,6 +13,13 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. + * + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
*/ #include "spirv_hlsl.hpp" @@ -23,6 +31,41 @@ using namespace spv; using namespace SPIRV_CROSS_NAMESPACE; using namespace std; +enum class ImageFormatNormalizedState +{ + None = 0, + Unorm = 1, + Snorm = 2 +}; + +static ImageFormatNormalizedState image_format_to_normalized_state(ImageFormat fmt) +{ + switch (fmt) + { + case ImageFormatR8: + case ImageFormatR16: + case ImageFormatRg8: + case ImageFormatRg16: + case ImageFormatRgba8: + case ImageFormatRgba16: + case ImageFormatRgb10A2: + return ImageFormatNormalizedState::Unorm; + + case ImageFormatR8Snorm: + case ImageFormatR16Snorm: + case ImageFormatRg8Snorm: + case ImageFormatRg16Snorm: + case ImageFormatRgba8Snorm: + case ImageFormatRgba16Snorm: + return ImageFormatNormalizedState::Snorm; + + default: + break; + } + + return ImageFormatNormalizedState::None; +} + static unsigned image_format_to_components(ImageFormat fmt) { switch (fmt) @@ -203,13 +246,15 @@ static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype) } } -string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) +string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id) { auto &imagetype = get(type.image.type); const char *dim = nullptr; bool typed_load = false; uint32_t components = 4; + bool force_image_srv = hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(id, DecorationNonWritable); + switch (type.image.dim) { case Dim1D: @@ -235,7 +280,19 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) if (type.image.sampled == 1) return join("Buffer<", type_to_glsl(imagetype), components, ">"); else if (type.image.sampled == 2) - return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">"); + { + if (interlocked_resources.count(id)) + return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype), + ">"); + + typed_load = !force_image_srv && type.image.sampled == 2; + + const char *rw = 
force_image_srv ? "" : "RW"; + return join(rw, "Buffer<", + typed_load ? image_format_to_type(type.image.format, imagetype.basetype) : + join(type_to_glsl(imagetype), components), + ">"); + } else SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime."); case DimSubpassData: @@ -247,14 +304,21 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) } const char *arrayed = type.image.arrayed ? "Array" : ""; const char *ms = type.image.ms ? "MS" : ""; - const char *rw = typed_load ? "RW" : ""; + const char *rw = typed_load && !force_image_srv ? "RW" : ""; + + if (force_image_srv) + typed_load = false; + + if (typed_load && interlocked_resources.count(id)) + rw = "RasterizerOrdered"; + return join(rw, "Texture", dim, ms, arrayed, "<", typed_load ? image_format_to_type(type.image.format, imagetype.basetype) : join(type_to_glsl(imagetype), components), ">"); } -string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t id) +string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t /*id*/) { auto &imagetype = get(type.image.type); string res; @@ -317,8 +381,6 @@ string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t id) res += "MS"; if (type.image.arrayed) res += "Array"; - if (image_is_comparison(type, id)) - res += "Shadow"; return res; } @@ -374,15 +436,36 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) case SPIRType::AtomicCounter: return "atomic_uint"; case SPIRType::Half: - return "min16float"; + if (hlsl_options.enable_16bit_types) + return "half"; + else + return "min16float"; + case SPIRType::Short: + if (hlsl_options.enable_16bit_types) + return "int16_t"; + else + return "min16int"; + case SPIRType::UShort: + if (hlsl_options.enable_16bit_types) + return "uint16_t"; + else + return "min16uint"; case SPIRType::Float: return "float"; case SPIRType::Double: return "double"; case SPIRType::Int64: + if 
(hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0."); return "int64_t"; case SPIRType::UInt64: + if (hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0."); return "uint64_t"; + case SPIRType::AccelerationStructure: + return "RaytracingAccelerationStructure"; + case SPIRType::RayQuery: + return "RayQuery"; default: return "???"; } @@ -398,7 +481,11 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) case SPIRType::UInt: return join("uint", type.vecsize); case SPIRType::Half: - return join("min16float", type.vecsize); + return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.vecsize); + case SPIRType::Short: + return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.vecsize); + case SPIRType::UShort: + return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.vecsize); case SPIRType::Float: return join("float", type.vecsize); case SPIRType::Double: @@ -422,7 +509,11 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) case SPIRType::UInt: return join("uint", type.columns, "x", type.vecsize); case SPIRType::Half: - return join("min16float", type.columns, "x", type.vecsize); + return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.columns, "x", type.vecsize); + case SPIRType::Short: + return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.columns, "x", type.vecsize); + case SPIRType::UShort: + return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.columns, "x", type.vecsize); case SPIRType::Float: return join("float", type.columns, "x", type.vecsize); case SPIRType::Double: @@ -483,10 +574,17 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() switch (builtin) { case BuiltInPosition: - type = "float4"; + type = is_position_invariant() && backend.support_precise_qualifier ? "precise float4" : "float4"; semantic = legacy ? 
"POSITION" : "SV_Position"; break; + case BuiltInSampleMask: + if (hlsl_options.shader_model < 41 || execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Sample Mask output is only supported in PS 4.1 or higher."); + type = "uint"; + semantic = "SV_Coverage"; + break; + case BuiltInFragDepth: type = "float"; if (legacy) @@ -505,36 +603,80 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() break; case BuiltInClipDistance: + { + static const char *types[] = { "float", "float2", "float3", "float4" }; + // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. - for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + if (execution.model == ExecutionModelMeshEXT) { - uint32_t to_declare = clip_distance_count - clip; - if (to_declare > 4) - to_declare = 4; + if (clip_distance_count > 4) + SPIRV_CROSS_THROW("Clip distance count > 4 not supported for mesh shaders."); - uint32_t semantic_index = clip / 4; + if (clip_distance_count == 1) + { + // Avoids having to hack up access_chain code. Makes it trivially indexable. + statement("float gl_ClipDistance[1] : SV_ClipDistance;"); + } + else + { + // Replace array with vector directly, avoids any weird fixup path. 
+ statement(types[clip_distance_count - 1], " gl_ClipDistance : SV_ClipDistance;"); + } + } + else + { + for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + { + uint32_t to_declare = clip_distance_count - clip; + if (to_declare > 4) + to_declare = 4; - static const char *types[] = { "float", "float2", "float3", "float4" }; - statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, - " : SV_ClipDistance", semantic_index, ";"); + uint32_t semantic_index = clip / 4; + + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, + " : SV_ClipDistance", semantic_index, ";"); + } } break; + } case BuiltInCullDistance: + { + static const char *types[] = { "float", "float2", "float3", "float4" }; + // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors. - for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) + if (execution.model == ExecutionModelMeshEXT) { - uint32_t to_declare = cull_distance_count - cull; - if (to_declare > 4) - to_declare = 4; + if (cull_distance_count > 4) + SPIRV_CROSS_THROW("Cull distance count > 4 not supported for mesh shaders."); - uint32_t semantic_index = cull / 4; + if (cull_distance_count == 1) + { + // Avoids having to hack up access_chain code. Makes it trivially indexable. + statement("float gl_CullDistance[1] : SV_CullDistance;"); + } + else + { + // Replace array with vector directly, avoids any weird fixup path. 
+ statement(types[cull_distance_count - 1], " gl_CullDistance : SV_CullDistance;"); + } + } + else + { + for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) + { + uint32_t to_declare = cull_distance_count - cull; + if (to_declare > 4) + to_declare = 4; - static const char *types[] = { "float", "float2", "float3", "float4" }; - statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, - " : SV_CullDistance", semantic_index, ";"); + uint32_t semantic_index = cull / 4; + + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, + " : SV_CullDistance", semantic_index, ";"); + } } break; + } case BuiltInPointSize: // If point_size_compat is enabled, just ignore PointSize. @@ -545,8 +687,69 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() else SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + case BuiltInLayer: + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInCullPrimitiveEXT: + // per-primitive attributes handled separatly + break; + + case BuiltInPrimitivePointIndicesEXT: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInPrimitiveTriangleIndicesEXT: + // meshlet local-index buffer handled separatly + break; + default: SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + } + + if (type && semantic) + statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";"); + }); +} + +void CompilerHLSL::emit_builtin_primitive_outputs_in_struct() +{ + active_output_builtins.for_each_bit([&](uint32_t i) { + const char *type = nullptr; + const char *semantic = nullptr; + auto builtin = static_cast(i); + switch (builtin) + { + case BuiltInLayer: + { + const ExecutionModel model = get_entry_point().model; + if (hlsl_options.shader_model < 50 || + (model != ExecutionModelGeometry && model != ExecutionModelMeshEXT)) + SPIRV_CROSS_THROW("Render target array index output is only supported in GS/MS 
5.0 or higher."); + type = "uint"; + semantic = "SV_RenderTargetArrayIndex"; + break; + } + + case BuiltInPrimitiveId: + type = "uint"; + semantic = "SV_PrimitiveID"; + break; + + case BuiltInViewportIndex: + type = "uint"; + semantic = "SV_ViewportArrayIndex"; + break; + + case BuiltInPrimitiveShadingRateKHR: + type = "uint"; + semantic = "SV_ShadingRate"; + break; + + case BuiltInCullPrimitiveEXT: + type = "bool"; + semantic = "SV_CullPrimitive"; + break; + + default: break; } @@ -577,6 +780,11 @@ void CompilerHLSL::emit_builtin_inputs_in_struct() semantic = "SV_VertexID"; break; + case BuiltInPrimitiveId: + type = "uint"; + semantic = "SV_PrimitiveID"; + break; + case BuiltInInstanceId: case BuiltInInstanceIndex: if (legacy) @@ -592,6 +800,13 @@ void CompilerHLSL::emit_builtin_inputs_in_struct() semantic = "SV_SampleIndex"; break; + case BuiltInSampleMask: + if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Sample Mask input is only supported in PS 5.0 or higher."); + type = "uint"; + semantic = "SV_Coverage"; + break; + case BuiltInGlobalInvocationId: type = "uint3"; semantic = "SV_DispatchThreadID"; @@ -617,6 +832,13 @@ void CompilerHLSL::emit_builtin_inputs_in_struct() semantic = "SV_IsFrontFace"; break; + case BuiltInViewIndex: + if (hlsl_options.shader_model < 61 || (get_entry_point().model != ExecutionModelVertex && get_entry_point().model != ExecutionModelFragment)) + SPIRV_CROSS_THROW("View Index input is only supported in VS and PS 6.1 or higher."); + type = "uint"; + semantic = "SV_ViewID"; + break; + case BuiltInNumWorkgroups: case BuiltInSubgroupSize: case BuiltInSubgroupLocalInvocationId: @@ -625,9 +847,16 @@ void CompilerHLSL::emit_builtin_inputs_in_struct() case BuiltInSubgroupLeMask: case BuiltInSubgroupGtMask: case BuiltInSubgroupGeMask: + case BuiltInBaseVertex: + case BuiltInBaseInstance: // Handled specially. 
break; + case BuiltInHelperInvocation: + if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher."); + break; + case BuiltInClipDistance: // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) @@ -667,9 +896,15 @@ void CompilerHLSL::emit_builtin_inputs_in_struct() else SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + case BuiltInLayer: + if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Render target array index input is only supported in PS 5.0 or higher."); + type = "uint"; + semantic = "SV_RenderTargetArrayIndex"; + break; + default: SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); - break; } if (type && semantic) @@ -695,7 +930,7 @@ uint32_t CompilerHLSL::type_to_consumed_locations(const SPIRType &type) const if (type.array_size_literal[i]) array_multiplier *= type.array[i]; else - array_multiplier *= get(type.array[i]).scalar(); + array_multiplier *= evaluate_constant_u32(type.array[i]); } elements += array_multiplier * type.columns; } @@ -717,8 +952,8 @@ string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags) res += "patch "; // Seems to be different in actual HLSL. if (flags.get(DecorationSample)) res += "sample "; - if (flags.get(DecorationInvariant)) - res += "invariant "; // Not supported? + if (flags.get(DecorationInvariant) && backend.support_precise_qualifier) + res += "precise "; // Not supported? 
return res; } @@ -738,48 +973,40 @@ std::string CompilerHLSL::to_semantic(uint32_t location, ExecutionModel em, Stor return join("TEXCOORD", location); } -void CompilerHLSL::emit_io_block(const SPIRVariable &var) +std::string CompilerHLSL::to_initializer_expression(const SPIRVariable &var) { - auto &execution = get_entry_point(); - + // We cannot emit static const initializer for block constants for practical reasons, + // so just inline the initializer. + // FIXME: There is a theoretical problem here if someone tries to composite extract + // into this initializer since we don't declare it properly, but that is somewhat non-sensical. auto &type = get(var.basetype); - add_resource_name(type.self); - - statement("struct ", to_name(type.self)); - begin_scope(); - type.member_name_cache.clear(); - - uint32_t base_location = get_decoration(var.self, DecorationLocation); - - for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) - { - string semantic; - if (has_member_decoration(type.self, i, DecorationLocation)) - { - uint32_t location = get_member_decoration(type.self, i, DecorationLocation); - semantic = join(" : ", to_semantic(location, execution.model, var.storage)); - } - else - { - // If the block itself has a location, but not its members, use the implicit location. - // There could be a conflict if the block members partially specialize the locations. - // It is unclear how SPIR-V deals with this. Assume this does not happen for now. 
- uint32_t location = base_location + i; - semantic = join(" : ", to_semantic(location, execution.model, var.storage)); - } - - add_member_name(type, i); - - auto &membertype = get(type.member_types[i]); - statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, i)), - variable_decl(membertype, to_member_name(type, i)), semantic, ";"); - } - - end_scope_decl(); - statement(""); + bool is_block = has_decoration(type.self, DecorationBlock); + auto *c = maybe_get(var.initializer); + if (is_block && c) + return constant_expression(*c); + else + return CompilerGLSL::to_initializer_expression(var); +} - statement("static ", variable_decl(var), ";"); - statement(""); +void CompilerHLSL::emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index, + uint32_t location, + std::unordered_set &active_locations) +{ + auto &execution = get_entry_point(); + auto type = get(var.basetype); + auto semantic = to_semantic(location, execution.model, var.storage); + auto mbr_name = join(to_name(type.self), "_", to_member_name(type, member_index)); + auto &mbr_type = get(type.member_types[member_index]); + + statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, member_index)), + type_to_glsl(mbr_type), + " ", mbr_name, type_to_array_glsl(mbr_type), + " : ", semantic, ";"); + + // Structs and arrays should consume more locations. 
+ uint32_t consumed_locations = type_to_consumed_locations(mbr_type); + for (uint32_t i = 0; i < consumed_locations; i++) + active_locations.insert(location + i); } void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unordered_set &active_locations) @@ -814,7 +1041,6 @@ void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unord bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex; - auto &m = ir.meta[var.self].decoration; auto name = to_name(var.self); if (use_location_number) { @@ -822,8 +1048,8 @@ void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unord // If an explicit location exists, use it with TEXCOORD[N] semantic. // Otherwise, pick a vacant location. - if (m.decoration_flags.get(DecorationLocation)) - location_number = m.location; + if (has_decoration(var.self, DecorationLocation)) + location_number = get_decoration(var.self, DecorationLocation); else location_number = get_vacant_location(); @@ -840,24 +1066,39 @@ void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unord { SPIRType newtype = type; newtype.columns = 1; + + string effective_semantic; + if (hlsl_options.flatten_matrix_vertex_input_semantics) + effective_semantic = to_semantic(location_number, execution.model, var.storage); + else + effective_semantic = join(semantic, "_", i); + statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), - variable_decl(newtype, join(name, "_", i)), " : ", semantic, "_", i, ";"); + variable_decl(newtype, join(name, "_", i)), " : ", effective_semantic, ";"); active_locations.insert(location_number++); } } else { - statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(type, name), " : ", + auto decl_type = type; + if (execution.model == ExecutionModelMeshEXT) + { + decl_type.array.erase(decl_type.array.begin()); + 
decl_type.array_size_literal.erase(decl_type.array_size_literal.begin()); + } + statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(decl_type, name), " : ", semantic, ";"); // Structs and arrays should consume more locations. - uint32_t consumed_locations = type_to_consumed_locations(type); + uint32_t consumed_locations = type_to_consumed_locations(decl_type); for (uint32_t i = 0; i < consumed_locations; i++) active_locations.insert(location_number + i); } } else + { statement(variable_decl(type, name), " : ", binding, ";"); + } } std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) @@ -876,7 +1117,9 @@ std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClas auto &var = get(num_workgroups_builtin); auto &type = get(var.basetype); - return sanitize_underscores(join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0))); + auto ret = join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0)); + ParsedIR::sanitize_underscores(ret); + return ret; } case BuiltInPointCoord: // Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set. 
@@ -885,6 +1128,8 @@ std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClas return "WaveGetLaneIndex()"; case BuiltInSubgroupSize: return "WaveGetLaneCount()"; + case BuiltInHelperInvocation: + return "IsHelperLane()"; default: return CompilerGLSL::builtin_to_glsl(builtin, storage); @@ -896,7 +1141,31 @@ void CompilerHLSL::emit_builtin_variables() Bitset builtins = active_input_builtins; builtins.merge_or(active_output_builtins); - bool need_base_vertex_info = false; + std::unordered_map builtin_to_initializer; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (!is_builtin_variable(var) || var.storage != StorageClassOutput || !var.initializer) + return; + + auto *c = this->maybe_get(var.initializer); + if (!c) + return; + + auto &type = this->get(var.basetype); + if (type.basetype == SPIRType::Struct) + { + uint32_t member_count = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationBuiltIn)) + { + builtin_to_initializer[get_member_decoration(type.self, i, DecorationBuiltIn)] = + c->subconstants[i]; + } + } + } + else if (has_decoration(var.self, DecorationBuiltIn)) + builtin_to_initializer[get_decoration(var.self, DecorationBuiltIn)] = var.initializer; + }); // Emit global variables for the interface variables which are statically used by the shader. 
builtins.for_each_bit([&](uint32_t i) { @@ -904,6 +1173,23 @@ void CompilerHLSL::emit_builtin_variables() auto builtin = static_cast(i); uint32_t array_size = 0; + string init_expr; + auto init_itr = builtin_to_initializer.find(builtin); + if (init_itr != builtin_to_initializer.end()) + init_expr = join(" = ", to_expression(init_itr->second)); + + if (get_execution_model() == ExecutionModelMeshEXT) + { + if (builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || + builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId || + builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT || + builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT || + builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT) + { + return; + } + } + switch (builtin) { case BuiltInFragCoord: @@ -920,7 +1206,13 @@ void CompilerHLSL::emit_builtin_variables() case BuiltInInstanceIndex: type = "int"; if (hlsl_options.support_nonzero_base_vertex_base_instance) - need_base_vertex_info = true; + base_vertex_info.used = true; + break; + + case BuiltInBaseVertex: + case BuiltInBaseInstance: + type = "int"; + base_vertex_info.used = true; break; case BuiltInInstanceId: @@ -973,6 +1265,11 @@ void CompilerHLSL::emit_builtin_variables() type = "uint4"; break; + case BuiltInHelperInvocation: + if (hlsl_options.shader_model < 50) + SPIRV_CROSS_THROW("Need SM 5.0 for Helper Invocation."); + break; + case BuiltInClipDistance: array_size = clip_distance_count; type = "float"; @@ -983,26 +1280,56 @@ void CompilerHLSL::emit_builtin_variables() type = "float"; break; + case BuiltInSampleMask: + type = "int"; + break; + + case BuiltInPrimitiveId: + case BuiltInViewIndex: + case BuiltInLayer: + type = "uint"; + break; + + case BuiltInViewportIndex: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInCullPrimitiveEXT: + 
type = "uint"; + break; + default: SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); } StorageClass storage = active_input_builtins.get(i) ? StorageClassInput : StorageClassOutput; - // FIXME: SampleMask can be both in and out with sample builtin, - // need to distinguish that when we add support for that. if (type) { if (array_size) - statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "];"); + statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "]", init_expr, ";"); else - statement("static ", type, " ", builtin_to_glsl(builtin, storage), ";"); + statement("static ", type, " ", builtin_to_glsl(builtin, storage), init_expr, ";"); + } + + // SampleMask can be both in and out with sample builtin, in this case we have already + // declared the input variable and we need to add the output one now. + if (builtin == BuiltInSampleMask && storage == StorageClassInput && this->active_output_builtins.get(i)) + { + statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), init_expr, ";"); } }); - if (need_base_vertex_info) + if (base_vertex_info.used) { - statement("cbuffer SPIRV_Cross_VertexInfo"); + string binding_info; + if (base_vertex_info.explicit_binding) + { + binding_info = join(" : register(b", base_vertex_info.register_index); + if (base_vertex_info.register_space) + binding_info += join(", space", base_vertex_info.register_space); + binding_info += ")"; + } + statement("cbuffer SPIRV_Cross_VertexInfo", binding_info); begin_scope(); statement("int SPIRV_Cross_BaseVertex;"); statement("int SPIRV_Cross_BaseInstance;"); @@ -1011,6 +1338,30 @@ void CompilerHLSL::emit_builtin_variables() } } +void CompilerHLSL::set_hlsl_aux_buffer_binding(HLSLAuxBinding binding, uint32_t register_index, uint32_t register_space) +{ + if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE) + { + base_vertex_info.explicit_binding = true; + base_vertex_info.register_space = 
register_space; + base_vertex_info.register_index = register_index; + } +} + +void CompilerHLSL::unset_hlsl_aux_buffer_binding(HLSLAuxBinding binding) +{ + if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE) + base_vertex_info.explicit_binding = false; +} + +bool CompilerHLSL::is_hlsl_aux_buffer_binding_used(HLSLAuxBinding binding) const +{ + if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE) + return base_vertex_info.used; + else + return false; +} + void CompilerHLSL::emit_composite_constants() { // HLSL cannot declare structs or arrays inline, so we must move them out to @@ -1022,8 +1373,13 @@ void CompilerHLSL::emit_composite_constants() return; auto &type = this->get(c.constant_type); + + if (type.basetype == SPIRType::Struct && is_builtin_type(type)) + return; + if (type.basetype == SPIRType::Struct || !type.array.empty()) { + add_resource_name(c.self); auto name = to_name(c.self); statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";"); emitted = true; @@ -1038,9 +1394,22 @@ void CompilerHLSL::emit_specialization_constants_and_structs() { bool emitted = false; SpecializationConstant wg_x, wg_y, wg_z; - uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + std::unordered_set io_block_types; + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + auto &type = this->get(var.basetype); + if ((var.storage == StorageClassInput || var.storage == StorageClassOutput) && + !var.remapped_variable && type.pointer && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self) && + has_decoration(type.self, DecorationBlock)) + { + io_block_types.insert(type.self); + } + }); - for (auto &id_ : ir.ids_for_constant_or_type) + auto loop_lock = ir.create_loop_hard_lock(); + for (auto &id_ : ir.ids_for_constant_undef_or_type) { auto &id = ir.ids[id_]; @@ -1057,16 +1426,23 @@ 
void CompilerHLSL::emit_specialization_constants_and_structs() else if (c.specialization) { auto &type = get(c.constant_type); + add_resource_name(c.self); auto name = to_name(c.self); - // HLSL does not support specialization constants, so fallback to macros. - c.specialization_constant_macro_name = - constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + if (has_decoration(c.self, DecorationSpecId)) + { + // HLSL does not support specialization constants, so fallback to macros. + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + + statement("#ifndef ", c.specialization_constant_macro_name); + statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); + statement("#endif"); + statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";"); + } + else + statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";"); - statement("#ifndef ", c.specialization_constant_macro_name); - statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); - statement("#endif"); - statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";"); emitted = true; } } @@ -1074,6 +1450,7 @@ void CompilerHLSL::emit_specialization_constants_and_structs() { auto &c = id.get(); auto &type = get(c.basetype); + add_resource_name(c.self); auto name = to_name(c.self); statement("static const ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); emitted = true; @@ -1081,9 +1458,11 @@ void CompilerHLSL::emit_specialization_constants_and_structs() else if (id.get_type() == TypeType) { auto &type = id.get(); - if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && - (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) && - !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + 
bool is_non_io_block = has_decoration(type.self, DecorationBlock) && + io_block_types.count(type.self) == 0; + bool is_buffer_block = has_decoration(type.self, DecorationBufferBlock); + if (type.basetype == SPIRType::Struct && type.array.empty() && + !type.pointer && !is_non_io_block && !is_buffer_block) { if (emitted) statement(""); @@ -1092,6 +1471,21 @@ void CompilerHLSL::emit_specialization_constants_and_structs() emit_struct(type); } } + else if (id.get_type() == TypeUndef) + { + auto &undef = id.get(); + auto &type = this->get(undef.basetype); + // OpUndef can be void for some reason ... + if (type.basetype == SPIRType::Void) + return; + + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); + + statement("static ", variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); + emitted = true; + } } if (emitted) @@ -1102,18 +1496,36 @@ void CompilerHLSL::replace_illegal_names() { static const unordered_set keywords = { // Additional HLSL specific keywords. 
- "line", "linear", "matrix", "point", "row_major", "sampler", + // From https://docs.microsoft.com/en-US/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-keywords + "AppendStructuredBuffer", "asm", "asm_fragment", + "BlendState", "bool", "break", "Buffer", "ByteAddressBuffer", + "case", "cbuffer", "centroid", "class", "column_major", "compile", + "compile_fragment", "CompileShader", "const", "continue", "ComputeShader", + "ConsumeStructuredBuffer", + "default", "DepthStencilState", "DepthStencilView", "discard", "do", + "double", "DomainShader", "dword", + "else", "export", "false", "float", "for", "fxgroup", + "GeometryShader", "groupshared", "half", "HullShader", + "indices", "if", "in", "inline", "inout", "InputPatch", "int", "interface", + "line", "lineadj", "linear", "LineStream", + "matrix", "min16float", "min10float", "min16int", "min16uint", + "namespace", "nointerpolation", "noperspective", "NULL", + "out", "OutputPatch", + "payload", "packoffset", "pass", "pixelfragment", "PixelShader", "point", + "PointStream", "precise", "RasterizerState", "RenderTargetView", + "return", "register", "row_major", "RWBuffer", "RWByteAddressBuffer", + "RWStructuredBuffer", "RWTexture1D", "RWTexture1DArray", "RWTexture2D", + "RWTexture2DArray", "RWTexture3D", "sample", "sampler", "SamplerState", + "SamplerComparisonState", "shared", "snorm", "stateblock", "stateblock_state", + "static", "string", "struct", "switch", "StructuredBuffer", "tbuffer", + "technique", "technique10", "technique11", "texture", "Texture1D", + "Texture1DArray", "Texture2D", "Texture2DArray", "Texture2DMS", "Texture2DMSArray", + "Texture3D", "TextureCube", "TextureCubeArray", "true", "typedef", "triangle", + "triangleadj", "TriangleStream", "uint", "uniform", "unorm", "unsigned", + "vector", "vertexfragment", "VertexShader", "vertices", "void", "volatile", "while", }; - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - if (!is_hidden_variable(var)) - { - auto &m = 
ir.meta[var.self].decoration; - if (keywords.find(m.alias) != end(keywords)) - m.alias = join("_", m.alias); - } - }); - + CompilerGLSL::replace_illegal_names(keywords); CompilerGLSL::replace_illegal_names(); } @@ -1123,6 +1535,19 @@ void CompilerHLSL::emit_resources() replace_illegal_names(); + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + case ExecutionModelMeshEXT: + fixup_implicit_builtin_block_names(execution.model); + break; + + default: + break; + } + emit_specialization_constants_and_structs(); emit_composite_constants(); @@ -1155,7 +1580,8 @@ void CompilerHLSL::emit_resources() } }); - if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30) + if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30 && + active_output_builtins.get(BuiltInPosition)) { statement("uniform float4 gl_HalfPixel;"); emitted = true; @@ -1179,7 +1605,8 @@ void CompilerHLSL::emit_resources() } if (var.storage != StorageClassFunction && !is_builtin_variable(var) && !var.remapped_variable && - type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter)) + type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) && + !is_hidden_variable(var)) { emit_uniform(var); emitted = true; @@ -1193,22 +1620,21 @@ void CompilerHLSL::emit_resources() // Emit builtin input and output variables here. emit_builtin_variables(); - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + if (execution.model != ExecutionModelMeshEXT) + { + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); - // Do not emit I/O blocks here. 
- // I/O blocks can be arrayed, so we must deal with them separately to support geometry shaders - // and tessellation down the line. - if (!block && var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && - (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && - interface_variable_exists_in_entry_point(var.self)) - { - // Only emit non-builtins which are not blocks here. Builtin variables are handled separately. - emit_interface_block_globally(var); - emitted = true; - } - }); + if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + // Builtin variables are handled separately. + emit_interface_block_globally(var); + emitted = true; + } + }); + } if (emitted) statement(""); @@ -1218,69 +1644,72 @@ void CompilerHLSL::emit_resources() require_output = false; unordered_set active_inputs; unordered_set active_outputs; - SmallVector input_variables; - SmallVector output_variables; + + struct IOVariable + { + const SPIRVariable *var; + uint32_t location; + uint32_t block_member_index; + bool block; + }; + + SmallVector input_variables; + SmallVector output_variables; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); - bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + bool block = has_decoration(type.self, DecorationBlock); if (var.storage != StorageClassInput && var.storage != StorageClassOutput) return; - // Do not emit I/O blocks here. - // I/O blocks can be arrayed, so we must deal with them separately to support geometry shaders - // and tessellation down the line. 
- if (!block && !var.remapped_variable && type.pointer && !is_builtin_variable(var) && + if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) { - if (var.storage == StorageClassInput) - input_variables.push_back(&var); - else - output_variables.push_back(&var); - } - - // Reserve input and output locations for block variables as necessary. - if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) - { - auto &active = var.storage == StorageClassInput ? active_inputs : active_outputs; - for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + if (block) { - if (has_member_decoration(type.self, i, DecorationLocation)) + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) { - uint32_t location = get_member_decoration(type.self, i, DecorationLocation); - active.insert(location); + uint32_t location = get_declared_member_location(var, i, false); + if (var.storage == StorageClassInput) + input_variables.push_back({ &var, location, i, true }); + else + output_variables.push_back({ &var, location, i, true }); } } - - // Emit the block struct and a global variable here. 
- emit_io_block(var); + else + { + uint32_t location = get_decoration(var.self, DecorationLocation); + if (var.storage == StorageClassInput) + input_variables.push_back({ &var, location, 0, false }); + else + output_variables.push_back({ &var, location, 0, false }); + } } }); - const auto variable_compare = [&](const SPIRVariable *a, const SPIRVariable *b) -> bool { + const auto variable_compare = [&](const IOVariable &a, const IOVariable &b) -> bool { // Sort input and output variables based on, from more robust to less robust: // - Location // - Variable has a location // - Name comparison // - Variable has a name // - Fallback: ID - bool has_location_a = has_decoration(a->self, DecorationLocation); - bool has_location_b = has_decoration(b->self, DecorationLocation); + bool has_location_a = a.block || has_decoration(a.var->self, DecorationLocation); + bool has_location_b = b.block || has_decoration(b.var->self, DecorationLocation); if (has_location_a && has_location_b) - { - return get_decoration(a->self, DecorationLocation) < get_decoration(b->self, DecorationLocation); - } + return a.location < b.location; else if (has_location_a && !has_location_b) return true; else if (!has_location_a && has_location_b) return false; - const auto &name1 = to_name(a->self); - const auto &name2 = to_name(b->self); + const auto &name1 = to_name(a.var->self); + const auto &name2 = to_name(b.var->self); if (name1.empty() && name2.empty()) - return a->self < b->self; + return a.var->self < b.var->self; else if (name1.empty()) return true; else if (name2.empty()) @@ -1307,33 +1736,71 @@ void CompilerHLSL::emit_resources() begin_scope(); sort(input_variables.begin(), input_variables.end(), variable_compare); - for (auto var : input_variables) - emit_interface_block_in_struct(*var, active_inputs); + for (auto &var : input_variables) + { + if (var.block) + emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_inputs); + else + 
emit_interface_block_in_struct(*var.var, active_inputs); + } emit_builtin_inputs_in_struct(); end_scope_decl(); statement(""); } + const bool is_mesh_shader = execution.model == ExecutionModelMeshEXT; if (!output_variables.empty() || !active_output_builtins.empty()) { - require_output = true; - statement("struct SPIRV_Cross_Output"); + sort(output_variables.begin(), output_variables.end(), variable_compare); + require_output = !is_mesh_shader; + statement(is_mesh_shader ? "struct gl_MeshPerVertexEXT" : "struct SPIRV_Cross_Output"); begin_scope(); - // FIXME: Use locations properly if they exist. - sort(output_variables.begin(), output_variables.end(), variable_compare); - for (auto var : output_variables) - emit_interface_block_in_struct(*var, active_outputs); + for (auto &var : output_variables) + { + if (is_per_primitive_variable(*var.var)) + continue; + if (var.block && is_mesh_shader && var.block_member_index != 0) + continue; + if (var.block && !is_mesh_shader) + emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs); + else + emit_interface_block_in_struct(*var.var, active_outputs); + } emit_builtin_outputs_in_struct(); + if (!is_mesh_shader) + emit_builtin_primitive_outputs_in_struct(); end_scope_decl(); statement(""); + + if (is_mesh_shader) + { + statement("struct gl_MeshPerPrimitiveEXT"); + begin_scope(); + for (auto &var : output_variables) + { + if (!is_per_primitive_variable(*var.var)) + continue; + if (var.block && var.block_member_index != 0) + continue; + + emit_interface_block_in_struct(*var.var, active_outputs); + } + emit_builtin_primitive_outputs_in_struct(); + end_scope_decl(); + statement(""); + } } // Global variables. 
for (auto global : global_variables) { auto &var = get(global); - if (var.storage != StorageClassOutput) + if (is_hidden_variable(var, true)) + continue; + + if (var.storage != StorageClassOutput && + var.storage != StorageClassTaskPayloadWorkgroupEXT) { if (!variable_is_lut(var)) { @@ -1350,7 +1817,15 @@ void CompilerHLSL::emit_resources() storage = "static"; break; } - statement(storage, " ", variable_decl(var), ";"); + + string initializer; + if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate && + !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var))) + { + initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var))); + } + statement(storage, " ", variable_decl(var), initializer, ";"); + emitted = true; } } @@ -1359,8 +1834,6 @@ void CompilerHLSL::emit_resources() if (emitted) statement(""); - declare_undefined_values(); - if (requires_op_fmod) { static const char *types[] = { @@ -1380,97 +1853,64 @@ void CompilerHLSL::emit_resources() } } - if (required_textureSizeVariants != 0) + emit_texture_size_variants(required_texture_size_variants.srv, "4", false, ""); + for (uint32_t norm = 0; norm < 3; norm++) { - static const char *types[QueryTypeCount] = { "float4", "int4", "uint4" }; - static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray", - "Texture3D", "Buffer", "TextureCube", "TextureCubeArray", - "Texture2DMS", "Texture2DMSArray" }; - - static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false }; - - static const char *ret_types[QueryDimCount] = { - "uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3", - }; - - static const uint32_t return_arguments[QueryDimCount] = { - 1, 2, 2, 3, 3, 1, 2, 3, 2, 3, - }; - - for (uint32_t index = 0; index < QueryDimCount; index++) + for (uint32_t comp = 0; comp < 4; comp++) { - for (uint32_t 
type_index = 0; type_index < QueryTypeCount; type_index++) - { - uint32_t bit = 16 * type_index + index; - uint64_t mask = 1ull << bit; - - if ((required_textureSizeVariants & mask) == 0) - continue; - - statement(ret_types[index], " SPIRV_Cross_textureSize(", dims[index], "<", types[type_index], - "> Tex, uint Level, out uint Param)"); - begin_scope(); - statement(ret_types[index], " ret;"); - switch (return_arguments[index]) - { - case 1: - if (has_lod[index]) - statement("Tex.GetDimensions(Level, ret.x, Param);"); - else - { - statement("Tex.GetDimensions(ret.x);"); - statement("Param = 0u;"); - } - break; - case 2: - if (has_lod[index]) - statement("Tex.GetDimensions(Level, ret.x, ret.y, Param);"); - else - statement("Tex.GetDimensions(ret.x, ret.y, Param);"); - break; - case 3: - if (has_lod[index]) - statement("Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);"); - else - statement("Tex.GetDimensions(ret.x, ret.y, ret.z, Param);"); - break; - } - - statement("return ret;"); - end_scope(); - statement(""); - } + static const char *qualifiers[] = { "", "unorm ", "snorm " }; + static const char *vecsizes[] = { "", "2", "3", "4" }; + emit_texture_size_variants(required_texture_size_variants.uav[norm][comp], vecsizes[comp], true, + qualifiers[norm]); } } if (requires_fp16_packing) { // HLSL does not pack into a single word sadly :( - statement("uint SPIRV_Cross_packHalf2x16(float2 value)"); + statement("uint spvPackHalf2x16(float2 value)"); begin_scope(); statement("uint2 Packed = f32tof16(value);"); statement("return Packed.x | (Packed.y << 16);"); end_scope(); statement(""); - statement("float2 SPIRV_Cross_unpackHalf2x16(uint value)"); + statement("float2 spvUnpackHalf2x16(uint value)"); begin_scope(); statement("return f16tof32(uint2(value & 0xffff, value >> 16));"); end_scope(); statement(""); } + if (requires_uint2_packing) + { + statement("uint64_t spvPackUint2x32(uint2 value)"); + begin_scope(); + statement("return (uint64_t(value.y) << 32) | 
uint64_t(value.x);"); + end_scope(); + statement(""); + + statement("uint2 spvUnpackUint2x32(uint64_t value)"); + begin_scope(); + statement("uint2 Unpacked;"); + statement("Unpacked.x = uint(value & 0xffffffff);"); + statement("Unpacked.y = uint(value >> 32);"); + statement("return Unpacked;"); + end_scope(); + statement(""); + } + if (requires_explicit_fp16_packing) { // HLSL does not pack into a single word sadly :( - statement("uint SPIRV_Cross_packFloat2x16(min16float2 value)"); + statement("uint spvPackFloat2x16(min16float2 value)"); begin_scope(); statement("uint2 Packed = f32tof16(value);"); statement("return Packed.x | (Packed.y << 16);"); end_scope(); statement(""); - statement("min16float2 SPIRV_Cross_unpackFloat2x16(uint value)"); + statement("min16float2 spvUnpackFloat2x16(uint value)"); begin_scope(); statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));"); end_scope(); @@ -1480,14 +1920,14 @@ void CompilerHLSL::emit_resources() // HLSL does not seem to have builtins for these operation, so roll them by hand ... 
if (requires_unorm8_packing) { - statement("uint SPIRV_Cross_packUnorm4x8(float4 value)"); + statement("uint spvPackUnorm4x8(float4 value)"); begin_scope(); statement("uint4 Packed = uint4(round(saturate(value) * 255.0));"); statement("return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24);"); end_scope(); statement(""); - statement("float4 SPIRV_Cross_unpackUnorm4x8(uint value)"); + statement("float4 spvUnpackUnorm4x8(uint value)"); begin_scope(); statement("uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);"); statement("return float4(Packed) / 255.0;"); @@ -1497,14 +1937,14 @@ void CompilerHLSL::emit_resources() if (requires_snorm8_packing) { - statement("uint SPIRV_Cross_packSnorm4x8(float4 value)"); + statement("uint spvPackSnorm4x8(float4 value)"); begin_scope(); statement("int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff;"); statement("return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24));"); end_scope(); statement(""); - statement("float4 SPIRV_Cross_unpackSnorm4x8(uint value)"); + statement("float4 spvUnpackSnorm4x8(uint value)"); begin_scope(); statement("int SignedValue = int(value);"); statement("int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24;"); @@ -1515,14 +1955,14 @@ void CompilerHLSL::emit_resources() if (requires_unorm16_packing) { - statement("uint SPIRV_Cross_packUnorm2x16(float2 value)"); + statement("uint spvPackUnorm2x16(float2 value)"); begin_scope(); statement("uint2 Packed = uint2(round(saturate(value) * 65535.0));"); statement("return Packed.x | (Packed.y << 16);"); end_scope(); statement(""); - statement("float2 SPIRV_Cross_unpackUnorm2x16(uint value)"); + statement("float2 spvUnpackUnorm2x16(uint value)"); begin_scope(); statement("uint2 Packed = uint2(value & 0xffff, value >> 16);"); statement("return float2(Packed) / 65535.0;"); @@ -1532,14 +1972,14 @@ void CompilerHLSL::emit_resources() if 
(requires_snorm16_packing) { - statement("uint SPIRV_Cross_packSnorm2x16(float2 value)"); + statement("uint spvPackSnorm2x16(float2 value)"); begin_scope(); statement("int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff;"); statement("return uint(Packed.x | (Packed.y << 16));"); end_scope(); statement(""); - statement("float2 SPIRV_Cross_unpackSnorm2x16(uint value)"); + statement("float2 spvUnpackSnorm2x16(uint value)"); begin_scope(); statement("int SignedValue = int(value);"); statement("int2 Packed = int2(SignedValue << 16, SignedValue) >> 16;"); @@ -1553,7 +1993,7 @@ void CompilerHLSL::emit_resources() static const char *types[] = { "uint", "uint2", "uint3", "uint4" }; for (auto &type : types) { - statement(type, " SPIRV_Cross_bitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)"); + statement(type, " spvBitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)"); begin_scope(); statement("uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));"); statement("return (Base & ~Mask) | ((Insert << Offset) & Mask);"); @@ -1567,7 +2007,7 @@ void CompilerHLSL::emit_resources() static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" }; for (auto &type : unsigned_types) { - statement(type, " SPIRV_Cross_bitfieldUExtract(", type, " Base, uint Offset, uint Count)"); + statement(type, " spvBitfieldUExtract(", type, " Base, uint Offset, uint Count)"); begin_scope(); statement("uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);"); statement("return (Base >> Offset) & Mask;"); @@ -1579,7 +2019,7 @@ void CompilerHLSL::emit_resources() static const char *signed_types[] = { "int", "int2", "int3", "int4" }; for (auto &type : signed_types) { - statement(type, " SPIRV_Cross_bitfieldSExtract(", type, " Base, int Offset, int Count)"); + statement(type, " spvBitfieldSExtract(", type, " Base, int Offset, int Count)"); begin_scope(); statement("int Mask = Count == 32 ? 
-1 : ((1 << Count) - 1);"); statement(type, " Masked = (Base >> Offset) & Mask;"); @@ -1594,7 +2034,7 @@ void CompilerHLSL::emit_resources() { statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); - statement("float2x2 SPIRV_Cross_Inverse(float2x2 m)"); + statement("float2x2 spvInverse(float2x2 m)"); begin_scope(); statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); @@ -1618,29 +2058,29 @@ void CompilerHLSL::emit_resources() if (requires_inverse_3x3) { statement("// Returns the determinant of a 2x2 matrix."); - statement("float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2)"); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); begin_scope(); statement("return a1 * b2 - b1 * a2;"); end_scope(); statement_no_indent(""); statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); - statement("float3x3 SPIRV_Cross_Inverse(float3x3 m)"); + statement("float3x3 spvInverse(float3x3 m)"); begin_scope(); statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); - statement("adj[0][0] = SPIRV_Cross_Det2x2(m[1][1], m[1][2], m[2][1], m[2][2]);"); - statement("adj[0][1] = -SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[2][1], m[2][2]);"); - statement("adj[0][2] = SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[1][1], m[1][2]);"); + statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);"); + statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);"); + statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);"); statement_no_indent(""); - statement("adj[1][0] = -SPIRV_Cross_Det2x2(m[1][0], m[1][2], m[2][0], m[2][2]);"); - statement("adj[1][1] = SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[2][0], m[2][2]);"); - statement("adj[1][2] = -SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[1][0], m[1][2]);"); + statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);"); + statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);"); + statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);"); statement_no_indent(""); - statement("adj[2][0] = SPIRV_Cross_Det2x2(m[1][0], m[1][1], m[2][0], m[2][1]);"); - statement("adj[2][1] = -SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[2][0], m[2][1]);"); - statement("adj[2][2] = SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[1][0], m[1][1]);"); + statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);"); + statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);"); + statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);"); statement_no_indent(""); statement("// Calculate the determinant as a combination of the cofactors of the first row."); statement("float 
det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);"); @@ -1657,7 +2097,7 @@ void CompilerHLSL::emit_resources() if (!requires_inverse_3x3) { statement("// Returns the determinant of a 2x2 matrix."); - statement("float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2)"); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); begin_scope(); statement("return a1 * b2 - b1 * a2;"); end_scope(); @@ -1665,71 +2105,71 @@ void CompilerHLSL::emit_resources() } statement("// Returns the determinant of a 3x3 matrix."); - statement("float SPIRV_Cross_Det3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " + statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " "float c2, float c3)"); begin_scope(); - statement("return a1 * SPIRV_Cross_Det2x2(b2, b3, c2, c3) - b1 * SPIRV_Cross_Det2x2(a2, a3, c2, c3) + c1 * " - "SPIRV_Cross_Det2x2(a2, a3, " + statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * " + "spvDet2x2(a2, a3, " "b2, b3);"); end_scope(); statement_no_indent(""); statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); - statement("float4x4 SPIRV_Cross_Inverse(float4x4 m)"); + statement("float4x4 spvInverse(float4x4 m)"); begin_scope(); statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); statement( - "adj[0][0] = SPIRV_Cross_Det3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " "m[3][3]);"); statement( - "adj[0][1] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " "m[3][3]);"); statement( - "adj[0][2] = SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " + "adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " "m[3][3]);"); statement( - "adj[0][3] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " + "adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " "m[2][3]);"); statement_no_indent(""); statement( - "adj[1][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " "m[3][3]);"); statement( - "adj[1][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " "m[3][3]);"); statement( - "adj[1][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], " + "adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], 
m[3][0], m[3][2], " "m[3][3]);"); statement( - "adj[1][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], " + "adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], " "m[2][3]);"); statement_no_indent(""); statement( - "adj[2][0] = SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " "m[3][3]);"); statement( - "adj[2][1] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " "m[3][3]);"); statement( - "adj[2][2] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " + "adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " "m[3][3]);"); statement( - "adj[2][3] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " + "adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " "m[2][3]);"); statement_no_indent(""); statement( - "adj[3][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " "m[3][2]);"); statement( - "adj[3][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " "m[3][2]);"); statement( - "adj[3][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " + "adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " "m[3][2]);"); statement( - "adj[3][3] = 
SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " + "adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " "m[2][2]);"); statement_no_indent(""); statement("// Calculate the determinant as a combination of the cofactors of the first row."); @@ -1746,7 +2186,7 @@ void CompilerHLSL::emit_resources() if (requires_scalar_reflect) { // FP16/FP64? No templates in HLSL. - statement("float SPIRV_Cross_Reflect(float i, float n)"); + statement("float spvReflect(float i, float n)"); begin_scope(); statement("return i - 2.0 * dot(n, i) * n;"); end_scope(); @@ -1756,7 +2196,7 @@ void CompilerHLSL::emit_resources() if (requires_scalar_refract) { // FP16/FP64? No templates in HLSL. - statement("float SPIRV_Cross_Refract(float i, float n, float eta)"); + statement("float spvRefract(float i, float n, float eta)"); begin_scope(); statement("float NoI = n * i;"); statement("float NoI2 = NoI * NoI;"); @@ -1772,6 +2212,304 @@ void CompilerHLSL::emit_resources() end_scope(); statement(""); } + + if (requires_scalar_faceforward) + { + // FP16/FP64? No templates in HLSL. + statement("float spvFaceForward(float n, float i, float nref)"); + begin_scope(); + statement("return i * nref < 0.0 ? n : -n;"); + end_scope(); + statement(""); + } + + for (TypeID type_id : composite_selection_workaround_types) + { + // Need out variable since HLSL does not support returning arrays. 
+ auto &type = get(type_id); + auto type_str = type_to_glsl(type); + auto type_arr_str = type_to_array_glsl(type); + statement("void spvSelectComposite(out ", type_str, " out_value", type_arr_str, ", bool cond, ", + type_str, " true_val", type_arr_str, ", ", + type_str, " false_val", type_arr_str, ")"); + begin_scope(); + statement("if (cond)"); + begin_scope(); + statement("out_value = true_val;"); + end_scope(); + statement("else"); + begin_scope(); + statement("out_value = false_val;"); + end_scope(); + end_scope(); + statement(""); + } +} + +void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char *vecsize_qualifier, bool uav, + const char *type_qualifier) +{ + if (variant_mask == 0) + return; + + static const char *types[QueryTypeCount] = { "float", "int", "uint" }; + static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray", + "Texture3D", "Buffer", "TextureCube", "TextureCubeArray", + "Texture2DMS", "Texture2DMSArray" }; + + static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false }; + + static const char *ret_types[QueryDimCount] = { + "uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3", + }; + + static const uint32_t return_arguments[QueryDimCount] = { + 1, 2, 2, 3, 3, 1, 2, 3, 2, 3, + }; + + for (uint32_t index = 0; index < QueryDimCount; index++) + { + for (uint32_t type_index = 0; type_index < QueryTypeCount; type_index++) + { + uint32_t bit = 16 * type_index + index; + uint64_t mask = 1ull << bit; + + if ((variant_mask & mask) == 0) + continue; + + statement(ret_types[index], " spv", (uav ? "Image" : "Texture"), "Size(", (uav ? "RW" : ""), + dims[index], "<", type_qualifier, types[type_index], vecsize_qualifier, "> Tex, ", + (uav ? 
"" : "uint Level, "), "out uint Param)"); + begin_scope(); + statement(ret_types[index], " ret;"); + switch (return_arguments[index]) + { + case 1: + if (has_lod[index] && !uav) + statement("Tex.GetDimensions(Level, ret.x, Param);"); + else + { + statement("Tex.GetDimensions(ret.x);"); + statement("Param = 0u;"); + } + break; + case 2: + if (has_lod[index] && !uav) + statement("Tex.GetDimensions(Level, ret.x, ret.y, Param);"); + else if (!uav) + statement("Tex.GetDimensions(ret.x, ret.y, Param);"); + else + { + statement("Tex.GetDimensions(ret.x, ret.y);"); + statement("Param = 0u;"); + } + break; + case 3: + if (has_lod[index] && !uav) + statement("Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);"); + else if (!uav) + statement("Tex.GetDimensions(ret.x, ret.y, ret.z, Param);"); + else + { + statement("Tex.GetDimensions(ret.x, ret.y, ret.z);"); + statement("Param = 0u;"); + } + break; + } + + statement("return ret;"); + end_scope(); + statement(""); + } + } +} + +void CompilerHLSL::analyze_meshlet_writes() +{ + uint32_t id_per_vertex = 0; + uint32_t id_per_primitive = 0; + bool need_per_primitive = false; + bool need_per_vertex = false; + + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if (var.storage == StorageClassOutput && block && is_builtin_variable(var)) + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + id_per_primitive = var.self; + else + id_per_vertex = var.self; + } + else if (var.storage == StorageClassOutput) + { + Bitset flags; + if (block) + flags = get_buffer_block_flags(var.self); + else + flags = get_decoration_bitset(var.self); + + if (flags.get(DecorationPerPrimitiveEXT)) + need_per_primitive = true; + else + need_per_vertex = true; + } + }); + + // If we have per-primitive outputs, and no per-primitive builtins, + // empty version of gl_MeshPerPrimitiveEXT will be emitted. 
+ // If we don't use block IO for vertex output, we'll also need to synthesize the PerVertex block. + + const auto generate_block = [&](const char *block_name, const char *instance_name, bool per_primitive) -> uint32_t { + auto &execution = get_entry_point(); + + uint32_t op_type = ir.increase_bound_by(4); + uint32_t op_arr = op_type + 1; + uint32_t op_ptr = op_type + 2; + uint32_t op_var = op_type + 3; + + auto &type = set(op_type); + type.basetype = SPIRType::Struct; + set_name(op_type, block_name); + set_decoration(op_type, DecorationBlock); + if (per_primitive) + set_decoration(op_type, DecorationPerPrimitiveEXT); + + auto &arr = set(op_arr, type); + arr.parent_type = type.self; + arr.array.push_back(per_primitive ? execution.output_primitives : execution.output_vertices); + arr.array_size_literal.push_back(true); + + auto &ptr = set(op_ptr, arr); + ptr.parent_type = arr.self; + ptr.pointer = true; + ptr.pointer_depth++; + ptr.storage = StorageClassOutput; + set_decoration(op_ptr, DecorationBlock); + set_name(op_ptr, block_name); + + auto &var = set(op_var, op_ptr, StorageClassOutput); + if (per_primitive) + set_decoration(op_var, DecorationPerPrimitiveEXT); + set_name(op_var, instance_name); + execution.interface_variables.push_back(var.self); + + return op_var; + }; + + if (id_per_vertex == 0 && need_per_vertex) + id_per_vertex = generate_block("gl_MeshPerVertexEXT", "gl_MeshVerticesEXT", false); + if (id_per_primitive == 0 && need_per_primitive) + id_per_primitive = generate_block("gl_MeshPerPrimitiveEXT", "gl_MeshPrimitivesEXT", true); + + unordered_set processed_func_ids; + analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids); +} + +void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive, + std::unordered_set &processed_func_ids) +{ + // Avoid processing a function more than once + if (processed_func_ids.find(func_id) != processed_func_ids.end()) + return; 
+ processed_func_ids.insert(func_id); + + auto &func = get(func_id); + // Recursively establish global args added to functions on which we depend. + for (auto& block : func.blocks) + { + auto &b = get(block); + for (auto &i : b.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + switch (op) + { + case OpFunctionCall: + { + // Then recurse into the function itself to extract globals used internally in the function + uint32_t inner_func_id = ops[2]; + analyze_meshlet_writes(inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids); + auto &inner_func = get(inner_func_id); + for (auto &iarg : inner_func.arguments) + { + if (!iarg.alias_global_variable) + continue; + + bool already_declared = false; + for (auto &arg : func.arguments) + { + if (arg.id == iarg.id) + { + already_declared = true; + break; + } + } + + if (!already_declared) + { + // basetype is effectively ignored here since we declare the argument + // with explicit types. Just pass down a valid type. + func.arguments.push_back({ expression_type_id(iarg.id), iarg.id, + iarg.read_count, iarg.write_count, true }); + } + } + break; + } + + case OpStore: + case OpLoad: + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + case OpInBoundsPtrAccessChain: + case OpArrayLength: + { + auto *var = maybe_get(ops[op == OpStore ? 0 : 2]); + if (var && (var->storage == StorageClassOutput || var->storage == StorageClassTaskPayloadWorkgroupEXT)) + { + bool already_declared = false; + auto builtin_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); + + uint32_t var_id = var->self; + if (var->storage != StorageClassTaskPayloadWorkgroupEXT && + builtin_type != BuiltInPrimitivePointIndicesEXT && + builtin_type != BuiltInPrimitiveLineIndicesEXT && + builtin_type != BuiltInPrimitiveTriangleIndicesEXT) + { + var_id = is_per_primitive_variable(*var) ? 
id_per_primitive : id_per_vertex; + } + + for (auto &arg : func.arguments) + { + if (arg.id == var_id) + { + already_declared = true; + break; + } + } + + if (!already_declared) + { + // basetype is effectively ignored here since we declare the argument + // with explicit types. Just pass down a valid type. + uint32_t type_id = expression_type_id(var_id); + if (var->storage == StorageClassTaskPayloadWorkgroupEXT) + func.arguments.push_back({ type_id, var_id, 1u, 0u, true }); + else + func.arguments.push_back({ type_id, var_id, 1u, 1u, true }); + } + } + break; + } + + default: + break; + } + } + } } string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index) @@ -1801,17 +2539,10 @@ void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type if (index < memb.size()) memberflags = memb[index].decoration_flags; - string qualifiers; - bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || - ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); - - if (is_block) - qualifiers = to_interpolation_qualifiers(memberflags); - string packing_offset; bool is_push_constant = type.storage == StorageClassPushConstant; - if ((has_extended_decoration(type.self, SPIRVCrossDecorationPacked) || is_push_constant) && + if ((has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) || is_push_constant) && has_member_decoration(type.self, index, DecorationOffset)) { uint32_t offset = memb[index].offset - base_offset; @@ -1822,37 +2553,47 @@ void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type packing_offset = join(" : packoffset(c", offset / 16, packing_swizzle[(offset & 15) >> 2], ")"); } - statement(layout_for_member(type, index), qualifiers, qualifier, + statement(layout_for_member(type, index), qualifier, variable_decl(membertype, to_member_name(type, index)), packing_offset, ";"); } +void CompilerHLSL::emit_rayquery_function(const char *commited, const 
char *candidate, const uint32_t *ops) +{ + flush_variable_declaration(ops[0]); + uint32_t is_commited = evaluate_constant_u32(ops[3]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), is_commited ? commited : candidate), false); +} + void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) { auto &type = get(var.basetype); bool is_uav = var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock); - if (is_uav) + if (flattened_buffer_blocks.count(var.self)) + { + emit_buffer_block_flattened(var); + } + else if (is_uav) { Bitset flags = ir.get_buffer_block_flags(var); - bool is_readonly = flags.get(DecorationNonWritable); - bool is_coherent = flags.get(DecorationCoherent); + bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); + bool is_coherent = flags.get(DecorationCoherent) && !is_readonly; + bool is_interlocked = interlocked_resources.count(var.self) > 0; + const char *type_name = "ByteAddressBuffer "; + if (!is_readonly) + type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer "; add_resource_name(var.self); - statement(is_coherent ? "globallycoherent " : "", is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ", - to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); + statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type), + to_resource_binding(var), ";"); } else { if (type.array.empty()) { - if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset)) - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); - else - SPIRV_CROSS_THROW("cbuffer cannot be expressed with either HLSL packing layout or packoffset."); - // Flatten the top-level struct so we can use packoffset, // this restriction is similar to GLSL where layout(offset) is not possible on sub-structs. 
- flattened_structs.insert(var.self); + flattened_structs[var.self] = false; // Prefer the block name if possible. auto buffer_name = to_name(type.self, false); @@ -1870,6 +2611,16 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) if (buffer_name.empty()) buffer_name = join("_", get(var.basetype).self, "_", var.self); + uint32_t failed_index = 0; + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index)) + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + else + { + SPIRV_CROSS_THROW(join("cbuffer ID ", var.self, " (name: ", buffer_name, "), member index ", + failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with either HLSL packing layout or packoffset.")); + } + block_names.insert(buffer_name); // Save for post-reflection later. @@ -1890,7 +2641,9 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) add_member_name(type, i); auto backup_name = get_member_name(type.self, i); auto member_name = to_member_name(type, i); - set_member_name(type.self, i, sanitize_underscores(join(to_name(var.self), "_", member_name))); + member_name = join(to_name(var.self), "_", member_name); + ParsedIR::sanitize_underscores(member_name); + set_member_name(type.self, i, member_name); emit_struct_member(type, member, i, ""); set_member_name(type.self, i, backup_name); i++; @@ -1905,13 +2658,18 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) SPIRV_CROSS_THROW( "Need ConstantBuffer to use arrays of UBOs, but this is only supported in SM 5.1."); - // ConstantBuffer does not support packoffset, so it is unuseable unless everything aligns as we expect. 
- if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer)) - SPIRV_CROSS_THROW("HLSL ConstantBuffer cannot be expressed with normal HLSL packing rules."); - add_resource_name(type.self); add_resource_name(var.self); + // ConstantBuffer does not support packoffset, so it is unuseable unless everything aligns as we expect. + uint32_t failed_index = 0; + if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer, &failed_index)) + { + SPIRV_CROSS_THROW(join("HLSL ConstantBuffer ID ", var.self, " (name: ", to_name(type.self), + "), member index ", failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with normal HLSL packing rules.")); + } + emit_struct(get(type.self)); statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); @@ -1921,7 +2679,11 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) { - if (root_constants_layout.empty()) + if (flattened_buffer_blocks.count(var.self)) + { + emit_buffer_block_flattened(var); + } + else if (root_constants_layout.empty()) { emit_buffer_block(var); } @@ -1931,19 +2693,24 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) { auto &type = get(var.basetype); - if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, layout.start, layout.end)) - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + uint32_t failed_index = 0; + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index, layout.start, + layout.end)) + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); else - SPIRV_CROSS_THROW( - "root constant cbuffer cannot be expressed with either HLSL packing layout or packoffset."); + { + SPIRV_CROSS_THROW(join("Root constant cbuffer ID ", var.self, " (name: ", to_name(type.self), ")", + ", member index ", failed_index, " (name: ", 
to_member_name(type, failed_index), + ") cannot be expressed with either HLSL packing layout or packoffset.")); + } - flattened_structs.insert(var.self); + flattened_structs[var.self] = false; type.member_name_cache.clear(); add_resource_name(var.self); auto &memb = ir.meta[type.self].members; statement("cbuffer SPIRV_CROSS_RootConstant_", to_name(var.self), - to_resource_register('b', layout.binding, layout.space)); + to_resource_register(HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT, 'b', layout.binding, layout.space)); begin_scope(); // Index of the next field in the generated root constant constant buffer @@ -1961,8 +2728,9 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) add_member_name(type, constant_index); auto backup_name = get_member_name(type.self, i); auto member_name = to_member_name(type, i); - set_member_name(type.self, constant_index, - sanitize_underscores(join(to_name(var.self), "_", member_name))); + member_name = join(to_name(var.self), "_", member_name); + ParsedIR::sanitize_underscores(member_name); + set_member_name(type.self, constant_index, member_name); emit_struct_member(type, member, i, "", layout.start); set_member_name(type.self, constant_index, backup_name); @@ -1977,7 +2745,7 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) string CompilerHLSL::to_sampler_expression(uint32_t id) { - auto expr = join("_", to_expression(id)); + auto expr = join("_", to_non_uniform_aware_expression(id)); auto index = expr.find_first_of('['); if (index == string::npos) { @@ -2003,9 +2771,9 @@ void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_i } } -string CompilerHLSL::to_func_call_arg(uint32_t id) +string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) { - string arg_str = CompilerGLSL::to_func_call_arg(id); + string arg_str = CompilerGLSL::to_func_call_arg(arg, id); if (hlsl_options.shader_model <= 30) return arg_str; @@ -2022,12 +2790,34 @@ string 
CompilerHLSL::to_func_call_arg(uint32_t id) return arg_str; } +string CompilerHLSL::get_inner_entry_point_name() const +{ + auto &execution = get_entry_point(); + + if (hlsl_options.use_entry_point_name) + { + auto name = join(execution.name, "_inner"); + ParsedIR::sanitize_underscores(name); + return name; + } + + if (execution.model == ExecutionModelVertex) + return "vert_main"; + else if (execution.model == ExecutionModelFragment) + return "frag_main"; + else if (execution.model == ExecutionModelGLCompute) + return "comp_main"; + else if (execution.model == ExecutionModelMeshEXT) + return "mesh_main"; + else + SPIRV_CROSS_THROW("Unsupported execution model."); +} + void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) { if (func.self != ir.default_entry_point) add_function_overload(func); - auto &execution = get_entry_point(); // Avoid shadow declarations. local_variable_names = resource_names; @@ -2048,14 +2838,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret if (func.self == ir.default_entry_point) { - if (execution.model == ExecutionModelVertex) - decl += "vert_main"; - else if (execution.model == ExecutionModelFragment) - decl += "frag_main"; - else if (execution.model == ExecutionModelGLCompute) - decl += "comp_main"; - else - SPIRV_CROSS_THROW("Unsupported execution model."); + decl += get_inner_entry_point_name(); processing_entry_point = true; } else @@ -2071,9 +2854,9 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret out_argument += "out "; out_argument += type_to_glsl(type); out_argument += " "; - out_argument += "SPIRV_Cross_return_value"; + out_argument += "spvReturnValue"; out_argument += type_to_array_glsl(type); - arglist.push_back(move(out_argument)); + arglist.push_back(std::move(out_argument)); } for (auto &arg : func.arguments) @@ -2097,7 +2880,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret 
arg_type.image.dim != DimBuffer) { // Manufacture automatic sampler arg for SampledImage texture - arglist.push_back(join(image_is_comparison(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ", + arglist.push_back(join(is_depth_image(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ", to_sampler_expression(arg.id), type_to_array_glsl(arg_type))); } @@ -2135,33 +2918,62 @@ void CompilerHLSL::emit_hlsl_entry_point() if (require_input) arguments.push_back("SPIRV_Cross_Input stage_input"); - // Add I/O blocks as separate arguments with appropriate storage qualifier. - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); - - if (var.storage != StorageClassInput && var.storage != StorageClassOutput) - return; - - if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) - { - if (var.storage == StorageClassInput) - { - arguments.push_back(join("in ", variable_decl(type, join("stage_input", to_name(var.self))))); - } - else if (var.storage == StorageClassOutput) - { - arguments.push_back(join("out ", variable_decl(type, join("stage_output", to_name(var.self))))); - } - } - }); - auto &execution = get_entry_point(); switch (execution.model) { + case ExecutionModelMeshEXT: + case ExecutionModelMeshNV: case ExecutionModelGLCompute: { + if (execution.model == ExecutionModelMeshEXT) + { + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + statement("[outputtopology(\"triangle\")]"); + else if (execution.flags.get(ExecutionModeOutputLinesEXT)) + statement("[outputtopology(\"line\")]"); + else if (execution.flags.get(ExecutionModeOutputPoints)) + SPIRV_CROSS_THROW("Topology mode \"points\" is not supported in DirectX"); + + auto &func = get(ir.default_entry_point); + for (auto &arg : func.arguments) + { + auto &var = get(arg.id); + auto &base_type = get(var.basetype); + bool block = 
has_decoration(base_type.self, DecorationBlock); + if (var.storage == StorageClassTaskPayloadWorkgroupEXT) + { + arguments.push_back("in payload " + variable_decl(var)); + } + else if (block) + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(arg.id, DecorationPerPrimitiveEXT)) + { + arguments.push_back("out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + else + { + arguments.push_back("out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[" + + std::to_string(execution.output_vertices) + "]"); + } + } + else + { + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + { + arguments.push_back("out indices uint3 gl_PrimitiveTriangleIndicesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + else + { + arguments.push_back("out indices uint2 gl_PrimitiveLineIndicesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + } + } + } SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); @@ -2169,6 +2981,16 @@ void CompilerHLSL::emit_hlsl_entry_point() uint32_t y = execution.workgroup_size.y; uint32_t z = execution.workgroup_size.z; + if (!execution.workgroup_size.constant && execution.flags.get(ExecutionModeLocalSizeId)) + { + if (execution.workgroup_size.id_x) + x = get(execution.workgroup_size.id_x).scalar(); + if (execution.workgroup_size.id_y) + y = get(execution.workgroup_size.id_y).scalar(); + if (execution.workgroup_size.id_z) + z = get(execution.workgroup_size.id_z).scalar(); + } + auto x_expr = wg_x.id ? get(wg_x.id).specialization_constant_macro_name : to_string(x); auto y_expr = wg_y.id ? get(wg_y.id).specialization_constant_macro_name : to_string(y); auto z_expr = wg_z.id ? get(wg_z.id).specialization_constant_macro_name : to_string(z); @@ -2184,7 +3006,13 @@ void CompilerHLSL::emit_hlsl_entry_point() break; } - statement(require_output ? 
"SPIRV_Cross_Output " : "void ", "main(", merge(arguments), ")"); + const char *entry_point_name; + if (hlsl_options.use_entry_point_name) + entry_point_name = get_entry_point().name.c_str(); + else + entry_point_name = "main"; + + statement(require_output ? "SPIRV_Cross_Output " : "void ", entry_point_name, "(", merge(arguments), ")"); begin_scope(); bool legacy = hlsl_options.shader_model <= 30; @@ -2200,7 +3028,11 @@ void CompilerHLSL::emit_hlsl_entry_point() if (legacy) statement(builtin, " = stage_input.", builtin, " + float4(0.5f, 0.5f, 0.0f, 0.0f);"); else + { statement(builtin, " = stage_input.", builtin, ";"); + // ZW are undefined in D3D9, only do this fixup here. + statement(builtin, ".w = 1.0 / ", builtin, ".w;"); + } break; case BuiltInVertexId: @@ -2218,6 +3050,14 @@ void CompilerHLSL::emit_hlsl_entry_point() statement(builtin, " = int(stage_input.", builtin, ");"); break; + case BuiltInBaseVertex: + statement(builtin, " = SPIRV_Cross_BaseVertex;"); + break; + + case BuiltInBaseInstance: + statement(builtin, " = SPIRV_Cross_BaseInstance;"); + break; + case BuiltInInstanceId: // D3D semantics are uint, but shader wants int. statement(builtin, " = int(stage_input.", builtin, ");"); @@ -2227,6 +3067,7 @@ void CompilerHLSL::emit_hlsl_entry_point() case BuiltInPointCoord: case BuiltInSubgroupSize: case BuiltInSubgroupLocalInvocationId: + case BuiltInHelperInvocation: break; case BuiltInSubgroupEqMask: @@ -2312,64 +3153,64 @@ void CompilerHLSL::emit_hlsl_entry_point() // Copy from stage input struct to globals. 
ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); - bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + bool block = has_decoration(type.self, DecorationBlock); if (var.storage != StorageClassInput) return; bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex; - if (!block && !var.remapped_variable && type.pointer && !is_builtin_variable(var) && + if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) { - auto name = to_name(var.self); - auto &mtype = this->get(var.basetype); - if (need_matrix_unroll && mtype.columns > 1) + if (block) { - // Unroll matrices. - for (uint32_t col = 0; col < mtype.columns; col++) - statement(name, "[", col, "] = stage_input.", name, "_", col, ";"); + auto type_name = to_name(type.self); + auto var_name = to_name(var.self); + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++) + { + auto mbr_name = to_member_name(type, mbr_idx); + auto flat_name = join(type_name, "_", mbr_name); + statement(var_name, ".", mbr_name, " = stage_input.", flat_name, ";"); + } } else { - statement(name, " = stage_input.", name, ";"); + auto name = to_name(var.self); + auto &mtype = this->get(var.basetype); + if (need_matrix_unroll && mtype.columns > 1) + { + // Unroll matrices. + for (uint32_t col = 0; col < mtype.columns; col++) + statement(name, "[", col, "] = stage_input.", name, "_", col, ";"); + } + else + { + statement(name, " = stage_input.", name, ";"); + } } } - - // I/O blocks don't use the common stage input/output struct, but separate outputs. - if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) - { - auto name = to_name(var.self); - statement(name, " = stage_input", name, ";"); - } }); // Run the shader. 
- if (execution.model == ExecutionModelVertex) - statement("vert_main();"); - else if (execution.model == ExecutionModelFragment) - statement("frag_main();"); - else if (execution.model == ExecutionModelGLCompute) - statement("comp_main();"); + if (execution.model == ExecutionModelVertex || + execution.model == ExecutionModelFragment || + execution.model == ExecutionModelGLCompute || + execution.model == ExecutionModelMeshEXT) + { + // For mesh shaders, we receive special arguments that we must pass down as function arguments. + // HLSL does not support proper reference types for passing these IO blocks, + // but DXC post-inlining seems to magically fix it up anyways *shrug*. + SmallVector arglist; + auto &func = get(ir.default_entry_point); + // The arguments are marked out, avoid detecting reads and emitting inout. + for (auto &arg : func.arguments) + arglist.push_back(to_expression(arg.id, false)); + statement(get_inner_entry_point_name(), "(", merge(arglist), ");"); + } else SPIRV_CROSS_THROW("Unsupported shader stage."); - // Copy block outputs. - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); - - if (var.storage != StorageClassOutput) - return; - - // I/O blocks don't use the common stage input/output struct, but separate outputs. - if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) - { - auto name = to_name(var.self); - statement("stage_output", name, " = ", name, ";"); - } - }); - // Copy stage outputs. 
if (require_output) { @@ -2406,27 +3247,43 @@ void CompilerHLSL::emit_hlsl_entry_point() ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); - bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + bool block = has_decoration(type.self, DecorationBlock); if (var.storage != StorageClassOutput) return; - if (!block && var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && - !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) + if (!var.remapped_variable && type.pointer && + !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) { - auto name = to_name(var.self); - - if (legacy && execution.model == ExecutionModelFragment) + if (block) { - string output_filler; - for (uint32_t size = type.vecsize; size < 4; ++size) - output_filler += ", 0.0"; - - statement("stage_output.", name, " = float4(", name, output_filler, ");"); + // I/O blocks need to flatten output. 
+ auto type_name = to_name(type.self); + auto var_name = to_name(var.self); + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++) + { + auto mbr_name = to_member_name(type, mbr_idx); + auto flat_name = join(type_name, "_", mbr_name); + statement("stage_output.", flat_name, " = ", var_name, ".", mbr_name, ";"); + } } else { - statement("stage_output.", name, " = ", name, ";"); + auto name = to_name(var.self); + + if (legacy && execution.model == ExecutionModelFragment) + { + string output_filler; + for (uint32_t size = type.vecsize; size < 4; ++size) + output_filler += ", 0.0"; + + statement("stage_output.", name, " = float4(", name, output_filler, ");"); + } + else + { + statement("stage_output.", name, " = ", name, ";"); + } } } }); @@ -2439,7 +3296,7 @@ void CompilerHLSL::emit_hlsl_entry_point() void CompilerHLSL::emit_fixup() { - if (get_entry_point().model == ExecutionModelVertex) + if (is_vertex_like_shader() && active_output_builtins.get(BuiltInPosition)) { // Do various mangling on the gl_Position. if (hlsl_options.shader_model <= 30) @@ -2457,8 +3314,11 @@ void CompilerHLSL::emit_fixup() } } -void CompilerHLSL::emit_texture_op(const Instruction &i) +void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse) { + if (sparse) + SPIRV_CROSS_THROW("Sparse feedback not yet supported in HLSL."); + auto *ops = stream(i); auto op = static_cast(i.op); uint32_t length = i.length; @@ -2467,7 +3327,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t img = ops[2]; + VariableID img = ops[2]; uint32_t coord = ops[3]; uint32_t dref = 0; uint32_t comp = 0; @@ -2475,13 +3335,16 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) bool proj = false; const uint32_t *opt = nullptr; auto *combined_image = maybe_get(img); - auto img_expr = to_expression(combined_image ? 
combined_image->image : img); - inherited_expressions.push_back(coord); + if (combined_image && has_decoration(img, DecorationNonUniform)) + { + set_decoration(combined_image->image, DecorationNonUniform); + set_decoration(combined_image->sampler, DecorationNonUniform); + } - // Make sure non-uniform decoration is back-propagated to where it needs to be. - if (has_decoration(img, DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(img); + auto img_expr = to_non_uniform_aware_expression(combined_image ? combined_image->image : img); + + inherited_expressions.push_back(coord); switch (op) { @@ -2631,7 +3494,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) { texop += img_expr; - if (image_is_comparison(imgtype, img)) + if (is_depth_image(imgtype, img)) { if (gather) { @@ -2647,7 +3510,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) } else if (gather) { - uint32_t comp_num = get(comp).scalar(); + uint32_t comp_num = evaluate_constant_u32(comp); if (hlsl_options.shader_model >= 50) { switch (comp_num) @@ -2713,14 +3576,15 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) SPIRV_CROSS_THROW("textureGather is not supported in HLSL shader model 2/3."); if (offset || coffset) SPIRV_CROSS_THROW("textureOffset is not supported in HLSL shader model 2/3."); - if (proj) - texop += "proj"; + if (grad_x || grad_y) texop += "grad"; - if (lod) + else if (lod) texop += "lod"; - if (bias) + else if (bias) texop += "bias"; + else if (proj || dref) + texop += "proj"; } } @@ -2736,7 +3600,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) { string sampler_expr; if (combined_image) - sampler_expr = to_expression(combined_image->sampler); + sampler_expr = to_non_uniform_aware_expression(combined_image->sampler); else sampler_expr = to_sampler_expression(img); expr += sampler_expr; @@ -2772,24 +3636,52 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) if (proj && hlsl_options.shader_model >= 40) // Legacy HLSL has "proj" 
operations which do this for us. coord_expr = coord_expr + " / " + to_extract_component_expression(coord, coord_components); - if (hlsl_options.shader_model < 40 && lod) + if (hlsl_options.shader_model < 40) { - string coord_filler; - for (uint32_t size = coord_components; size < 3; ++size) + if (dref) + { + if (imgtype.image.dim != spv::Dim1D && imgtype.image.dim != spv::Dim2D) + { + SPIRV_CROSS_THROW( + "Depth comparison is only supported for 1D and 2D textures in HLSL shader model 2/3."); + } + + if (grad_x || grad_y) + SPIRV_CROSS_THROW("Depth comparison is not supported for grad sampling in HLSL shader model 2/3."); + + for (uint32_t size = coord_components; size < 2; ++size) + coord_expr += ", 0.0"; + + forward = forward && should_forward(dref); + coord_expr += ", " + to_expression(dref); + } + else if (lod || bias || proj) { - coord_filler += ", 0.0"; + for (uint32_t size = coord_components; size < 3; ++size) + coord_expr += ", 0.0"; } - coord_expr = "float4(" + coord_expr + coord_filler + ", " + to_expression(lod) + ")"; - } - if (hlsl_options.shader_model < 40 && bias) - { - string coord_filler; - for (uint32_t size = coord_components; size < 3; ++size) + if (lod) { - coord_filler += ", 0.0"; + coord_expr = "float4(" + coord_expr + ", " + to_expression(lod) + ")"; } - coord_expr = "float4(" + coord_expr + coord_filler + ", " + to_expression(bias) + ")"; + else if (bias) + { + coord_expr = "float4(" + coord_expr + ", " + to_expression(bias) + ")"; + } + else if (proj) + { + coord_expr = "float4(" + coord_expr + ", " + to_extract_component_expression(coord, coord_components) + ")"; + } + else if (dref) + { + // A "normal" sample gets fed into tex2Dproj as well, because the + // regular tex2D accepts only two coordinates. 
+ coord_expr = "float4(" + coord_expr + ", 1.0)"; + } + + if (!!lod + !!bias + !!proj > 1) + SPIRV_CROSS_THROW("Legacy HLSL can only use one of lod/bias/proj modifiers."); } if (op == OpImageFetch) @@ -2802,11 +3694,8 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) expr += ", "; expr += coord_expr; - if (dref) + if (dref && hlsl_options.shader_model >= 40) { - if (hlsl_options.shader_model < 40) - SPIRV_CROSS_THROW("Legacy HLSL does not support comparison sampling."); - forward = forward && should_forward(dref); expr += ", "; @@ -2861,6 +3750,9 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) expr += ")"; + if (dref && hlsl_options.shader_model < 40) + expr += ".x"; + if (op == OpImageQueryLod) { // This is rather awkward. @@ -2870,7 +3762,8 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) // according to GLSL spec, and it depends on the sampler itself. // Just assume X == Y, so we will need to splat the result to a float2. statement("float _", id, "_tmp = ", expr, ";"); - emit_op(result_type, id, join("float2(_", id, "_tmp, _", id, "_tmp)"), true, true); + statement("float2 _", id, " = _", id, "_tmp.xx;"); + set(id, join("_", id), result_type, true); } else { @@ -2886,7 +3779,6 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) case OpImageSampleImplicitLod: case OpImageSampleProjImplicitLod: case OpImageSampleProjDrefImplicitLod: - case OpImageQueryLod: register_control_dependent_expression(id); break; @@ -2897,30 +3789,52 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) string CompilerHLSL::to_resource_binding(const SPIRVariable &var) { - // TODO: Basic implementation, might need special consideration for RW/RO structured buffers, - // RW/RO images, and so on. + const auto &type = get(var.basetype); - if (!has_decoration(var.self, DecorationBinding)) + // We can remap push constant blocks, even if they don't have any binding decoration. 
+ if (type.storage != StorageClassPushConstant && !has_decoration(var.self, DecorationBinding)) return ""; - const auto &type = get(var.basetype); char space = '\0'; + HLSLBindingFlagBits resource_flags = HLSL_BINDING_AUTO_NONE_BIT; + switch (type.basetype) { case SPIRType::SampledImage: space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; break; case SPIRType::Image: if (type.image.sampled == 2 && type.image.dim != DimSubpassData) - space = 'u'; // UAV + { + if (has_decoration(var.self, DecorationNonWritable) && hlsl_options.nonwritable_uav_texture_as_srv) + { + space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; + } + else + { + space = 'u'; // UAV + resource_flags = HLSL_BINDING_AUTO_UAV_BIT; + } + } else + { space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; + } break; case SPIRType::Sampler: space = 's'; + resource_flags = HLSL_BINDING_AUTO_SAMPLER_BIT; + break; + + case SPIRType::AccelerationStructure: + space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; break; case SPIRType::Struct: @@ -2931,20 +3845,28 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var) if (has_decoration(type.self, DecorationBufferBlock)) { Bitset flags = ir.get_buffer_block_flags(var); - bool is_readonly = flags.get(DecorationNonWritable); + bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); space = is_readonly ? 't' : 'u'; // UAV + resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT; } else if (has_decoration(type.self, DecorationBlock)) + { space = 'b'; // Constant buffers + resource_flags = HLSL_BINDING_AUTO_CBV_BIT; + } } else if (storage == StorageClassPushConstant) + { space = 'b'; // Constant buffers + resource_flags = HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT; + } else if (storage == StorageClassStorageBuffer) { // UAV or SRV depending on readonly flag. 
Bitset flags = ir.get_buffer_block_flags(var); - bool is_readonly = flags.get(DecorationNonWritable); + bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); space = is_readonly ? 't' : 'u'; + resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT; } break; @@ -2956,8 +3878,16 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var) if (!space) return ""; - return to_resource_register(space, get_decoration(var.self, DecorationBinding), - get_decoration(var.self, DecorationDescriptorSet)); + uint32_t desc_set = + resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantDescriptorSet : 0u; + uint32_t binding = resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantBinding : 0u; + + if (has_decoration(var.self, DecorationBinding)) + binding = get_decoration(var.self, DecorationBinding); + if (has_decoration(var.self, DecorationDescriptorSet)) + desc_set = get_decoration(var.self, DecorationDescriptorSet); + + return to_resource_register(resource_flags, space, binding, desc_set); } string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var) @@ -2966,16 +3896,65 @@ string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var) if (!has_decoration(var.self, DecorationBinding)) return ""; - return to_resource_register('s', get_decoration(var.self, DecorationBinding), + return to_resource_register(HLSL_BINDING_AUTO_SAMPLER_BIT, 's', get_decoration(var.self, DecorationBinding), get_decoration(var.self, DecorationDescriptorSet)); } -string CompilerHLSL::to_resource_register(char space, uint32_t binding, uint32_t space_set) +void CompilerHLSL::remap_hlsl_resource_binding(HLSLBindingFlagBits type, uint32_t &desc_set, uint32_t &binding) +{ + auto itr = resource_bindings.find({ get_execution_model(), desc_set, binding }); + if (itr != end(resource_bindings)) + { + auto &remap = itr->second; + remap.second = 
true; + + switch (type) + { + case HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT: + case HLSL_BINDING_AUTO_CBV_BIT: + desc_set = remap.first.cbv.register_space; + binding = remap.first.cbv.register_binding; + break; + + case HLSL_BINDING_AUTO_SRV_BIT: + desc_set = remap.first.srv.register_space; + binding = remap.first.srv.register_binding; + break; + + case HLSL_BINDING_AUTO_SAMPLER_BIT: + desc_set = remap.first.sampler.register_space; + binding = remap.first.sampler.register_binding; + break; + + case HLSL_BINDING_AUTO_UAV_BIT: + desc_set = remap.first.uav.register_space; + binding = remap.first.uav.register_binding; + break; + + default: + break; + } + } +} + +string CompilerHLSL::to_resource_register(HLSLBindingFlagBits flag, char space, uint32_t binding, uint32_t space_set) { - if (hlsl_options.shader_model >= 51) - return join(" : register(", space, binding, ", space", space_set, ")"); + if ((flag & resource_binding_flags) == 0) + { + remap_hlsl_resource_binding(flag, space_set, binding); + + // The push constant block did not have a binding, and there were no remap for it, + // so, declare without register binding. + if (flag == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT && space_set == ResourceBindingPushConstantDescriptorSet) + return ""; + + if (hlsl_options.shader_model >= 51) + return join(" : register(", space, binding, ", space", space_set, ")"); + else + return join(" : register(", space, binding, ")"); + } else - return join(" : register(", space, binding, ")"); + return ""; } void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var) @@ -2996,7 +3975,7 @@ void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var) if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) { // For combined image samplers, also emit a combined image sampler. 
- if (image_is_comparison(type, var.self)) + if (is_depth_image(type, var.self)) statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type), to_resource_binding_sampler(var), ";"); else @@ -3044,6 +4023,11 @@ void CompilerHLSL::emit_uniform(const SPIRVariable &var) emit_legacy_uniform(var); } +bool CompilerHLSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t) +{ + return false; +} + string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) { if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int) @@ -3077,7 +4061,7 @@ string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i requires_explicit_fp16_packing = true; force_recompile(); } - return "SPIRV_Cross_unpackFloat2x16"; + return "spvUnpackFloat2x16"; } else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) { @@ -3086,7 +4070,19 @@ string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i requires_explicit_fp16_packing = true; force_recompile(); } - return "SPIRV_Cross_packFloat2x16"; + return "spvPackFloat2x16"; + } + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) + { + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("Half to UShort requires Shader Model 4."); + return "(" + type_to_glsl(out_type) + ")f32tof16"; + } + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) + { + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("UShort to Half requires Shader Model 4."); + return "(" + type_to_glsl(out_type) + ")f16tof32"; } else return ""; @@ -3101,6 +4097,8 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); + op = get_remapped_glsl_op(op); + switch (op) { case GLSLstd450InverseSqrt: @@ -3112,7 +4110,10 @@ 
void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, break; case GLSLstd450RoundEven: - SPIRV_CROSS_THROW("roundEven is not supported on HLSL."); + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("roundEven is not supported in HLSL shader model 2/3."); + emit_unary_func_op(result_type, id, args[0], "round"); + break; case GLSLstd450Acosh: case GLSLstd450Asinh: @@ -3148,7 +4149,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_fp16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packHalf2x16"); + emit_unary_func_op(result_type, id, args[0], "spvPackHalf2x16"); break; case GLSLstd450UnpackHalf2x16: @@ -3157,7 +4158,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_fp16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackHalf2x16"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackHalf2x16"); break; case GLSLstd450PackSnorm4x8: @@ -3166,7 +4167,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_snorm8_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packSnorm4x8"); + emit_unary_func_op(result_type, id, args[0], "spvPackSnorm4x8"); break; case GLSLstd450UnpackSnorm4x8: @@ -3175,7 +4176,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_snorm8_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackSnorm4x8"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm4x8"); break; case GLSLstd450PackUnorm4x8: @@ -3184,7 +4185,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_unorm8_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packUnorm4x8"); + 
emit_unary_func_op(result_type, id, args[0], "spvPackUnorm4x8"); break; case GLSLstd450UnpackUnorm4x8: @@ -3193,7 +4194,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_unorm8_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackUnorm4x8"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm4x8"); break; case GLSLstd450PackSnorm2x16: @@ -3202,7 +4203,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_snorm16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packSnorm2x16"); + emit_unary_func_op(result_type, id, args[0], "spvPackSnorm2x16"); break; case GLSLstd450UnpackSnorm2x16: @@ -3211,7 +4212,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_snorm16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackSnorm2x16"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm2x16"); break; case GLSLstd450PackUnorm2x16: @@ -3220,7 +4221,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_unorm16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packUnorm2x16"); + emit_unary_func_op(result_type, id, args[0], "spvPackUnorm2x16"); break; case GLSLstd450UnpackUnorm2x16: @@ -3229,7 +4230,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_unorm16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackUnorm2x16"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm2x16"); break; case GLSLstd450PackDouble2x32: @@ -3237,8 +4238,11 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL."); case 
GLSLstd450FindILsb: - emit_unary_func_op(result_type, id, args[0], "firstbitlow"); + { + auto basetype = expression_type(args[0]).basetype; + emit_unary_func_op_cast(result_type, id, args[0], "firstbitlow", basetype, basetype); break; + } case GLSLstd450FindSMsb: emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", int_type, int_type); @@ -3275,7 +4279,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, force_recompile(); } } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_Inverse"); + emit_unary_func_op(result_type, id, args[0], "spvInverse"); break; } @@ -3298,7 +4302,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_scalar_reflect = true; force_recompile(); } - emit_binary_func_op(result_type, id, args[0], args[1], "SPIRV_Cross_Reflect"); + emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect"); } else CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); @@ -3312,7 +4316,21 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_scalar_refract = true; force_recompile(); } - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_Refract"); + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450FaceForward: + if (get(result_type).vecsize == 1) + { + if (!requires_scalar_faceforward) + { + requires_scalar_faceforward = true; + force_recompile(); + } + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward"); } else CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); @@ -3324,7 +4342,57 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, } } -string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) +void CompilerHLSL::read_access_chain_array(const string &lhs, const 
SPIRAccessChain &chain) +{ + auto &type = get(chain.basetype); + + // Need to use a reserved identifier here since it might shadow an identifier in the access chain input or other loops. + auto ident = get_unique_identifier(); + + statement("[unroll]"); + statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ", + ident, "++)"); + begin_scope(); + auto subchain = chain; + subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index); + subchain.basetype = type.parent_type; + if (!get(subchain.basetype).array.empty()) + subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride); + read_access_chain(nullptr, join(lhs, "[", ident, "]"), subchain); + end_scope(); +} + +void CompilerHLSL::read_access_chain_struct(const string &lhs, const SPIRAccessChain &chain) +{ + auto &type = get(chain.basetype); + auto subchain = chain; + uint32_t member_count = uint32_t(type.member_types.size()); + + for (uint32_t i = 0; i < member_count; i++) + { + uint32_t offset = type_struct_member_offset(type, i); + subchain.static_index = chain.static_index + offset; + subchain.basetype = type.member_types[i]; + + subchain.matrix_stride = 0; + subchain.array_stride = 0; + subchain.row_major_matrix = false; + + auto &member_type = get(subchain.basetype); + if (member_type.columns > 1) + { + subchain.matrix_stride = type_struct_member_matrix_stride(type, i); + subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor); + } + + if (!member_type.array.empty()) + subchain.array_stride = type_struct_member_array_stride(type, i); + + read_access_chain(nullptr, join(lhs, ".", to_member_name(type, i)), subchain); + } +} + +void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIRAccessChain &chain) { auto &type = get(chain.basetype); @@ -3333,17 +4401,31 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) target_type.vecsize = 
type.vecsize; target_type.columns = type.columns; - if (type.basetype == SPIRType::Struct) - SPIRV_CROSS_THROW("Reading structs from ByteAddressBuffer not yet supported."); - - if (type.width != 32) - SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported."); - if (!type.array.empty()) - SPIRV_CROSS_THROW("Reading arrays from ByteAddressBuffer not yet supported."); + { + read_access_chain_array(lhs, chain); + return; + } + else if (type.basetype == SPIRType::Struct) + { + read_access_chain_struct(lhs, chain); + return; + } + else if (type.width != 32 && !hlsl_options.enable_16bit_types) + SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported, unless SM 6.2 and " + "native 16-bit types are enabled."); + string base = chain.base; + if (has_decoration(chain.self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain.self); + + bool templated_load = hlsl_options.shader_model >= 62; string load_expr; + string template_expr; + if (templated_load) + template_expr = join("<", type_to_glsl(type), ">"); + // Load a vector or scalar. if (type.columns == 1 && !chain.row_major_matrix) { @@ -3366,12 +4448,24 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) SPIRV_CROSS_THROW("Unknown vector size."); } - load_expr = join(chain.base, ".", load_op, "(", chain.dynamic_index, chain.static_index, ")"); + if (templated_load) + load_op = "Load"; + + load_expr = join(base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index, ")"); } else if (type.columns == 1) { // Strided load since we are loading a column from a row-major matrix. 
- if (type.vecsize > 1) + if (templated_load) + { + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + if (type.vecsize > 1) + load_expr += type_to_glsl(type) + "("; + } + else if (type.vecsize > 1) { load_expr = type_to_glsl(target_type); load_expr += "("; @@ -3379,8 +4473,8 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) for (uint32_t r = 0; r < type.vecsize; r++) { - load_expr += - join(chain.base, ".Load(", chain.dynamic_index, chain.static_index + r * chain.matrix_stride, ")"); + load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index, + chain.static_index + r * chain.matrix_stride, ")"); if (r + 1 < type.vecsize) load_expr += ", "; } @@ -3410,13 +4504,25 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) SPIRV_CROSS_THROW("Unknown vector size."); } - // Note, this loading style in HLSL is *actually* row-major, but we always treat matrices as transposed in this backend, - // so row-major is technically column-major ... - load_expr = type_to_glsl(target_type); + if (templated_load) + { + auto vector_type = type; + vector_type.columns = 1; + template_expr = join("<", type_to_glsl(vector_type), ">"); + load_expr = type_to_glsl(type); + load_op = "Load"; + } + else + { + // Note, this loading style in HLSL is *actually* row-major, but we always treat matrices as transposed in this backend, + // so row-major is technically column-major ... 
+ load_expr = type_to_glsl(target_type); + } load_expr += "("; + for (uint32_t c = 0; c < type.columns; c++) { - load_expr += join(chain.base, ".", load_op, "(", chain.dynamic_index, + load_expr += join(base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index + c * chain.matrix_stride, ")"); if (c + 1 < type.columns) load_expr += ", "; @@ -3428,13 +4534,24 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) // Pick out elements one by one ... Hopefully compilers are smart enough to recognize this pattern // considering HLSL is "row-major decl", but "column-major" memory layout (basically implicit transpose model, ugh) ... - load_expr = type_to_glsl(target_type); + if (templated_load) + { + load_expr = type_to_glsl(type); + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + } + else + load_expr = type_to_glsl(target_type); + load_expr += "("; + for (uint32_t c = 0; c < type.columns; c++) { for (uint32_t r = 0; r < type.vecsize; r++) { - load_expr += join(chain.base, ".Load(", chain.dynamic_index, + load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index, chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ")"); if ((r + 1 < type.vecsize) || (c + 1 < type.columns)) @@ -3444,11 +4561,20 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) load_expr += ")"; } - auto bitcast_op = bitcast_glsl_op(type, target_type); - if (!bitcast_op.empty()) - load_expr = join(bitcast_op, "(", load_expr, ")"); + if (!templated_load) + { + auto bitcast_op = bitcast_glsl_op(type, target_type); + if (!bitcast_op.empty()) + load_expr = join(bitcast_op, "(", load_expr, ")"); + } - return load_expr; + if (lhs.empty()) + { + assert(expr); + *expr = std::move(load_expr); + } + else + statement(lhs, " = ", load_expr, ";"); } void CompilerHLSL::emit_load(const Instruction &instruction) @@ -3462,56 +4588,174 @@ 
void CompilerHLSL::emit_load(const Instruction &instruction) uint32_t id = ops[1]; uint32_t ptr = ops[2]; - if (has_decoration(ptr, DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(ptr); + auto &type = get(result_type); + bool composite_load = !type.array.empty() || type.basetype == SPIRType::Struct; + + if (composite_load) + { + // We cannot make this work in one single expression as we might have nested structures and arrays, + // so unroll the load to an uninitialized temporary. + emit_uninitialized_temporary_expression(result_type, id); + read_access_chain(nullptr, to_expression(id), *chain); + track_expression_read(chain->self); + } + else + { + string load_expr; + read_access_chain(&load_expr, "", *chain); + + bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + + // If we are forwarding this load, + // don't register the read to access chain here, defer that to when we actually use the expression, + // using the add_implied_read_expression mechanism. + if (!forward) + track_expression_read(chain->self); + + // Do not forward complex load sequences like matrices, structs and arrays. + if (type.columns > 1) + forward = false; + + auto &e = emit_op(result_type, id, load_expr, forward, true); + e.need_transpose = false; + register_read(id, ptr, forward); + inherit_expression_dependencies(id, ptr); + if (forward) + add_implied_read_expression(e, chain->self); + } + } + else + CompilerGLSL::emit_instruction(instruction); +} + +void CompilerHLSL::write_access_chain_array(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain) +{ + auto &type = get(chain.basetype); - auto load_expr = read_access_chain(*chain); + // Need to use a reserved identifier here since it might shadow an identifier in the access chain input or other loops. 
+ auto ident = get_unique_identifier(); + + uint32_t id = ir.increase_bound_by(2); + uint32_t int_type_id = id + 1; + SPIRType int_type; + int_type.basetype = SPIRType::Int; + int_type.width = 32; + set(int_type_id, int_type); + set(id, ident, int_type_id, true); + set_name(id, ident); + suppressed_usage_tracking.insert(id); + + statement("[unroll]"); + statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ", + ident, "++)"); + begin_scope(); + auto subchain = chain; + subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index); + subchain.basetype = type.parent_type; - bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + // Forcefully allow us to use an ID here by setting MSB. + auto subcomposite_chain = composite_chain; + subcomposite_chain.push_back(0x80000000u | id); - // If we are forwarding this load, - // don't register the read to access chain here, defer that to when we actually use the expression, - // using the add_implied_read_expression mechanism. - if (!forward) - track_expression_read(chain->self); + if (!get(subchain.basetype).array.empty()) + subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride); - // Do not forward complex load sequences like matrices, structs and arrays. 
- auto &type = get(result_type); - if (type.columns > 1 || !type.array.empty() || type.basetype == SPIRType::Struct) - forward = false; + write_access_chain(subchain, value, subcomposite_chain); + end_scope(); +} - auto &e = emit_op(result_type, id, load_expr, forward, true); - e.need_transpose = false; - register_read(id, ptr, forward); - inherit_expression_dependencies(id, ptr); - if (forward) - add_implied_read_expression(e, chain->self); +void CompilerHLSL::write_access_chain_struct(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain) +{ + auto &type = get(chain.basetype); + uint32_t member_count = uint32_t(type.member_types.size()); + auto subchain = chain; + + auto subcomposite_chain = composite_chain; + subcomposite_chain.push_back(0); + + for (uint32_t i = 0; i < member_count; i++) + { + uint32_t offset = type_struct_member_offset(type, i); + subchain.static_index = chain.static_index + offset; + subchain.basetype = type.member_types[i]; + + subchain.matrix_stride = 0; + subchain.array_stride = 0; + subchain.row_major_matrix = false; + + auto &member_type = get(subchain.basetype); + if (member_type.columns > 1) + { + subchain.matrix_stride = type_struct_member_matrix_stride(type, i); + subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor); + } + + if (!member_type.array.empty()) + subchain.array_stride = type_struct_member_array_stride(type, i); + + subcomposite_chain.back() = i; + write_access_chain(subchain, value, subcomposite_chain); } +} + +string CompilerHLSL::write_access_chain_value(uint32_t value, const SmallVector &composite_chain, + bool enclose) +{ + string ret; + if (composite_chain.empty()) + ret = to_expression(value); else - CompilerGLSL::emit_instruction(instruction); + { + AccessChainMeta meta; + ret = access_chain_internal(value, composite_chain.data(), uint32_t(composite_chain.size()), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_LITERAL_MSB_FORCE_ID, &meta); + } + + if 
(enclose) + ret = enclose_expression(ret); + return ret; } -void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value) +void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain) { auto &type = get(chain.basetype); // Make sure we trigger a read of the constituents in the access chain. track_expression_read(chain.self); - if (has_decoration(chain.self, DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(chain.self); - SPIRType target_type; target_type.basetype = SPIRType::UInt; target_type.vecsize = type.vecsize; target_type.columns = type.columns; - if (type.basetype == SPIRType::Struct) - SPIRV_CROSS_THROW("Writing structs to RWByteAddressBuffer not yet supported."); - if (type.width != 32) - SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported."); if (!type.array.empty()) - SPIRV_CROSS_THROW("Reading arrays from ByteAddressBuffer not yet supported."); + { + write_access_chain_array(chain, value, composite_chain); + register_write(chain.self); + return; + } + else if (type.basetype == SPIRType::Struct) + { + write_access_chain_struct(chain, value, composite_chain); + register_write(chain.self); + return; + } + else if (type.width != 32 && !hlsl_options.enable_16bit_types) + SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported, unless SM 6.2 and " + "native 16-bit types are enabled."); + + bool templated_store = hlsl_options.shader_model >= 62; + + auto base = chain.base; + if (has_decoration(chain.self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain.self); + + string template_expr; + if (templated_store) + template_expr = join("<", type_to_glsl(type), ">"); if (type.columns == 1 && !chain.row_major_matrix) { @@ -3534,18 +4778,33 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val SPIRV_CROSS_THROW("Unknown vector size."); } - auto store_expr 
= to_expression(value); - auto bitcast_op = bitcast_glsl_op(target_type, type); - if (!bitcast_op.empty()) - store_expr = join(bitcast_op, "(", store_expr, ")"); - statement(chain.base, ".", store_op, "(", chain.dynamic_index, chain.static_index, ", ", store_expr, ");"); + auto store_expr = write_access_chain_value(value, composite_chain, false); + + if (!templated_store) + { + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + } + else + store_op = "Store"; + statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, chain.static_index, ", ", + store_expr, ");"); } else if (type.columns == 1) { + if (templated_store) + { + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + } + // Strided store. for (uint32_t r = 0; r < type.vecsize; r++) { - auto store_expr = to_enclosed_expression(value); + auto store_expr = write_access_chain_value(value, composite_chain, true); if (type.vecsize > 1) { store_expr += "."; @@ -3553,11 +4812,15 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val } remove_duplicate_swizzle(store_expr); - auto bitcast_op = bitcast_glsl_op(target_type, type); - if (!bitcast_op.empty()) - store_expr = join(bitcast_op, "(", store_expr, ")"); - statement(chain.base, ".Store(", chain.dynamic_index, chain.static_index + chain.matrix_stride * r, ", ", - store_expr, ");"); + if (!templated_store) + { + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + } + + statement(base, ".Store", template_expr, "(", chain.dynamic_index, + chain.static_index + chain.matrix_stride * r, ", ", store_expr, ");"); } } else if (!chain.row_major_matrix) @@ -3581,28 +4844,50 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val 
SPIRV_CROSS_THROW("Unknown vector size."); } + if (templated_store) + { + store_op = "Store"; + auto vector_type = type; + vector_type.columns = 1; + template_expr = join("<", type_to_glsl(vector_type), ">"); + } + for (uint32_t c = 0; c < type.columns; c++) { - auto store_expr = join(to_enclosed_expression(value), "[", c, "]"); - auto bitcast_op = bitcast_glsl_op(target_type, type); - if (!bitcast_op.empty()) - store_expr = join(bitcast_op, "(", store_expr, ")"); - statement(chain.base, ".", store_op, "(", chain.dynamic_index, chain.static_index + c * chain.matrix_stride, - ", ", store_expr, ");"); + auto store_expr = join(write_access_chain_value(value, composite_chain, true), "[", c, "]"); + + if (!templated_store) + { + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + } + + statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, + chain.static_index + c * chain.matrix_stride, ", ", store_expr, ");"); } } else { + if (templated_store) + { + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + } + for (uint32_t r = 0; r < type.vecsize; r++) { for (uint32_t c = 0; c < type.columns; c++) { - auto store_expr = join(to_enclosed_expression(value), "[", c, "].", index_to_swizzle(r)); + auto store_expr = + join(write_access_chain_value(value, composite_chain, true), "[", c, "].", index_to_swizzle(r)); remove_duplicate_swizzle(store_expr); auto bitcast_op = bitcast_glsl_op(target_type, type); if (!bitcast_op.empty()) store_expr = join(bitcast_op, "(", store_expr, ")"); - statement(chain.base, ".Store(", chain.dynamic_index, + statement(base, ".Store", template_expr, "(", chain.dynamic_index, chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ", ", store_expr, ");"); } } @@ -3616,7 +4901,7 @@ void CompilerHLSL::emit_store(const Instruction &instruction) auto ops = 
stream(instruction); auto *chain = maybe_get(ops[0]); if (chain) - write_access_chain(*chain, ops[1]); + write_access_chain(*chain, ops[1], {}); else CompilerGLSL::emit_instruction(instruction); } @@ -3646,7 +4931,10 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) if (need_byte_access_chain) { - uint32_t to_plain_buffer_length = static_cast(type.array.size()); + // If we have a chain variable, we are already inside the SSBO, and any array type will refer to arrays within a block, + // and not array of SSBO. + uint32_t to_plain_buffer_length = chain ? 0u : static_cast(type.array.size()); + auto *backing_variable = maybe_get_backing_variable(ops[2]); string base; @@ -3668,6 +4956,7 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) } uint32_t matrix_stride = 0; + uint32_t array_stride = 0; bool row_major_matrix = false; // Inherit matrix information. @@ -3675,17 +4964,19 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) { matrix_stride = chain->matrix_stride; row_major_matrix = chain->row_major_matrix; + array_stride = chain->array_stride; } - auto offsets = - flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length], - length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix, &matrix_stride); + auto offsets = flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length], + length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix, + &matrix_stride, &array_stride); auto &e = set(ops[1], ops[0], type.storage, base, offsets.first, offsets.second); e.row_major_matrix = row_major_matrix; e.matrix_stride = matrix_stride; + e.array_stride = array_stride; e.immutable = should_forward(ops[2]); - e.loaded_from = backing_variable ? backing_variable->self : 0; + e.loaded_from = backing_variable ? 
backing_variable->self : ID(0); if (chain) { @@ -3710,9 +5001,11 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) const char *atomic_op = nullptr; string value_expr; - if (op != OpAtomicIDecrement && op != OpAtomicIIncrement) + if (op != OpAtomicIDecrement && op != OpAtomicIIncrement && op != OpAtomicLoad && op != OpAtomicStore) value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]); + bool is_atomic_store = false; + switch (op) { case OpAtomicIIncrement: @@ -3725,6 +5018,11 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) value_expr = "-1"; break; + case OpAtomicLoad: + atomic_op = "InterlockedAdd"; + value_expr = "0"; + break; + case OpAtomicISub: atomic_op = "InterlockedAdd"; value_expr = join("-", enclose_expression(value_expr)); @@ -3760,6 +5058,11 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) atomic_op = "InterlockedExchange"; break; + case OpAtomicStore: + atomic_op = "InterlockedExchange"; + is_atomic_store = true; + break; + case OpAtomicCompareExchange: if (length < 8) SPIRV_CROSS_THROW("Not enough data for opcode."); @@ -3771,31 +5074,64 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) SPIRV_CROSS_THROW("Unknown atomic opcode."); } - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - forced_temporaries.insert(ops[1]); + if (is_atomic_store) + { + auto &data_type = expression_type(ops[0]); + auto *chain = maybe_get(ops[0]); - auto &type = get(result_type); - statement(variable_decl(type, to_name(id)), ";"); + auto &tmp_id = extra_sub_expressions[ops[0]]; + if (!tmp_id) + { + tmp_id = ir.increase_bound_by(1); + emit_uninitialized_temporary_expression(get_pointee_type(data_type).self, tmp_id); + } - auto &data_type = expression_type(ops[2]); - auto *chain = maybe_get(ops[2]); - SPIRType::BaseType expr_type; - if (data_type.storage == StorageClassImage || !chain) - { - statement(atomic_op, "(", 
to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");"); - expr_type = data_type.basetype; + if (data_type.storage == StorageClassImage || !chain) + { + statement(atomic_op, "(", to_non_uniform_aware_expression(ops[0]), ", ", + to_expression(ops[3]), ", ", to_expression(tmp_id), ");"); + } + else + { + string base = chain->base; + if (has_decoration(chain->self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain->self); + // RWByteAddress buffer is always uint in its underlying type. + statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", + to_expression(ops[3]), ", ", to_expression(tmp_id), ");"); + } } else { - // RWByteAddress buffer is always uint in its underlying type. - expr_type = SPIRType::UInt; - statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, ", ", - to_name(id), ");"); - } + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + forced_temporaries.insert(ops[1]); + + auto &type = get(result_type); + statement(variable_decl(type, to_name(id)), ";"); + + auto &data_type = expression_type(ops[2]); + auto *chain = maybe_get(ops[2]); + SPIRType::BaseType expr_type; + if (data_type.storage == StorageClassImage || !chain) + { + statement(atomic_op, "(", to_non_uniform_aware_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");"); + expr_type = data_type.basetype; + } + else + { + // RWByteAddress buffer is always uint in its underlying type. 
+ string base = chain->base; + if (has_decoration(chain->self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain->self); + expr_type = SPIRType::UInt; + statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, + ", ", to_name(id), ");"); + } - auto expr = bitcast_expression(type, expr_type, to_name(id)); - set(id, expr, result_type, true); + auto expr = bitcast_expression(type, expr_type, to_name(id)); + set(id, expr, result_type, true); + } flush_all_atomic_capable_variables(); } @@ -3810,7 +5146,7 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i) uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto scope = static_cast(get(ops[2]).scalar()); + auto scope = static_cast(evaluate_constant_u32(ops[2])); if (scope != ScopeSubgroup) SPIRV_CROSS_THROW("Only subgroup scope is supported."); @@ -3822,6 +5158,11 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i) return join(expr, " * ", to_expression(ops[4])); }; + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
+ uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + #define make_inclusive_BitAnd(expr) "" #define make_inclusive_BitOr(expr) "" #define make_inclusive_BitXor(expr) "" @@ -3848,26 +5189,22 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i) case OpGroupNonUniformInverseBallot: SPIRV_CROSS_THROW("Cannot trivially implement InverseBallot in HLSL."); - break; case OpGroupNonUniformBallotBitExtract: SPIRV_CROSS_THROW("Cannot trivially implement BallotBitExtract in HLSL."); - break; case OpGroupNonUniformBallotFindLSB: SPIRV_CROSS_THROW("Cannot trivially implement BallotFindLSB in HLSL."); - break; case OpGroupNonUniformBallotFindMSB: SPIRV_CROSS_THROW("Cannot trivially implement BallotFindMSB in HLSL."); - break; case OpGroupNonUniformBallotBitCount: { auto operation = static_cast(ops[3]); + bool forward = should_forward(ops[4]); if (operation == GroupOperationReduce) { - bool forward = should_forward(ops[4]); auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x) + countbits(", to_enclosed_expression(ops[4]), ".y)"); auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z) + countbits(", @@ -3876,22 +5213,66 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i) inherit_expression_dependencies(id, ops[4]); } else if (operation == GroupOperationInclusiveScan) - SPIRV_CROSS_THROW("Cannot trivially implement BallotBitCount Inclusive Scan in HLSL."); + { + auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x & gl_SubgroupLeMask.x) + countbits(", + to_enclosed_expression(ops[4]), ".y & gl_SubgroupLeMask.y)"); + auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z & gl_SubgroupLeMask.z) + countbits(", + to_enclosed_expression(ops[4]), ".w & gl_SubgroupLeMask.w)"); + emit_op(result_type, id, join(left, " + ", right), forward); + if (!active_input_builtins.get(BuiltInSubgroupLeMask)) + 
{ + active_input_builtins.set(BuiltInSubgroupLeMask); + force_recompile_guarantee_forward_progress(); + } + } else if (operation == GroupOperationExclusiveScan) - SPIRV_CROSS_THROW("Cannot trivially implement BallotBitCount Exclusive Scan in HLSL."); + { + auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x & gl_SubgroupLtMask.x) + countbits(", + to_enclosed_expression(ops[4]), ".y & gl_SubgroupLtMask.y)"); + auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z & gl_SubgroupLtMask.z) + countbits(", + to_enclosed_expression(ops[4]), ".w & gl_SubgroupLtMask.w)"); + emit_op(result_type, id, join(left, " + ", right), forward); + if (!active_input_builtins.get(BuiltInSubgroupLtMask)) + { + active_input_builtins.set(BuiltInSubgroupLtMask); + force_recompile_guarantee_forward_progress(); + } + } else SPIRV_CROSS_THROW("Invalid BitCount operation."); break; } case OpGroupNonUniformShuffle: - SPIRV_CROSS_THROW("Cannot trivially implement Shuffle in HLSL."); + emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt"); + break; case OpGroupNonUniformShuffleXor: - SPIRV_CROSS_THROW("Cannot trivially implement ShuffleXor in HLSL."); + { + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", + "WaveGetLaneIndex() ^ ", to_enclosed_expression(ops[4]), ")"), forward); + inherit_expression_dependencies(ops[1], ops[3]); + break; + } case OpGroupNonUniformShuffleUp: - SPIRV_CROSS_THROW("Cannot trivially implement ShuffleUp in HLSL."); + { + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", + "WaveGetLaneIndex() - ", to_enclosed_expression(ops[4]), ")"), forward); + inherit_expression_dependencies(ops[1], ops[3]); + break; + } case OpGroupNonUniformShuffleDown: - SPIRV_CROSS_THROW("Cannot trivially implement ShuffleDown in HLSL."); + { + bool forward = should_forward(ops[3]); + 
emit_op(ops[0], ops[1], + join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", + "WaveGetLaneIndex() + ", to_enclosed_expression(ops[4]), ")"), forward); + inherit_expression_dependencies(ops[1], ops[3]); + break; + } case OpGroupNonUniformAll: emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllTrue"); @@ -3902,12 +5283,8 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i) break; case OpGroupNonUniformAllEqual: - { - auto &type = get(result_type); - emit_unary_func_op(result_type, id, ops[3], - type.basetype == SPIRType::Boolean ? "WaveActiveAllEqualBool" : "WaveActiveAllEqual"); + emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllEqual"); break; - } // clang-format off #define HLSL_GROUP_OP(op, hlsl_op, supports_scan) \ @@ -3930,25 +5307,42 @@ case OpGroupNonUniform##op: \ SPIRV_CROSS_THROW("Invalid group operation."); \ break; \ } + +#define HLSL_GROUP_OP_CAST(op, hlsl_op, type) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "WaveActive" #hlsl_op, type, type); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + HLSL_GROUP_OP(FAdd, Sum, true) HLSL_GROUP_OP(FMul, Product, true) HLSL_GROUP_OP(FMin, Min, false) HLSL_GROUP_OP(FMax, Max, false) HLSL_GROUP_OP(IAdd, Sum, true) HLSL_GROUP_OP(IMul, Product, true) - HLSL_GROUP_OP(SMin, Min, false) - HLSL_GROUP_OP(SMax, Max, false) - HLSL_GROUP_OP(UMin, Min, false) - HLSL_GROUP_OP(UMax, Max, false) + HLSL_GROUP_OP_CAST(SMin, Min, int_type) + HLSL_GROUP_OP_CAST(SMax, Max, int_type) + HLSL_GROUP_OP_CAST(UMin, Min, uint_type) + HLSL_GROUP_OP_CAST(UMax, Max, uint_type) HLSL_GROUP_OP(BitwiseAnd, BitAnd, false) HLSL_GROUP_OP(BitwiseOr, BitOr, false) HLSL_GROUP_OP(BitwiseXor, BitXor, false) + HLSL_GROUP_OP_CAST(LogicalAnd, BitAnd, uint_type) + HLSL_GROUP_OP_CAST(LogicalOr, BitOr, uint_type) + HLSL_GROUP_OP_CAST(LogicalXor, BitXor, uint_type) + 
#undef HLSL_GROUP_OP +#undef HLSL_GROUP_OP_CAST // clang-format on case OpGroupNonUniformQuadSwap: { - uint32_t direction = get(ops[4]).scalar(); + uint32_t direction = evaluate_constant_u32(ops[4]); if (direction == 0) emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossX"); else if (direction == 1) @@ -3980,7 +5374,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) #define HLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) #define HLSL_BOP_CAST(op, type) \ - emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode), false) #define HLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) #define HLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) #define HLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) @@ -3993,6 +5387,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) // If we need to do implicit bitcasts, make sure we do it with the correct type. 
uint32_t integer_width = get_integer_width_for_instruction(instruction); auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + opcode = get_remapped_spirv_op(opcode); switch (opcode) { @@ -4002,6 +5399,55 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) emit_access_chain(instruction); break; } + case OpBitcast: + { + auto bitcast_type = get_bitcast_type(ops[0], ops[2]); + if (bitcast_type == CompilerHLSL::TypeNormal) + CompilerGLSL::emit_instruction(instruction); + else + { + if (!requires_uint2_packing) + { + requires_uint2_packing = true; + force_recompile(); + } + + if (bitcast_type == CompilerHLSL::TypePackUint2x32) + emit_unary_func_op(ops[0], ops[1], ops[2], "spvPackUint2x32"); + else + emit_unary_func_op(ops[0], ops[1], ops[2], "spvUnpackUint2x32"); + } + + break; + } + + case OpSelect: + { + auto &value_type = expression_type(ops[3]); + if (value_type.basetype == SPIRType::Struct || is_array(value_type)) + { + // HLSL does not support ternary expressions on composites. + // Cannot use branches, since we might be in a continue block + // where explicit control flow is prohibited. + // Emit a helper function where we can use control flow. 
+ TypeID value_type_id = expression_type_id(ops[3]); + auto itr = std::find(composite_selection_workaround_types.begin(), + composite_selection_workaround_types.end(), + value_type_id); + if (itr == composite_selection_workaround_types.end()) + { + composite_selection_workaround_types.push_back(value_type_id); + force_recompile(); + } + emit_uninitialized_temporary_expression(ops[0], ops[1]); + statement("spvSelectComposite(", + to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + to_expression(ops[3]), ", ", to_expression(ops[4]), ");"); + } + else + CompilerGLSL::emit_instruction(instruction); + break; + } case OpStore: { @@ -4017,18 +5463,21 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpMatrixTimesVector: { + // Matrices are kept in a transposed state all the time, flip multiplication order always. emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); break; } case OpVectorTimesMatrix: { + // Matrices are kept in a transposed state all the time, flip multiplication order always. emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); break; } case OpMatrixTimesMatrix: { + // Matrices are kept in a transposed state all the time, flip multiplication order always. emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); break; } @@ -4152,7 +5601,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "=="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); else HLSL_BOP_CAST(==, int_type); break; @@ -4160,12 +5609,19 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpLogicalEqual: case OpFOrdEqual: + case OpFUnordEqual: { + // HLSL != operator is unordered. + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. 
+ // isnan() is apparently implemented as x != x as well. + // We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual. + // HACK: FUnordEqual will be implemented as FOrdEqual. + auto result_type = ops[0]; auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "=="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); else HLSL_BOP(==); break; @@ -4177,7 +5633,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); else HLSL_BOP_CAST(!=, int_type); break; @@ -4185,12 +5641,23 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpLogicalNotEqual: case OpFOrdNotEqual: + case OpFUnordNotEqual: { + // HLSL != operator is unordered. + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. + // isnan() is apparently implemented as x != x as well. + + // FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here. + // We would need to do something like not(UnordEqual), but that cannot be expressed either. + // Adding a lot of NaN checks would be a breaking change from perspective of performance. + // SPIR-V will generally use isnan() checks when this even matters. + // HACK: FOrdNotEqual will be implemented as FUnordEqual. 
+ auto result_type = ops[0]; auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); else HLSL_BOP(!=); break; @@ -4201,10 +5668,10 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpUGreaterThan ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, type); else HLSL_BOP_CAST(>, type); break; @@ -4216,21 +5683,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, SPIRType::Unknown); else HLSL_BOP(>); break; } + case OpFUnordGreaterThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpUGreaterThanEqual: case OpSGreaterThanEqual: { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpUGreaterThanEqual ? 
uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, type); else HLSL_BOP_CAST(>=, type); break; @@ -4242,21 +5721,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, SPIRType::Unknown); else HLSL_BOP(>=); break; } + case OpFUnordGreaterThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpULessThan: case OpSLessThan: { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpULessThan ? 
uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, type); else HLSL_BOP_CAST(<, type); break; @@ -4268,21 +5759,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, SPIRType::Unknown); else HLSL_BOP(<); break; } + case OpFUnordLessThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpULessThanEqual: case OpSLessThanEqual: { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpULessThanEqual ? 
uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, type); else HLSL_BOP_CAST(<=, type); break; @@ -4294,14 +5797,26 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, SPIRType::Unknown); else HLSL_BOP(<=); break; } + case OpFUnordLessThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpImageQueryLod: - emit_texture_op(instruction); + emit_texture_op(instruction, false); break; case OpImageQuerySizeLod: @@ -4309,12 +5824,11 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto result_type = ops[0]; auto id = ops[1]; - require_texture_query_variant(expression_type(ops[2])); - + require_texture_query_variant(ops[2]); auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); statement("uint ", dummy_samples_levels, ";"); - auto expr = join("SPIRV_Cross_textureSize(", to_expression(ops[2]), ", ", + auto expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", ", bitcast_expression(SPIRType::UInt, ops[3]), ", ", dummy_samples_levels, ")"); auto &restype = get(ops[0]); @@ -4328,12 +5842,22 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto result_type = ops[0]; auto id = ops[1]; - require_texture_query_variant(expression_type(ops[2])); + require_texture_query_variant(ops[2]); + bool uav = expression_type(ops[2]).image.sampled == 2; + + if (const auto *var = maybe_get_backing_variable(ops[2])) + if 
(hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable)) + uav = false; auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); statement("uint ", dummy_samples_levels, ";"); - auto expr = join("SPIRV_Cross_textureSize(", to_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")"); + string expr; + if (uav) + expr = join("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", dummy_samples_levels, ")"); + else + expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")"); + auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::UInt, expr); emit_op(result_type, id, expr, true); @@ -4346,14 +5870,25 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto result_type = ops[0]; auto id = ops[1]; - require_texture_query_variant(expression_type(ops[2])); + require_texture_query_variant(ops[2]); + bool uav = expression_type(ops[2]).image.sampled == 2; + if (opcode == OpImageQueryLevels && uav) + SPIRV_CROSS_THROW("Cannot query levels for UAV images."); + + if (const auto *var = maybe_get_backing_variable(ops[2])) + if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable)) + uav = false; // Keep it simple and do not emit special variants to make this look nicer ... // This stuff is barely, if ever, used. 
forced_temporaries.insert(id); auto &type = get(result_type); statement(variable_decl(type, to_name(id)), ";"); - statement("SPIRV_Cross_textureSize(", to_expression(ops[2]), ", 0u, ", to_name(id), ");"); + + if (uav) + statement("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", to_name(id), ");"); + else + statement("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", to_name(id), ");"); auto &restype = get(ops[0]); auto expr = bitcast_expression(restype, SPIRType::UInt, to_name(id)); @@ -4384,24 +5919,29 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) if (operands != ImageOperandsSampleMask || instruction.length != 6) SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); uint32_t sample = ops[5]; - imgexpr = join(to_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")"); + imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")"); } else - imgexpr = join(to_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))"); + imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))"); pure = true; } else { - imgexpr = join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"); + imgexpr = join(to_non_uniform_aware_expression(ops[2]), "[", to_expression(ops[3]), "]"); // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", // except that the underlying type changes how the data is interpreted. 
- if (var && !subpass_data) + + bool force_srv = + hlsl_options.nonwritable_uav_texture_as_srv && var && has_decoration(var->self, DecorationNonWritable); + pure = force_srv; + + if (var && !subpass_data && !force_srv) imgexpr = remap_swizzle(get(result_type), image_format_to_components(get(var->basetype).image.format), imgexpr); } - if (var && var->forwardable) + if (var) { bool forward = forced_temporaries.find(id) == end(forced_temporaries); auto &e = emit_op(result_type, id, imgexpr, forward); @@ -4437,7 +5977,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr); } - statement(to_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";"); + statement(to_non_uniform_aware_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";"); if (var && variable_storage_is_aliased(*var)) flush_all_aliased_variables(); break; @@ -4447,12 +5987,15 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) { uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto &e = - set(id, join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"), result_type, true); + + auto expr = to_expression(ops[2]); + expr += join("[", to_expression(ops[3]), "]"); + auto &e = set(id, expr, result_type, true); // When using the pointer, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? var->self : 0; + e.loaded_from = var ? 
var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); break; } @@ -4469,6 +6012,8 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpAtomicIAdd: case OpAtomicIIncrement: case OpAtomicIDecrement: + case OpAtomicLoad: + case OpAtomicStore: { emit_atomic(ops, instruction.length, opcode); break; @@ -4482,13 +6027,13 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) if (opcode == OpMemoryBarrier) { - memory = get(ops[0]).scalar(); - semantics = get(ops[1]).scalar(); + memory = evaluate_constant_u32(ops[0]); + semantics = evaluate_constant_u32(ops[1]); } else { - memory = get(ops[1]).scalar(); - semantics = get(ops[2]).scalar(); + memory = evaluate_constant_u32(ops[1]); + semantics = evaluate_constant_u32(ops[2]); } if (memory == ScopeSubgroup) @@ -4508,8 +6053,8 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) if (next && next->op == OpControlBarrier) { auto *next_ops = stream(*next); - uint32_t next_memory = get(next_ops[1]).scalar(); - uint32_t next_semantics = get(next_ops[2]).scalar(); + uint32_t next_memory = evaluate_constant_u32(next_ops[1]); + uint32_t next_semantics = evaluate_constant_u32(next_ops[2]); next_semantics = mask_relevant_memory_semantics(next_semantics); // There is no "just execution barrier" in HLSL. 
@@ -4581,7 +6126,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) force_recompile(); } - auto expr = join("SPIRV_Cross_bitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", + auto expr = join("spvBitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ")"); bool forward = @@ -4603,15 +6148,18 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) } if (opcode == OpBitFieldSExtract) - HLSL_TFOP(SPIRV_Cross_bitfieldSExtract); + HLSL_TFOP(spvBitfieldSExtract); else - HLSL_TFOP(SPIRV_Cross_bitfieldUExtract); + HLSL_TFOP(spvBitfieldUExtract); break; } case OpBitCount: - HLSL_UFOP(countbits); + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "countbits", basetype, basetype); break; + } case OpBitReverse: HLSL_UFOP(reversebits); @@ -4619,7 +6167,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpArrayLength: { - auto *var = maybe_get(ops[2]); + auto *var = maybe_get_backing_variable(ops[2]); if (!var) SPIRV_CROSS_THROW("Array length must point directly to an SSBO block."); @@ -4629,21 +6177,186 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) // This must be 32-bit uint, so we're good to go. 
emit_uninitialized_temporary_expression(ops[0], ops[1]); - statement(to_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");"); + statement(to_non_uniform_aware_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");"); uint32_t offset = type_struct_member_offset(type, ops[3]); uint32_t stride = type_struct_member_array_stride(type, ops[3]); statement(to_expression(ops[1]), " = (", to_expression(ops[1]), " - ", offset, ") / ", stride, ";"); break; } + case OpIsHelperInvocationEXT: + if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher."); + // Helper lane state with demote is volatile by nature. + // Do not forward this. + emit_op(ops[0], ops[1], "IsHelperLane()", false); + break; + + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (hlsl_options.shader_model < 51) + SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1."); + break; // Nothing to do in the body + + case OpRayQueryInitializeKHR: + { + flush_variable_declaration(ops[0]); + + std::string ray_desc_name = get_unique_identifier(); + statement("RayDesc ", ray_desc_name, " = {", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), "};"); + + statement(to_expression(ops[0]), ".TraceRayInline(", + to_expression(ops[1]), ", ", // acc structure + to_expression(ops[2]), ", ", // ray flags + to_expression(ops[3]), ", ", // mask + ray_desc_name, ");"); // ray + break; + } + case OpRayQueryProceedKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".Proceed()"), false); + break; + } + case OpRayQueryTerminateKHR: + { + flush_variable_declaration(ops[0]); + statement(to_expression(ops[0]), ".Abort();"); + break; + } + case OpRayQueryGenerateIntersectionKHR: + { + flush_variable_declaration(ops[0]); + 
statement(to_expression(ops[0]), ".CommitProceduralPrimitiveHit(", ops[1], ");"); + break; + } + case OpRayQueryConfirmIntersectionKHR: + { + flush_variable_declaration(ops[0]); + statement(to_expression(ops[0]), ".CommitNonOpaqueTriangleHit();"); + break; + } + case OpRayQueryGetIntersectionTypeKHR: + { + emit_rayquery_function(".CommittedStatus()", ".CandidateType()", ops); + break; + } + case OpRayQueryGetIntersectionTKHR: + { + emit_rayquery_function(".CommittedRayT()", ".CandidateTriangleRayT()", ops); + break; + } + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: + { + emit_rayquery_function(".CommittedInstanceID()", ".CandidateInstanceID()", ops); + break; + } + case OpRayQueryGetIntersectionInstanceIdKHR: + { + emit_rayquery_function(".CommittedInstanceIndex()", ".CandidateInstanceIndex()", ops); + break; + } + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: + { + emit_rayquery_function(".CommittedInstanceContributionToHitGroupIndex()", + ".CandidateInstanceContributionToHitGroupIndex()", ops); + break; + } + case OpRayQueryGetIntersectionGeometryIndexKHR: + { + emit_rayquery_function(".CommittedGeometryIndex()", + ".CandidateGeometryIndex()", ops); + break; + } + case OpRayQueryGetIntersectionPrimitiveIndexKHR: + { + emit_rayquery_function(".CommittedPrimitiveIndex()", ".CandidatePrimitiveIndex()", ops); + break; + } + case OpRayQueryGetIntersectionBarycentricsKHR: + { + emit_rayquery_function(".CommittedTriangleBarycentrics()", ".CandidateTriangleBarycentrics()", ops); + break; + } + case OpRayQueryGetIntersectionFrontFaceKHR: + { + emit_rayquery_function(".CommittedTriangleFrontFace()", ".CandidateTriangleFrontFace()", ops); + break; + } + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".CandidateProceduralPrimitiveNonOpaque()"), false); + break; + } + case OpRayQueryGetIntersectionObjectRayDirectionKHR: + { + 
emit_rayquery_function(".CommittedObjectRayDirection()", ".CandidateObjectRayDirection()", ops); + break; + } + case OpRayQueryGetIntersectionObjectRayOriginKHR: + { + flush_variable_declaration(ops[0]); + emit_rayquery_function(".CommittedObjectRayOrigin()", ".CandidateObjectRayOrigin()", ops); + break; + } + case OpRayQueryGetIntersectionObjectToWorldKHR: + { + emit_rayquery_function(".CommittedObjectToWorld4x3()", ".CandidateObjectToWorld4x3()", ops); + break; + } + case OpRayQueryGetIntersectionWorldToObjectKHR: + { + emit_rayquery_function(".CommittedWorldToObject4x3()", ".CandidateWorldToObject4x3()", ops); + break; + } + case OpRayQueryGetRayFlagsKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".RayFlags()"), false); + break; + } + case OpRayQueryGetRayTMinKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".RayTMin()"), false); + break; + } + case OpRayQueryGetWorldRayOriginKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayOrigin()"), false); + break; + } + case OpRayQueryGetWorldRayDirectionKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayDirection()"), false); + break; + } + case OpSetMeshOutputsEXT: + { + statement("SetMeshOutputCounts(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");"); + break; + } + default: CompilerGLSL::emit_instruction(instruction); break; } } -void CompilerHLSL::require_texture_query_variant(const SPIRType &type) +void CompilerHLSL::require_texture_query_variant(uint32_t var_id) { + if (const auto *var = maybe_get_backing_variable(var_id)) + var_id = var->self; + + auto &type = expression_type(var_id); + bool uav = type.image.sampled == 2; + if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var_id, DecorationNonWritable)) + uav = false; + uint32_t bit = 0; switch 
(type.image.dim) { @@ -4692,17 +6405,22 @@ void CompilerHLSL::require_texture_query_variant(const SPIRType &type) SPIRV_CROSS_THROW("Unsupported query type."); } + auto norm_state = image_format_to_normalized_state(type.image.format); + auto &variant = uav ? required_texture_size_variants + .uav[uint32_t(norm_state)][image_format_to_components(type.image.format) - 1] : + required_texture_size_variants.srv; + uint64_t mask = 1ull << bit; - if ((required_textureSizeVariants & mask) == 0) + if ((variant & mask) == 0) { force_recompile(); - required_textureSizeVariants |= mask; + variant |= mask; } } void CompilerHLSL::set_root_constant_layouts(std::vector layout) { - root_constants_layout = move(layout); + root_constants_layout = std::move(layout); } void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &vertex_attributes) @@ -4710,7 +6428,7 @@ void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &ve remap_vertex_attributes.push_back(vertex_attributes); } -uint32_t CompilerHLSL::remap_num_workgroups_builtin() +VariableID CompilerHLSL::remap_num_workgroups_builtin() { update_active_builtins(); @@ -4753,9 +6471,15 @@ uint32_t CompilerHLSL::remap_num_workgroups_builtin() ir.meta[variable_id].decoration.alias = "SPIRV_Cross_NumWorkgroups"; num_workgroups_builtin = variable_id; + get_entry_point().interface_variables.push_back(num_workgroups_builtin); return variable_id; } +void CompilerHLSL::set_resource_binding_flags(HLSLBindingFlags flags) +{ + resource_binding_flags = flags; +} + void CompilerHLSL::validate_shader_model() { // Check for nonuniform qualifier. 
@@ -4769,6 +6493,12 @@ void CompilerHLSL::validate_shader_model() if (hlsl_options.shader_model < 51) SPIRV_CROSS_THROW( "Shader model 5.1 or higher is required to use bindless resources or NonUniformResourceIndex."); + break; + + case CapabilityVariablePointers: + case CapabilityVariablePointersStorageBuffer: + SPIRV_CROSS_THROW("VariablePointers capability is not supported in HLSL."); + default: break; } @@ -4776,10 +6506,15 @@ void CompilerHLSL::validate_shader_model() if (ir.addressing_model != AddressingModelLogical) SPIRV_CROSS_THROW("Only Logical addressing model can be used with HLSL."); + + if (hlsl_options.enable_16bit_types && hlsl_options.shader_model < 62) + SPIRV_CROSS_THROW("Need at least shader model 6.2 when enabling native 16-bit type support."); } string CompilerHLSL::compile() { + ir.fixup_reserved_names(); + // Do not deal with ES-isms like precision, older extensions and such. options.es = false; options.version = 450; @@ -4792,26 +6527,36 @@ string CompilerHLSL::compile() backend.uint16_t_literal_suffix = "u"; backend.basic_int_type = "int"; backend.basic_uint_type = "uint"; + backend.demote_literal = "discard"; + backend.boolean_mix_function = ""; backend.swizzle_is_function = false; backend.shared_is_implied = true; backend.unsized_array_supported = true; backend.explicit_struct_type = false; backend.use_initializer_list = true; backend.use_constructor_splatting = false; - backend.boolean_mix_support = false; backend.can_swizzle_scalar = true; backend.can_declare_struct_inline = false; backend.can_declare_arrays_inline = false; backend.can_return_array = false; backend.nonuniform_qualifier = "NonUniformResourceIndex"; backend.support_case_fallthrough = false; + backend.force_merged_mesh_block = get_execution_model() == ExecutionModelMeshEXT; + backend.force_gl_in_out_block = backend.force_merged_mesh_block; + + // SM 4.1 does not support precise for some reason. 
+ backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40; + fixup_anonymous_struct_names(); fixup_type_alias(); reorder_type_alias(); build_function_control_flow_graphs_and_analyze(); validate_shader_model(); update_active_builtins(); analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); + if (get_execution_model() == ExecutionModelMeshEXT) + analyze_meshlet_writes(); // Subpass input needs SV_Position. if (need_subpass_input) @@ -4820,10 +6565,7 @@ string CompilerHLSL::compile() uint32_t pass_count = 0; do { - if (pass_count >= 3) - SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); - - reset(); + reset(pass_count); // Move constructor for this type is broken on GCC 4.9 ... buffer.reset(); @@ -4863,3 +6605,60 @@ void CompilerHLSL::emit_block_hints(const SPIRBlock &block) break; } } + +string CompilerHLSL::get_unique_identifier() +{ + return join("_", unique_identifier_count++, "ident"); +} + +void CompilerHLSL::add_hlsl_resource_binding(const HLSLResourceBinding &binding) +{ + StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding }; + resource_bindings[tuple] = { binding, false }; +} + +bool CompilerHLSL::is_hlsl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const +{ + StageSetBinding tuple = { model, desc_set, binding }; + auto itr = resource_bindings.find(tuple); + return itr != end(resource_bindings) && itr->second.second; +} + +CompilerHLSL::BitcastType CompilerHLSL::get_bitcast_type(uint32_t result_type, uint32_t op0) +{ + auto &rslt_type = get(result_type); + auto &expr_type = expression_type(op0); + + if (rslt_type.basetype == SPIRType::BaseType::UInt64 && expr_type.basetype == SPIRType::BaseType::UInt && + expr_type.vecsize == 2) + return BitcastType::TypePackUint2x32; + else if (rslt_type.basetype == SPIRType::BaseType::UInt && rslt_type.vecsize == 2 && + expr_type.basetype == SPIRType::BaseType::UInt64) + 
return BitcastType::TypeUnpackUint64; + + return BitcastType::TypeNormal; +} + +bool CompilerHLSL::is_hlsl_force_storage_buffer_as_uav(ID id) const +{ + if (hlsl_options.force_storage_buffer_as_uav) + { + return true; + } + + const uint32_t desc_set = get_decoration(id, spv::DecorationDescriptorSet); + const uint32_t binding = get_decoration(id, spv::DecorationBinding); + + return (force_uav_buffer_bindings.find({ desc_set, binding }) != force_uav_buffer_bindings.end()); +} + +void CompilerHLSL::set_hlsl_force_storage_buffer_as_uav(uint32_t desc_set, uint32_t binding) +{ + SetBindingPair pair = { desc_set, binding }; + force_uav_buffer_bindings.insert(pair); +} + +bool CompilerHLSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const +{ + return (builtin == BuiltInSampleMask); +} diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp index 796f502c5d2..57d1c2cdc01 100644 --- a/spirv_hlsl.hpp +++ b/spirv_hlsl.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2016-2019 Robert Konrad + * Copyright 2016-2021 Robert Konrad + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_HLSL_HPP #define SPIRV_HLSL_HPP @@ -41,6 +48,61 @@ struct RootConstants uint32_t space; }; +// For finer control, decorations may be removed from specific resources instead with unset_decoration(). +enum HLSLBindingFlagBits +{ + HLSL_BINDING_AUTO_NONE_BIT = 0, + + // Push constant (root constant) resources will be declared as CBVs (b-space) without a register() declaration. + // A register will be automatically assigned by the D3D compiler, but must therefore be reflected in D3D-land. 
+ // Push constants do not normally have a DecorationBinding set, but if they do, this can be used to ignore it. + HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT = 1 << 0, + + // cbuffer resources will be declared as CBVs (b-space) without a register() declaration. + // A register will be automatically assigned, but must be reflected in D3D-land. + HLSL_BINDING_AUTO_CBV_BIT = 1 << 1, + + // All SRVs (t-space) will be declared without a register() declaration. + HLSL_BINDING_AUTO_SRV_BIT = 1 << 2, + + // All UAVs (u-space) will be declared without a register() declaration. + HLSL_BINDING_AUTO_UAV_BIT = 1 << 3, + + // All samplers (s-space) will be declared without a register() declaration. + HLSL_BINDING_AUTO_SAMPLER_BIT = 1 << 4, + + // No resources will be declared with register(). + HLSL_BINDING_AUTO_ALL = 0x7fffffff +}; +using HLSLBindingFlags = uint32_t; + +// By matching stage, desc_set and binding for a SPIR-V resource, +// register bindings are set based on whether the HLSL resource is a +// CBV, UAV, SRV or Sampler. A single binding in SPIR-V might contain multiple +// resource types, e.g. COMBINED_IMAGE_SAMPLER, and SRV/Sampler bindings will be used respectively. +// On SM 5.0 and lower, register_space is ignored. +// +// To remap a push constant block which does not have any desc_set/binding associated with it, +// use ResourceBindingPushConstant{DescriptorSet,Binding} as values for desc_set/binding. +// For deeper control of push constants, set_root_constant_layouts() can be used instead. 
+struct HLSLResourceBinding +{ + spv::ExecutionModel stage = spv::ExecutionModelMax; + uint32_t desc_set = 0; + uint32_t binding = 0; + + struct Binding + { + uint32_t register_space = 0; + uint32_t register_binding = 0; + } cbv, uav, srv, sampler; +}; + +enum HLSLAuxBinding +{ + HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE = 0 +}; + class CompilerHLSL : public CompilerGLSL { public: @@ -59,6 +121,30 @@ class CompilerHLSL : public CompilerGLSL // Set to false if you know you will never use base instance or base vertex // functionality as it might remove an internal cbuffer. bool support_nonzero_base_vertex_base_instance = false; + + // Forces a storage buffer to always be declared as UAV, even if the readonly decoration is used. + // By default, a readonly storage buffer will be declared as ByteAddressBuffer (SRV) instead. + // Alternatively, use set_hlsl_force_storage_buffer_as_uav to specify individually. + bool force_storage_buffer_as_uav = false; + + // Forces any storage image type marked as NonWritable to be considered an SRV instead. + // For this to work with function call parameters, NonWritable must be considered to be part of the type system + // so that NonWritable image arguments are also translated to Texture rather than RWTexture. + bool nonwritable_uav_texture_as_srv = false; + + // Enables native 16-bit types. Needs SM 6.2. + // Uses half/int16_t/uint16_t instead of min16* types. + // Also adds support for 16-bit load-store from (RW)ByteAddressBuffer. + bool enable_16bit_types = false; + + // If matrices are used as IO variables, flatten the attribute declaration to use + // TEXCOORD{N,N+1,N+2,...} rather than TEXCOORDN_{0,1,2,3}. + // If add_vertex_attribute_remap is used and this feature is used, + // the semantic name will be queried once per active location. + bool flatten_matrix_vertex_input_semantics = false; + + // Rather than emitting main() for the entry point, use the name in SPIR-V. 
+ bool use_entry_point_name = false; }; explicit CompilerHLSL(std::vector spirv_) @@ -114,7 +200,26 @@ class CompilerHLSL : public CompilerGLSL // If non-zero, this returns the variable ID of a cbuffer which corresponds to // the cbuffer declared above. By default, no binding or descriptor set decoration is set, // so the calling application should declare explicit bindings on this ID before calling compile(). - uint32_t remap_num_workgroups_builtin(); + VariableID remap_num_workgroups_builtin(); + + // Controls how resource bindings are declared in the output HLSL. + void set_resource_binding_flags(HLSLBindingFlags flags); + + // resource is a resource binding to indicate the HLSL CBV, SRV, UAV or sampler binding + // to use for a particular SPIR-V description set + // and binding. If resource bindings are provided, + // is_hlsl_resource_binding_used() will return true after calling ::compile() if + // the set/binding combination was used by the HLSL code. + void add_hlsl_resource_binding(const HLSLResourceBinding &resource); + bool is_hlsl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding) const; + + // Controls which storage buffer bindings will be forced to be declared as UAVs. + void set_hlsl_force_storage_buffer_as_uav(uint32_t desc_set, uint32_t binding); + + // By default, these magic buffers are not assigned a specific binding. 
+ void set_hlsl_aux_buffer_binding(HLSLAuxBinding binding, uint32_t register_index, uint32_t register_space); + void unset_hlsl_aux_buffer_binding(HLSLAuxBinding binding); + bool is_hlsl_aux_buffer_binding_used(HLSLAuxBinding binding) const; private: std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; @@ -126,10 +231,13 @@ class CompilerHLSL : public CompilerGLSL void emit_header() override; void emit_resources(); void emit_interface_block_globally(const SPIRVariable &type); - void emit_interface_block_in_struct(const SPIRVariable &type, std::unordered_set &active_locations); + void emit_interface_block_in_struct(const SPIRVariable &var, std::unordered_set &active_locations); + void emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index, uint32_t location, + std::unordered_set &active_locations); void emit_builtin_inputs_in_struct(); void emit_builtin_outputs_in_struct(); - void emit_texture_op(const Instruction &i) override; + void emit_builtin_primitive_outputs_in_struct(); + void emit_texture_op(const Instruction &i, bool sparse) override; void emit_instruction(const Instruction &instruction) override; void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count) override; @@ -145,16 +253,25 @@ class CompilerHLSL : public CompilerGLSL std::string layout_for_member(const SPIRType &type, uint32_t index) override; std::string to_interpolation_qualifiers(const Bitset &flags) override; std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; - std::string to_func_call_arg(uint32_t id) override; + bool emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) override; + std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; std::string to_sampler_expression(uint32_t id); std::string to_resource_binding(const SPIRVariable &var); std::string to_resource_binding_sampler(const 
SPIRVariable &var); - std::string to_resource_register(char space, uint32_t binding, uint32_t set); + std::string to_resource_register(HLSLBindingFlagBits flag, char space, uint32_t binding, uint32_t set); + std::string to_initializer_expression(const SPIRVariable &var) override; void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; void emit_access_chain(const Instruction &instruction); void emit_load(const Instruction &instruction); - std::string read_access_chain(const SPIRAccessChain &chain); - void write_access_chain(const SPIRAccessChain &chain, uint32_t value); + void read_access_chain(std::string *expr, const std::string &lhs, const SPIRAccessChain &chain); + void read_access_chain_struct(const std::string &lhs, const SPIRAccessChain &chain); + void read_access_chain_array(const std::string &lhs, const SPIRAccessChain &chain); + void write_access_chain(const SPIRAccessChain &chain, uint32_t value, const SmallVector &composite_chain); + void write_access_chain_struct(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain); + void write_access_chain_array(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain); + std::string write_access_chain_value(uint32_t value, const SmallVector &composite_chain, bool enclose); void emit_store(const Instruction &instruction); void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op); void emit_subgroup_op(const Instruction &i) override; @@ -162,15 +279,19 @@ class CompilerHLSL : public CompilerGLSL void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier, uint32_t base_offset = 0) override; + void emit_rayquery_function(const char *commited, const char *candidate, const uint32_t *ops); const char *to_storage_qualifiers_glsl(const SPIRVariable &var) override; void replace_illegal_names() override; + bool 
is_hlsl_force_storage_buffer_as_uav(ID id) const; + Options hlsl_options; // TODO: Refactor this to be more similar to MSL, maybe have some common system in place? bool requires_op_fmod = false; bool requires_fp16_packing = false; + bool requires_uint2_packing = false; bool requires_explicit_fp16_packing = false; bool requires_unorm8_packing = false; bool requires_snorm8_packing = false; @@ -183,8 +304,25 @@ class CompilerHLSL : public CompilerGLSL bool requires_inverse_4x4 = false; bool requires_scalar_reflect = false; bool requires_scalar_refract = false; - uint64_t required_textureSizeVariants = 0; - void require_texture_query_variant(const SPIRType &type); + bool requires_scalar_faceforward = false; + + struct TextureSizeVariants + { + // MSVC 2013 workaround. + TextureSizeVariants() + { + srv = 0; + for (auto &unorm : uav) + for (auto &u : unorm) + u = 0; + } + uint64_t srv; + uint64_t uav[3][4]; + } required_texture_size_variants; + + void require_texture_query_variant(uint32_t var_id); + void emit_texture_size_variants(uint64_t variant_mask, const char *vecsize_qualifier, bool uav, + const char *type_qualifier); enum TextureQueryVariantDim { @@ -209,6 +347,19 @@ class CompilerHLSL : public CompilerGLSL QueryTypeCount = 3 }; + enum BitcastType + { + TypeNormal, + TypePackUint2x32, + TypeUnpackUint64 + }; + + void analyze_meshlet_writes(); + void analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive, + std::unordered_set &processed_func_ids); + + BitcastType get_bitcast_type(uint32_t result_type, uint32_t op0); + void emit_builtin_variables(); bool require_output = false; bool require_input = false; @@ -216,16 +367,39 @@ class CompilerHLSL : public CompilerGLSL uint32_t type_to_consumed_locations(const SPIRType &type) const; - void emit_io_block(const SPIRVariable &var); std::string to_semantic(uint32_t location, spv::ExecutionModel em, spv::StorageClass sc); uint32_t num_workgroups_builtin = 0; + HLSLBindingFlags 
resource_binding_flags = 0; // Custom root constant layout, which should be emitted // when translating push constant ranges. std::vector root_constants_layout; void validate_shader_model(); + + std::string get_unique_identifier(); + uint32_t unique_identifier_count = 0; + + std::unordered_map, InternalHasher> resource_bindings; + void remap_hlsl_resource_binding(HLSLBindingFlagBits type, uint32_t &desc_set, uint32_t &binding); + + std::unordered_set force_uav_buffer_bindings; + + struct + { + uint32_t register_index = 0; + uint32_t register_space = 0; + bool explicit_binding = false; + bool used = false; + } base_vertex_info; + + // Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but SV_Coverage is a scalar in HLSL. + bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override; + + std::vector composite_selection_workaround_types; + + std::string get_inner_entry_point_name() const; }; } // namespace SPIRV_CROSS_NAMESPACE diff --git a/spirv_msl.cpp b/spirv_msl.cpp index eaee10a0493..da5656ab77e 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2016-2019 The Brenwill Workshop Ltd. + * Copyright 2016-2021 The Brenwill Workshop Ltd. + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #include "spirv_msl.hpp" #include "GLSL.std.450.h" @@ -27,9 +34,10 @@ using namespace std; static const uint32_t k_unknown_location = ~0u; static const uint32_t k_unknown_component = ~0u; +static const char *force_inline = "static inline __attribute__((always_inline))"; CompilerMSL::CompilerMSL(std::vector spirv_) - : CompilerGLSL(move(spirv_)) + : CompilerGLSL(std::move(spirv_)) { } @@ -48,17 +56,81 @@ CompilerMSL::CompilerMSL(ParsedIR &&ir_) { } -void CompilerMSL::add_msl_vertex_attribute(const MSLVertexAttr &va) +void CompilerMSL::add_msl_shader_input(const MSLShaderInterfaceVariable &si) +{ + inputs_by_location[{si.location, si.component}] = si; + if (si.builtin != BuiltInMax && !inputs_by_builtin.count(si.builtin)) + inputs_by_builtin[si.builtin] = si; +} + +void CompilerMSL::add_msl_shader_output(const MSLShaderInterfaceVariable &so) { - vtx_attrs_by_location[va.location] = va; - if (va.builtin != BuiltInMax && !vtx_attrs_by_builtin.count(va.builtin)) - vtx_attrs_by_builtin[va.builtin] = va; + outputs_by_location[{so.location, so.component}] = so; + if (so.builtin != BuiltInMax && !outputs_by_builtin.count(so.builtin)) + outputs_by_builtin[so.builtin] = so; } void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding) { StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding }; resource_bindings[tuple] = { binding, false }; + + // If we might need to pad argument buffer members to positionally align + // arg buffer indexes, also maintain a lookup by argument buffer index. 
+ if (msl_options.pad_argument_buffer_resources) + { + StageSetBinding arg_idx_tuple = { binding.stage, binding.desc_set, k_unknown_component }; + +#define ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(rez) \ + arg_idx_tuple.binding = binding.msl_##rez; \ + resource_arg_buff_idx_to_binding_number[arg_idx_tuple] = binding.binding + + switch (binding.basetype) + { + case SPIRType::Void: + case SPIRType::Boolean: + case SPIRType::SByte: + case SPIRType::UByte: + case SPIRType::Short: + case SPIRType::UShort: + case SPIRType::Int: + case SPIRType::UInt: + case SPIRType::Int64: + case SPIRType::UInt64: + case SPIRType::AtomicCounter: + case SPIRType::Half: + case SPIRType::Float: + case SPIRType::Double: + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(buffer); + break; + case SPIRType::Image: + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(texture); + break; + case SPIRType::Sampler: + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(sampler); + break; + case SPIRType::SampledImage: + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(texture); + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(sampler); + break; + default: + SPIRV_CROSS_THROW("Unexpected argument buffer resource base type. 
When padding argument buffer elements, " + "all descriptor set resources must be supplied with a base type by the app."); + } +#undef ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP + } +} + +void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index) +{ + SetBindingPair pair = { desc_set, binding }; + buffers_requiring_dynamic_offset[pair] = { index, 0 }; +} + +void CompilerMSL::add_inline_uniform_block(uint32_t desc_set, uint32_t binding) +{ + SetBindingPair pair = { desc_set, binding }; + inline_uniform_blocks.insert(pair); } void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set) @@ -67,18 +139,66 @@ void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set) argument_buffer_discrete_mask |= 1u << desc_set; } -bool CompilerMSL::is_msl_vertex_attribute_used(uint32_t location) +void CompilerMSL::set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage) { - return vtx_attrs_in_use.count(location) != 0; + if (desc_set < kMaxArgumentBuffers) + { + if (device_storage) + argument_buffer_device_storage_mask |= 1u << desc_set; + else + argument_buffer_device_storage_mask &= ~(1u << desc_set); + } +} + +bool CompilerMSL::is_msl_shader_input_used(uint32_t location) +{ + // Don't report internal location allocations to app. + return location_inputs_in_use.count(location) != 0 && + location_inputs_in_use_fallback.count(location) == 0; +} + +bool CompilerMSL::is_msl_shader_output_used(uint32_t location) +{ + // Don't report internal location allocations to app. 
+ return location_outputs_in_use.count(location) != 0 && + location_outputs_in_use_fallback.count(location) == 0; +} + +uint32_t CompilerMSL::get_automatic_builtin_input_location(spv::BuiltIn builtin) const +{ + auto itr = builtin_to_automatic_input_location.find(builtin); + if (itr == builtin_to_automatic_input_location.end()) + return k_unknown_location; + else + return itr->second; +} + +uint32_t CompilerMSL::get_automatic_builtin_output_location(spv::BuiltIn builtin) const +{ + auto itr = builtin_to_automatic_output_location.find(builtin); + if (itr == builtin_to_automatic_output_location.end()) + return k_unknown_location; + else + return itr->second; } -bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) +bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const { StageSetBinding tuple = { model, desc_set, binding }; auto itr = resource_bindings.find(tuple); return itr != end(resource_bindings) && itr->second.second; } +// Returns the size of the array of resources used by the variable with the specified id. +// The returned value is retrieved from the resource binding added using add_msl_resource_binding(). +uint32_t CompilerMSL::get_resource_array_size(uint32_t id) const +{ + StageSetBinding tuple = { get_entry_point().model, get_decoration(id, DecorationDescriptorSet), + get_decoration(id, DecorationBinding) }; + auto itr = resource_bindings.find(tuple); + return itr != end(resource_bindings) ? 
itr->second.first.count : 0; +} + uint32_t CompilerMSL::get_automatic_msl_resource_binding(uint32_t id) const { return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexPrimary); @@ -89,26 +209,58 @@ uint32_t CompilerMSL::get_automatic_msl_resource_binding_secondary(uint32_t id) return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexSecondary); } +uint32_t CompilerMSL::get_automatic_msl_resource_binding_tertiary(uint32_t id) const +{ + return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexTertiary); +} + +uint32_t CompilerMSL::get_automatic_msl_resource_binding_quaternary(uint32_t id) const +{ + return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexQuaternary); +} + void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components) { fragment_output_components[location] = components; } +bool CompilerMSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const +{ + return (builtin == BuiltInSampleMask); +} + void CompilerMSL::build_implicit_builtins() { bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition); - bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex; - bool need_tesc_params = get_execution_model() == ExecutionModelTessellationControl; + bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex && + !msl_options.vertex_for_tessellation; + bool need_tesc_params = is_tesc_shader(); + bool need_tese_params = is_tese_shader() && msl_options.raw_buffer_tese_input; bool need_subgroup_mask = active_input_builtins.get(BuiltInSubgroupEqMask) || active_input_builtins.get(BuiltInSubgroupGeMask) || active_input_builtins.get(BuiltInSubgroupGtMask) || active_input_builtins.get(BuiltInSubgroupLeMask) || active_input_builtins.get(BuiltInSubgroupLtMask); bool need_subgroup_ge_mask = !msl_options.is_ios() && (active_input_builtins.get(BuiltInSubgroupGeMask) || 
active_input_builtins.get(BuiltInSubgroupGtMask)); - bool need_multiview = get_execution_model() == ExecutionModelVertex && + bool need_multiview = get_execution_model() == ExecutionModelVertex && !msl_options.view_index_from_device_index && + msl_options.multiview_layered_rendering && (msl_options.multiview || active_input_builtins.get(BuiltInViewIndex)); + bool need_dispatch_base = + msl_options.dispatch_base && get_execution_model() == ExecutionModelGLCompute && + (active_input_builtins.get(BuiltInWorkgroupId) || active_input_builtins.get(BuiltInGlobalInvocationId)); + bool need_grid_params = get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation; + bool need_vertex_base_params = + need_grid_params && + (active_input_builtins.get(BuiltInVertexId) || active_input_builtins.get(BuiltInVertexIndex) || + active_input_builtins.get(BuiltInBaseVertex) || active_input_builtins.get(BuiltInInstanceId) || + active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance)); + bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId); + bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups); + if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params || - need_multiview || needs_subgroup_invocation_id) + need_tese_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params || + needs_sample_id || needs_subgroup_invocation_id || needs_subgroup_size || needs_helper_invocation || + has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size) { bool has_frag_coord = false; bool has_sample_id = false; @@ -121,21 +273,68 @@ void CompilerMSL::build_implicit_builtins() bool has_subgroup_invocation_id = false; bool has_subgroup_size = false; bool has_view_idx = false; + bool has_layer = false; + bool 
has_helper_invocation = false; + bool has_local_invocation_index = false; + bool has_workgroup_size = false; + uint32_t workgroup_id_type = 0; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin) + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + return; + if (!interface_variable_exists_in_entry_point(var.self)) + return; + if (!has_decoration(var.self, DecorationBuiltIn)) return; BuiltIn builtin = ir.meta[var.self].decoration.builtin_type; - if (need_subpass_input && builtin == BuiltInFragCoord) + + if (var.storage == StorageClassOutput) + { + if (has_additional_fixed_sample_mask() && builtin == BuiltInSampleMask) + { + builtin_sample_mask_id = var.self; + mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var.self); + does_shader_write_sample_mask = true; + } + } + + if (var.storage != StorageClassInput) + return; + + // Use Metal's native frame-buffer fetch API for subpass inputs. 
+ if (need_subpass_input && (!msl_options.use_framebuffer_fetch_subpasses)) { - builtin_frag_coord_id = var.self; - has_frag_coord = true; + switch (builtin) + { + case BuiltInFragCoord: + mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var.self); + builtin_frag_coord_id = var.self; + has_frag_coord = true; + break; + case BuiltInLayer: + if (!msl_options.arrayed_subpass_input || msl_options.multiview) + break; + mark_implicit_builtin(StorageClassInput, BuiltInLayer, var.self); + builtin_layer_id = var.self; + has_layer = true; + break; + case BuiltInViewIndex: + if (!msl_options.multiview) + break; + mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var.self); + builtin_view_idx_id = var.self; + has_view_idx = true; + break; + default: + break; + } } - if (need_sample_pos && builtin == BuiltInSampleId) + if ((need_sample_pos || needs_sample_id) && builtin == BuiltInSampleId) { builtin_sample_id_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var.self); has_sample_id = true; } @@ -145,18 +344,22 @@ void CompilerMSL::build_implicit_builtins() { case BuiltInVertexIndex: builtin_vertex_idx_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var.self); has_vertex_idx = true; break; case BuiltInBaseVertex: builtin_base_vertex_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var.self); has_base_vertex = true; break; case BuiltInInstanceIndex: builtin_instance_idx_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var.self); has_instance_idx = true; break; case BuiltInBaseInstance: builtin_base_instance_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var.self); has_base_instance = true; break; default: @@ -164,100 +367,193 @@ void CompilerMSL::build_implicit_builtins() } } - if (need_tesc_params) + if (need_tesc_params && builtin == BuiltInInvocationId) { - switch (builtin) - { - case BuiltInInvocationId: - 
builtin_invocation_id_id = var.self; - has_invocation_id = true; - break; - case BuiltInPrimitiveId: - builtin_primitive_id_id = var.self; - has_primitive_id = true; - break; - default: - break; - } + builtin_invocation_id_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var.self); + has_invocation_id = true; + } + + if ((need_tesc_params || need_tese_params) && builtin == BuiltInPrimitiveId) + { + builtin_primitive_id_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var.self); + has_primitive_id = true; + } + + if (need_tese_params && builtin == BuiltInTessLevelOuter) + { + tess_level_outer_var_id = var.self; + } + + if (need_tese_params && builtin == BuiltInTessLevelInner) + { + tess_level_inner_var_id = var.self; } if ((need_subgroup_mask || needs_subgroup_invocation_id) && builtin == BuiltInSubgroupLocalInvocationId) { builtin_subgroup_invocation_id_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var.self); has_subgroup_invocation_id = true; } - if (need_subgroup_ge_mask && builtin == BuiltInSubgroupSize) + if ((need_subgroup_ge_mask || needs_subgroup_size) && builtin == BuiltInSubgroupSize) { builtin_subgroup_size_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var.self); has_subgroup_size = true; } if (need_multiview) { - if (builtin == BuiltInInstanceIndex) + switch (builtin) { + case BuiltInInstanceIndex: // The view index here is derived from the instance index. builtin_instance_idx_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var.self); has_instance_idx = true; - } - - if (builtin == BuiltInViewIndex) - { + break; + case BuiltInBaseInstance: + // If a non-zero base instance is used, we need to adjust for it when calculating the view index. 
+ builtin_base_instance_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var.self); + has_base_instance = true; + break; + case BuiltInViewIndex: builtin_view_idx_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var.self); has_view_idx = true; + break; + default: + break; } } + + if (needs_helper_invocation && builtin == BuiltInHelperInvocation) + { + builtin_helper_invocation_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInHelperInvocation, var.self); + has_helper_invocation = true; + } + + if (need_local_invocation_index && builtin == BuiltInLocalInvocationIndex) + { + builtin_local_invocation_index_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var.self); + has_local_invocation_index = true; + } + + if (need_workgroup_size && builtin == BuiltInLocalInvocationId) + { + builtin_workgroup_size_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self); + has_workgroup_size = true; + } + + // The base workgroup needs to have the same type and vector size + // as the workgroup or invocation ID, so keep track of the type that + // was used. + if (need_dispatch_base && workgroup_id_type == 0 && + (builtin == BuiltInWorkgroupId || builtin == BuiltInGlobalInvocationId)) + workgroup_id_type = var.basetype; }); - if (!has_frag_coord && need_subpass_input) + // Use Metal's native frame-buffer fetch API for subpass inputs. 
+ if ((!has_frag_coord || (msl_options.multiview && !has_view_idx) || + (msl_options.arrayed_subpass_input && !msl_options.multiview && !has_layer)) && + (!msl_options.use_framebuffer_fetch_subpasses) && need_subpass_input) { - uint32_t offset = ir.increase_bound_by(3); - uint32_t type_id = offset; - uint32_t type_ptr_id = offset + 1; - uint32_t var_id = offset + 2; + if (!has_frag_coord) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + // Create gl_FragCoord. + SPIRType vec4_type; + vec4_type.basetype = SPIRType::Float; + vec4_type.width = 32; + vec4_type.vecsize = 4; + set(type_id, vec4_type); + + SPIRType vec4_type_ptr; + vec4_type_ptr = vec4_type; + vec4_type_ptr.pointer = true; + vec4_type_ptr.pointer_depth++; + vec4_type_ptr.parent_type = type_id; + vec4_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, vec4_type_ptr); + ptr_type.self = type_id; - // Create gl_FragCoord. 
- SPIRType vec4_type; - vec4_type.basetype = SPIRType::Float; - vec4_type.width = 32; - vec4_type.vecsize = 4; - set(type_id, vec4_type); - - SPIRType vec4_type_ptr; - vec4_type_ptr = vec4_type; - vec4_type_ptr.pointer = true; - vec4_type_ptr.parent_type = type_id; - vec4_type_ptr.storage = StorageClassInput; - auto &ptr_type = set(type_ptr_id, vec4_type_ptr); - ptr_type.self = type_id; + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord); + builtin_frag_coord_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var_id); + } - set(var_id, type_ptr_id, StorageClassInput); - set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord); - builtin_frag_coord_id = var_id; - mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var_id); + if (!has_layer && msl_options.arrayed_subpass_input && !msl_options.multiview) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_Layer. + SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLayer); + builtin_layer_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInLayer, var_id); + } + + if (!has_view_idx && msl_options.multiview) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_ViewIndex. 
+ SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex); + builtin_view_idx_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id); + } } - if (!has_sample_id && need_sample_pos) + if (!has_sample_id && (need_sample_pos || needs_sample_id)) { - uint32_t offset = ir.increase_bound_by(3); - uint32_t type_id = offset; - uint32_t type_ptr_id = offset + 1; - uint32_t var_id = offset + 2; + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; // Create gl_SampleID. - SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); - SPIRType uint_type_ptr; - uint_type_ptr = uint_type; + uint_type_ptr = get_uint_type(); uint_type_ptr.pointer = true; - uint_type_ptr.parent_type = type_id; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); uint_type_ptr.storage = StorageClassInput; auto &ptr_type = set(type_ptr_id, uint_type_ptr); - ptr_type.self = type_id; + ptr_type.self = get_uint_type_id(); set(var_id, type_ptr_id, StorageClassInput); set_decoration(var_id, DecorationBuiltIn, BuiltInSampleId); @@ -266,24 +562,18 @@ void CompilerMSL::build_implicit_builtins() } if ((need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) || - (need_multiview && (!has_instance_idx || !has_view_idx))) + (need_multiview && (!has_instance_idx || !has_base_instance || !has_view_idx))) { - uint32_t offset = ir.increase_bound_by(2); - uint32_t type_id = offset; - uint32_t type_ptr_id = offset + 1; - - SPIRType uint_type; - 
uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); + uint32_t type_ptr_id = ir.increase_bound_by(1); SPIRType uint_type_ptr; - uint_type_ptr = uint_type; + uint_type_ptr = get_uint_type(); uint_type_ptr.pointer = true; - uint_type_ptr.parent_type = type_id; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); uint_type_ptr.storage = StorageClassInput; auto &ptr_type = set(type_ptr_id, uint_type_ptr); - ptr_type.self = type_id; + ptr_type.self = get_uint_type_id(); if (need_vertex_params && !has_vertex_idx) { @@ -316,30 +606,9 @@ void CompilerMSL::build_implicit_builtins() set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex); builtin_instance_idx_id = var_id; mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var_id); - - if (need_multiview) - { - // Multiview shaders are not allowed to write to gl_Layer, ostensibly because - // it is implicitly written from gl_ViewIndex, but we have to do that explicitly. - // Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but - // gl_Layer is an output in vertex-pipeline shaders. 
- uint32_t type_ptr_out_id = ir.increase_bound_by(2); - SPIRType uint_type_ptr_out; - uint_type_ptr_out = uint_type; - uint_type_ptr_out.pointer = true; - uint_type_ptr_out.parent_type = type_id; - uint_type_ptr_out.storage = StorageClassOutput; - auto &ptr_out_type = set(type_ptr_out_id, uint_type_ptr_out); - ptr_out_type.self = type_id; - var_id = type_ptr_out_id + 1; - set(var_id, type_ptr_out_id, StorageClassOutput); - set_decoration(var_id, DecorationBuiltIn, BuiltInLayer); - builtin_layer_id = var_id; - mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id); - } } - if (need_vertex_params && !has_base_instance) + if (!has_base_instance) // Needed by both multiview and tessellation { uint32_t var_id = ir.increase_bound_by(1); @@ -350,6 +619,28 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id); } + if (need_multiview) + { + // Multiview shaders are not allowed to write to gl_Layer, ostensibly because + // it is implicitly written from gl_ViewIndex, but we have to do that explicitly. + // Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but + // gl_Layer is an output in vertex-pipeline shaders. 
+ uint32_t type_ptr_out_id = ir.increase_bound_by(2); + SPIRType uint_type_ptr_out; + uint_type_ptr_out = get_uint_type(); + uint_type_ptr_out.pointer = true; + uint_type_ptr_out.pointer_depth++; + uint_type_ptr_out.parent_type = get_uint_type_id(); + uint_type_ptr_out.storage = StorageClassOutput; + auto &ptr_out_type = set(type_ptr_out_id, uint_type_ptr_out); + ptr_out_type.self = get_uint_type_id(); + uint32_t var_id = type_ptr_out_id + 1; + set(var_id, type_ptr_out_id, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLayer); + builtin_layer_id = var_id; + mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id); + } + if (need_multiview && !has_view_idx) { uint32_t var_id = ir.increase_bound_by(1); @@ -362,26 +653,31 @@ void CompilerMSL::build_implicit_builtins() } } - if (need_tesc_params && (!has_invocation_id || !has_primitive_id)) + if ((need_tesc_params && (msl_options.multi_patch_workgroup || !has_invocation_id || !has_primitive_id)) || + (need_tese_params && !has_primitive_id) || need_grid_params) { - uint32_t offset = ir.increase_bound_by(2); - uint32_t type_id = offset; - uint32_t type_ptr_id = offset + 1; - - SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); + uint32_t type_ptr_id = ir.increase_bound_by(1); SPIRType uint_type_ptr; - uint_type_ptr = uint_type; + uint_type_ptr = get_uint_type(); uint_type_ptr.pointer = true; - uint_type_ptr.parent_type = type_id; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); uint_type_ptr.storage = StorageClassInput; auto &ptr_type = set(type_ptr_id, uint_type_ptr); - ptr_type.self = type_id; + ptr_type.self = get_uint_type_id(); + + if ((need_tesc_params && msl_options.multi_patch_workgroup) || need_grid_params) + { + uint32_t var_id = ir.increase_bound_by(1); - if (!has_invocation_id) + // Create gl_GlobalInvocationID. 
+ set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInGlobalInvocationId); + builtin_invocation_id_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInGlobalInvocationId, var_id); + } + else if (need_tesc_params && !has_invocation_id) { uint32_t var_id = ir.increase_bound_by(1); @@ -392,7 +688,7 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var_id); } - if (!has_primitive_id) + if ((need_tesc_params || need_tese_params) && !has_primitive_id) { uint32_t var_id = ir.increase_bound_by(1); @@ -402,28 +698,34 @@ void CompilerMSL::build_implicit_builtins() builtin_primitive_id_id = var_id; mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var_id); } + + if (need_grid_params) + { + uint32_t var_id = ir.increase_bound_by(1); + + set(var_id, build_extended_vector_type(get_uint_type_id(), 3), StorageClassInput); + set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInStageInputSize); + get_entry_point().interface_variables.push_back(var_id); + set_name(var_id, "spvStageInputSize"); + builtin_stage_input_size_id = var_id; + } } if (!has_subgroup_invocation_id && (need_subgroup_mask || needs_subgroup_invocation_id)) { - uint32_t offset = ir.increase_bound_by(3); - uint32_t type_id = offset; - uint32_t type_ptr_id = offset + 1; - uint32_t var_id = offset + 2; + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; // Create gl_SubgroupInvocationID. 
- SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); - SPIRType uint_type_ptr; - uint_type_ptr = uint_type; + uint_type_ptr = get_uint_type(); uint_type_ptr.pointer = true; - uint_type_ptr.parent_type = type_id; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); uint_type_ptr.storage = StorageClassInput; auto &ptr_type = set(type_ptr_id, uint_type_ptr); - ptr_type.self = type_id; + ptr_type.self = get_uint_type_id(); set(var_id, type_ptr_id, StorageClassInput); set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupLocalInvocationId); @@ -431,31 +733,158 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var_id); } - if (!has_subgroup_size && need_subgroup_ge_mask) + if (!has_subgroup_size && (need_subgroup_ge_mask || needs_subgroup_size)) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_SubgroupSize. + SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupSize); + builtin_subgroup_size_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var_id); + } + + if (need_dispatch_base || need_vertex_base_params) + { + if (workgroup_id_type == 0) + workgroup_id_type = build_extended_vector_type(get_uint_type_id(), 3); + uint32_t var_id; + if (msl_options.supports_msl_version(1, 2)) + { + // If we have MSL 1.2, we can (ab)use the [[grid_origin]] builtin + // to convey this information and save a buffer slot. 
+ uint32_t offset = ir.increase_bound_by(1); + var_id = offset; + + set(var_id, workgroup_id_type, StorageClassInput); + set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase); + get_entry_point().interface_variables.push_back(var_id); + } + else + { + // Otherwise, we need to fall back to a good ol' fashioned buffer. + uint32_t offset = ir.increase_bound_by(2); + var_id = offset; + uint32_t type_id = offset + 1; + + SPIRType var_type = get(workgroup_id_type); + var_type.storage = StorageClassUniform; + set(type_id, var_type); + + set(var_id, type_id, StorageClassUniform); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, ~(5u)); + set_decoration(var_id, DecorationBinding, msl_options.indirect_params_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, + msl_options.indirect_params_buffer_index); + } + set_name(var_id, "spvDispatchBase"); + builtin_dispatch_base_id = var_id; + } + + if (has_additional_fixed_sample_mask() && !does_shader_write_sample_mask) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t var_id = offset + 1; + + // Create gl_SampleMask. + SPIRType uint_type_ptr_out; + uint_type_ptr_out = get_uint_type(); + uint_type_ptr_out.pointer = true; + uint_type_ptr_out.pointer_depth++; + uint_type_ptr_out.parent_type = get_uint_type_id(); + uint_type_ptr_out.storage = StorageClassOutput; + + auto &ptr_out_type = set(offset, uint_type_ptr_out); + ptr_out_type.self = get_uint_type_id(); + set(var_id, offset, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInSampleMask); + builtin_sample_mask_id = var_id; + mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var_id); + } + + if (!has_helper_invocation && needs_helper_invocation) { uint32_t offset = ir.increase_bound_by(3); uint32_t type_id = offset; uint32_t type_ptr_id = offset + 1; uint32_t var_id = offset + 2; - // Create gl_SubgroupSize. 
- SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); + // Create gl_HelperInvocation. + SPIRType bool_type; + bool_type.basetype = SPIRType::Boolean; + bool_type.width = 8; + bool_type.vecsize = 1; + set(type_id, bool_type); + + SPIRType bool_type_ptr_in; + bool_type_ptr_in = bool_type; + bool_type_ptr_in.pointer = true; + bool_type_ptr_in.pointer_depth++; + bool_type_ptr_in.parent_type = type_id; + bool_type_ptr_in.storage = StorageClassInput; + + auto &ptr_in_type = set(type_ptr_id, bool_type_ptr_in); + ptr_in_type.self = type_id; + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInHelperInvocation); + builtin_helper_invocation_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInHelperInvocation, var_id); + } + if (need_local_invocation_index && !has_local_invocation_index) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_LocalInvocationIndex. SPIRType uint_type_ptr; - uint_type_ptr = uint_type; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLocalInvocationIndex); + builtin_local_invocation_index_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var_id); + } + + if (need_workgroup_size && !has_workgroup_size) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_WorkgroupSize. 
+ uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3); + SPIRType uint_type_ptr = get(type_id); uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; uint_type_ptr.parent_type = type_id; uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); ptr_type.self = type_id; - set(var_id, type_ptr_id, StorageClassInput); - set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupSize); - builtin_subgroup_size_id = var_id; - mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var_id); + set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize); + builtin_workgroup_size_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id); } } @@ -470,7 +899,7 @@ void CompilerMSL::build_implicit_builtins() swizzle_buffer_id = var_id; } - if (!buffers_requiring_array_length.empty()) + if (needs_buffer_size_buffer()) { uint32_t var_id = build_constant_uint_array_pointer(); set_name(var_id, "spvBufferSizeConstants"); @@ -491,18 +920,127 @@ void CompilerMSL::build_implicit_builtins() set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index); view_mask_buffer_id = var_id; } -} -void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id) -{ - Bitset *active_builtins = nullptr; - switch (storage) + if (!buffers_requiring_dynamic_offset.empty()) { - case StorageClassInput: - active_builtins = &active_input_builtins; - break; - - case StorageClassOutput: + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvDynamicOffsets"); + // This should never match anything. 
+ set_decoration(var_id, DecorationDescriptorSet, ~(5u)); + set_decoration(var_id, DecorationBinding, msl_options.dynamic_offsets_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, + msl_options.dynamic_offsets_buffer_index); + dynamic_offsets_buffer_id = var_id; + } + + // If we're returning a struct from a vertex-like entry point, we must return a position attribute. + bool need_position = (get_execution_model() == ExecutionModelVertex || is_tese_shader()) && + !capture_output_to_buffer && !get_is_rasterization_disabled() && + !active_output_builtins.get(BuiltInPosition); + + if (need_position) + { + // If we can get away with returning void from entry point, we don't need to care. + // If there is at least one other stage output, we need to return [[position]], + // so we need to create one if it doesn't appear in the SPIR-V. Before adding the + // implicit variable, check if it actually exists already, but just has not been used + // or initialized, and if so, mark it as active, and do not create the implicit variable. 
+ bool has_output = false; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self)) + { + has_output = true; + + // Check if the var is the Position builtin + if (has_decoration(var.self, DecorationBuiltIn) && get_decoration(var.self, DecorationBuiltIn) == BuiltInPosition) + active_output_builtins.set(BuiltInPosition); + + // If the var is a struct, check if any members is the Position builtin + auto &var_type = get_variable_element_type(var); + if (var_type.basetype == SPIRType::Struct) + { + auto mbr_cnt = var_type.member_types.size(); + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + auto builtin = BuiltInMax; + bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); + if (is_builtin && builtin == BuiltInPosition) + active_output_builtins.set(BuiltInPosition); + } + } + } + }); + need_position = has_output && !active_output_builtins.get(BuiltInPosition); + } + + if (need_position) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + // Create gl_Position. + SPIRType vec4_type; + vec4_type.basetype = SPIRType::Float; + vec4_type.width = 32; + vec4_type.vecsize = 4; + set(type_id, vec4_type); + + SPIRType vec4_type_ptr; + vec4_type_ptr = vec4_type; + vec4_type_ptr.pointer = true; + vec4_type_ptr.pointer_depth++; + vec4_type_ptr.parent_type = type_id; + vec4_type_ptr.storage = StorageClassOutput; + auto &ptr_type = set(type_ptr_id, vec4_type_ptr); + ptr_type.self = type_id; + + set(var_id, type_ptr_id, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInPosition); + mark_implicit_builtin(StorageClassOutput, BuiltInPosition, var_id); + } +} + +// Checks if the specified builtin variable (e.g. gl_InstanceIndex) is marked as active. +// If not, it marks it as active and forces a recompilation. 
+// This might be used when the optimization of inactive builtins was too optimistic (e.g. when "spvOut" is emitted). +void CompilerMSL::ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin) +{ + Bitset *active_builtins = nullptr; + switch (storage) + { + case StorageClassInput: + active_builtins = &active_input_builtins; + break; + + case StorageClassOutput: + active_builtins = &active_output_builtins; + break; + + default: + break; + } + + // At this point, the specified builtin variable must have already been declared in the entry point. + // If not, mark as active and force recompile. + if (active_builtins != nullptr && !active_builtins->get(builtin)) + { + active_builtins->set(builtin); + force_recompile(); + } +} + +void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id) +{ + Bitset *active_builtins = nullptr; + switch (storage) + { + case StorageClassInput: + active_builtins = &active_input_builtins; + break; + + case StorageClassOutput: active_builtins = &active_output_builtins; break; @@ -512,27 +1050,24 @@ void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, u assert(active_builtins != nullptr); active_builtins->set(builtin); - get_entry_point().interface_variables.push_back(id); + + auto &var = get_entry_point().interface_variables; + if (find(begin(var), end(var), VariableID(id)) == end(var)) + var.push_back(id); } uint32_t CompilerMSL::build_constant_uint_array_pointer() { - uint32_t offset = ir.increase_bound_by(4); - uint32_t type_id = offset; - uint32_t type_ptr_id = offset + 1; - uint32_t type_ptr_ptr_id = offset + 2; - uint32_t var_id = offset + 3; + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_ptr_id = offset; + uint32_t type_ptr_ptr_id = offset + 1; + uint32_t var_id = offset + 2; // Create a buffer to hold extra data, including the swizzle constants. 
- SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); - - SPIRType uint_type_pointer = uint_type; + SPIRType uint_type_pointer = get_uint_type(); uint_type_pointer.pointer = true; - uint_type_pointer.pointer_depth = 1; - uint_type_pointer.parent_type = type_id; + uint_type_pointer.pointer_depth++; + uint_type_pointer.parent_type = get_uint_type_id(); uint_type_pointer.storage = StorageClassUniform; set(type_ptr_id, uint_type_pointer); set_decoration(type_ptr_id, DecorationArrayStride, 4); @@ -591,14 +1126,35 @@ SPIRType &CompilerMSL::get_patch_stage_out_struct_type() std::string CompilerMSL::get_tess_factor_struct_name() { - if (get_entry_point().flags.get(ExecutionModeTriangles)) + if (is_tessellating_triangles()) return "MTLTriangleTessellationFactorsHalf"; return "MTLQuadTessellationFactorsHalf"; } +SPIRType &CompilerMSL::get_uint_type() +{ + return get(get_uint_type_id()); +} + +uint32_t CompilerMSL::get_uint_type_id() +{ + if (uint_type_id != 0) + return uint_type_id; + + uint_type_id = ir.increase_bound_by(1); + + SPIRType type; + type.basetype = SPIRType::UInt; + type.width = 32; + set(uint_type_id, type); + return uint_type_id; +} + void CompilerMSL::emit_entry_point_declarations() { // FIXME: Get test coverage here ... + // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries + declare_complex_constant_arrays(); // Emit constexpr samplers here. for (auto &samp : constexpr_samplers_by_id) @@ -717,30 +1273,170 @@ void CompilerMSL::emit_entry_point_declarations() convert_to_string(s.lod_clamp_max, current_locale_radix_character), ")")); } - statement("constexpr sampler ", - type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), - "(", merge(args), ");"); + // If we would emit no arguments, then omit the parentheses entirely. Otherwise, + // we'll wind up with a "most vexing parse" situation. 
+ if (args.empty()) + statement("constexpr sampler ", + type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), + ";"); + else + statement("constexpr sampler ", + type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), + "(", merge(args), ");"); + } + + // Emit dynamic buffers here. + for (auto &dynamic_buffer : buffers_requiring_dynamic_offset) + { + if (!dynamic_buffer.second.second) + { + // Could happen if no buffer was used at requested binding point. + continue; + } + + const auto &var = get(dynamic_buffer.second.second); + uint32_t var_id = var.self; + const auto &type = get_variable_data_type(var); + string name = to_name(var.self); + uint32_t desc_set = get_decoration(var.self, DecorationDescriptorSet); + uint32_t arg_id = argument_buffer_ids[desc_set]; + uint32_t base_index = dynamic_buffer.second.first; + + if (!type.array.empty()) + { + // This is complicated, because we need to support arrays of arrays. + // And it's even worse if the outermost dimension is a runtime array, because now + // all this complicated goop has to go into the shader itself. 
(FIXME) + if (!type.array[type.array.size() - 1]) + SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet."); + else + { + is_using_builtin_array = true; + statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, true), name, + type_to_array_glsl(type), " ="); + + uint32_t dim = uint32_t(type.array.size()); + uint32_t j = 0; + for (SmallVector indices(type.array.size()); + indices[type.array.size() - 1] < to_array_size_literal(type); j++) + { + while (dim > 0) + { + begin_scope(); + --dim; + } + + string arrays; + for (uint32_t i = uint32_t(type.array.size()); i; --i) + arrays += join("[", indices[i - 1], "]"); + statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ", + to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ", + to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"), + arrays, " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + j, "]),"); + + while (++indices[dim] >= to_array_size_literal(type, dim) && dim < type.array.size() - 1) + { + end_scope(","); + indices[dim++] = 0; + } + } + end_scope_decl(); + statement_no_indent(""); + is_using_builtin_array = false; + } + } + else + { + statement(get_argument_address_space(var), " auto& ", to_restrict(var_id, true), name, " = *(", + get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, false), ")((", + get_argument_address_space(var), " char* ", to_restrict(var_id, false), ")", to_name(arg_id), ".", + ensure_valid_name(name, "m"), " + ", to_name(dynamic_offsets_buffer_id), "[", base_index, "]);"); + } } // Emit buffer arrays here. 
- for (uint32_t array_id : buffer_arrays) + for (uint32_t array_id : buffer_arrays_discrete) { const auto &var = get(array_id); const auto &type = get_variable_data_type(var); + const auto &buffer_type = get_variable_element_type(var); string name = to_name(array_id); - statement(get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + name + "[] ="); + statement(get_argument_address_space(var), " ", type_to_glsl(buffer_type), "* ", to_restrict(array_id, true), name, + "[] ="); begin_scope(); - for (uint32_t i = 0; i < type.array[0]; ++i) - statement(name + "_" + convert_to_string(i) + ","); + for (uint32_t i = 0; i < to_array_size_literal(type); ++i) + statement(name, "_", i, ","); end_scope_decl(); statement_no_indent(""); } - // For some reason, without this, we end up emitting the arrays twice. - buffer_arrays.clear(); + // Discrete descriptors are processed in entry point emission every compiler iteration. + buffer_arrays_discrete.clear(); + + // Emit buffer aliases here. + for (auto &var_id : buffer_aliases_discrete) + { + const auto &var = get(var_id); + const auto &type = get_variable_data_type(var); + auto addr_space = get_argument_address_space(var); + auto name = to_name(var_id); + + uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); + uint32_t desc_binding = get_decoration(var_id, DecorationBinding); + auto alias_name = join("spvBufferAliasSet", desc_set, "Binding", desc_binding); + + statement(addr_space, " auto& ", to_restrict(var_id, true), + name, + " = *(", addr_space, " ", type_to_glsl(type), "*)", alias_name, ";"); + } + // Discrete descriptors are processed in entry point emission every compiler iteration. 
+ buffer_aliases_discrete.clear(); + + for (auto &var_pair : buffer_aliases_argument) + { + uint32_t var_id = var_pair.first; + uint32_t alias_id = var_pair.second; + + const auto &var = get(var_id); + const auto &type = get_variable_data_type(var); + auto addr_space = get_argument_address_space(var); + + if (type.array.empty()) + { + statement(addr_space, " auto& ", to_restrict(var_id, true), to_name(var_id), " = (", addr_space, " ", + type_to_glsl(type), "&)", ir.meta[alias_id].decoration.qualified_alias, ";"); + } + else + { + const char *desc_addr_space = descriptor_address_space(var_id, var.storage, "thread"); + + // Esoteric type cast. Reference to array of pointers. + // Auto here defers to UBO or SSBO. The address space of the reference needs to refer to the + // address space of the argument buffer itself, which is usually constant, but can be const device for + // large argument buffers. + is_using_builtin_array = true; + statement(desc_addr_space, " auto& ", to_restrict(var_id, true), to_name(var_id), " = (", addr_space, " ", + type_to_glsl(type), "* ", desc_addr_space, " (&)", + type_to_array_glsl(type), ")", ir.meta[alias_id].decoration.qualified_alias, ";"); + is_using_builtin_array = false; + } + } + + // Emit disabled fragment outputs. + std::sort(disabled_frag_outputs.begin(), disabled_frag_outputs.end()); + for (uint32_t var_id : disabled_frag_outputs) + { + auto &var = get(var_id); + add_local_variable_name(var_id); + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } } string CompilerMSL::compile() { + replace_illegal_entry_point_names(); + ir.fixup_reserved_names(); + // Do not deal with GLES-isms like precision, older extensions and such. 
options.vulkan_semantics = true; options.es = false; @@ -749,14 +1445,14 @@ string CompilerMSL::compile() backend.float_literal_suffix = false; backend.uint32_t_literal_suffix = true; backend.int16_t_literal_suffix = ""; - backend.uint16_t_literal_suffix = "u"; + backend.uint16_t_literal_suffix = ""; backend.basic_int_type = "int"; backend.basic_uint_type = "uint"; backend.basic_int8_type = "char"; backend.basic_uint8_type = "uchar"; backend.basic_int16_type = "short"; backend.basic_uint16_type = "ushort"; - backend.discard_literal = "discard_fragment()"; + backend.boolean_mix_function = "select"; backend.swizzle_is_function = false; backend.shared_is_implied = false; backend.use_initializer_list = true; @@ -764,14 +1460,21 @@ string CompilerMSL::compile() backend.native_row_major_matrix = false; backend.unsized_array_supported = false; backend.can_declare_arrays_inline = false; - backend.can_return_array = false; - backend.boolean_mix_support = false; backend.allow_truncated_access_chain = true; - backend.array_is_value_type = false; backend.comparison_image_samples_scalar = true; backend.native_pointers = true; backend.nonuniform_qualifier = ""; backend.support_small_type_sampling_result = true; + backend.supports_empty_struct = true; + backend.support_64bit_switch = true; + + // Allow Metal to use the array template unless we force it off. + backend.can_return_array = !msl_options.force_native_arrays; + backend.array_is_value_type = !msl_options.force_native_arrays; + // Arrays which are part of buffer objects are never considered to be value types (just plain C-style). 
+ backend.array_is_value_type_in_buffer_blocks = false; + backend.support_pointer_to_pointer = true; + backend.implicit_c_integer_promotion_rules = true; capture_output_to_buffer = msl_options.capture_output_to_buffer; is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; @@ -780,29 +1483,53 @@ string CompilerMSL::compile() for (auto &id : next_metal_resource_ids) id = 0; + fixup_anonymous_struct_names(); fixup_type_alias(); replace_illegal_names(); - - struct_member_padding.clear(); + sync_entry_point_aliases_and_names(); build_function_control_flow_graphs_and_analyze(); update_active_builtins(); analyze_image_and_sampler_usage(); analyze_sampled_image_usage(); + analyze_interlocked_resource_usage(); preprocess_op_codes(); build_implicit_builtins(); + if (needs_manual_helper_invocation_updates() && + (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation)) + { + string discard_expr = + join(builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), " = true, discard_fragment()"); + backend.discard_literal = discard_expr; + backend.demote_literal = discard_expr; + } + else + { + backend.discard_literal = "discard_fragment()"; + backend.demote_literal = "discard_fragment()"; + } + fixup_image_load_store_access(); set_enabled_interface_variables(get_active_interface_variables()); + if (msl_options.force_active_argument_buffer_resources) + activate_argument_buffer_resources(); + if (swizzle_buffer_id) - active_interface_variables.insert(swizzle_buffer_id); + add_active_interface_variable(swizzle_buffer_id); if (buffer_size_buffer_id) - active_interface_variables.insert(buffer_size_buffer_id); + add_active_interface_variable(buffer_size_buffer_id); if (view_mask_buffer_id) - active_interface_variables.insert(view_mask_buffer_id); + add_active_interface_variable(view_mask_buffer_id); + if (dynamic_offsets_buffer_id) + add_active_interface_variable(dynamic_offsets_buffer_id); if (builtin_layer_id) - 
active_interface_variables.insert(builtin_layer_id); + add_active_interface_variable(builtin_layer_id); + if (builtin_dispatch_base_id && !msl_options.supports_msl_version(1, 2)) + add_active_interface_variable(builtin_dispatch_base_id); + if (builtin_sample_mask_id) + add_active_interface_variable(builtin_sample_mask_id); // Create structs to hold input, output and uniform variables. // Do output first to ensure out. is declared at top of entry function. @@ -810,10 +1537,10 @@ string CompilerMSL::compile() stage_out_var_id = add_interface_block(StorageClassOutput); patch_stage_out_var_id = add_interface_block(StorageClassOutput, true); stage_in_var_id = add_interface_block(StorageClassInput); - if (get_execution_model() == ExecutionModelTessellationEvaluation) + if (is_tese_shader()) patch_stage_in_var_id = add_interface_block(StorageClassInput, true); - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader()) stage_out_ptr_var_id = add_interface_block_pointer(stage_out_var_id, StorageClassOutput); if (is_tessellation_shader()) stage_in_ptr_var_id = add_interface_block_pointer(stage_in_var_id, StorageClassInput); @@ -846,10 +1573,7 @@ string CompilerMSL::compile() uint32_t pass_count = 0; do { - if (pass_count >= 3) - SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); - - reset(); + reset(pass_count); // Start bindings at zero. next_metal_resource_index_buffer = 0; @@ -862,9 +1586,10 @@ string CompilerMSL::compile() buffer.reset(); emit_header(); + emit_custom_templates(); + emit_custom_functions(); emit_specialization_constants_and_structs(); emit_resources(); - emit_custom_functions(); emit_function(get(ir.default_entry_point), Bitset()); pass_count++; @@ -887,13 +1612,15 @@ void CompilerMSL::preprocess_op_codes() add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\""); } - // Metal vertex functions that write to resources must disable rasterization and return void. 
- if (preproc.uses_resource_write) + // Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to + // resources must disable rasterization and return void. + if ((preproc.uses_buffer_write && !msl_options.supports_msl_version(2, 1)) || + (preproc.uses_image_write && !msl_options.supports_msl_version(2, 2))) is_rasterization_disabled = true; // Tessellation control shaders are run as compute functions in Metal, and so // must capture their output to a buffer. - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader() || (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)) { is_rasterization_disabled = true; capture_output_to_buffer = true; @@ -901,6 +1628,44 @@ void CompilerMSL::preprocess_op_codes() if (preproc.needs_subgroup_invocation_id) needs_subgroup_invocation_id = true; + if (preproc.needs_subgroup_size) + needs_subgroup_size = true; + // build_implicit_builtins() hasn't run yet, and in fact, this needs to execute + // before then so that gl_SampleID will get added; so we also need to check if + // that function would add gl_FragCoord. + if (preproc.needs_sample_id || msl_options.force_sample_rate_shading || + (is_sample_rate() && (active_input_builtins.get(BuiltInFragCoord) || + (need_subpass_input_ms && !msl_options.use_framebuffer_fetch_subpasses)))) + needs_sample_id = true; + if (preproc.needs_helper_invocation) + needs_helper_invocation = true; + + // OpKill is removed by the parser, so we need to identify those by inspecting + // blocks. + ir.for_each_typed_id([&preproc](uint32_t, SPIRBlock &block) { + if (block.terminator == SPIRBlock::Kill) + preproc.uses_discard = true; + }); + + // Fragment shaders that both write to storage resources and discard fragments + // need checks on the writes, to work around Metal allowing these writes despite + // the fragment being dead. 
+ if (msl_options.check_discarded_frag_stores && preproc.uses_discard && + (preproc.uses_buffer_write || preproc.uses_image_write)) + { + frag_shader_needs_discard_checks = true; + needs_helper_invocation = true; + // Fragment discard store checks imply manual HelperInvocation updates. + msl_options.manual_helper_invocation_updates = true; + } + + if (is_intersection_query()) + { + add_header_line("#if __METAL_VERSION__ >= 230"); + add_header_line("#include "); + add_header_line("using namespace metal::raytracing;"); + add_header_line("#endif"); + } } // Move the Private and Workgroup global variables to the entry function. @@ -931,6 +1696,30 @@ void CompilerMSL::extract_global_variables_from_functions() // Uniforms unordered_set global_var_ids; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + // Some builtins resolve directly to a function call which does not need any declared variables. + // Skip these. + if (var.storage == StorageClassInput && has_decoration(var.self, DecorationBuiltIn)) + { + auto bi_type = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + if (bi_type == BuiltInHelperInvocation && !needs_manual_helper_invocation_updates()) + return; + if (bi_type == BuiltInHelperInvocation && needs_manual_helper_invocation_updates()) + { + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.3 on iOS."); + else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS."); + // Make sure this is declared and initialized. + // Force this to have the proper name. 
+ set_name(var.self, builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput)); + auto &entry_func = this->get(ir.default_entry_point); + entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + entry_func.fixup_hooks_in.push_back([this, &var]() + { statement(to_name(var.self), " = simd_is_helper_thread();"); }); + } + } + if (var.storage == StorageClassInput || var.storage == StorageClassOutput || var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) @@ -990,12 +1779,26 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: if (global_var_ids.find(base_id) != global_var_ids.end()) added_arg_ids.insert(base_id); + // Use Metal's native frame-buffer fetch API for subpass inputs. auto &type = get(ops[0]); - if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && + (!msl_options.use_framebuffer_fetch_subpasses)) { // Implicitly reads gl_FragCoord. assert(builtin_frag_coord_id != 0); added_arg_ids.insert(builtin_frag_coord_id); + if (msl_options.multiview) + { + // Implicitly reads gl_ViewIndex. + assert(builtin_view_idx_id != 0); + added_arg_ids.insert(builtin_view_idx_id); + } + else if (msl_options.arrayed_subpass_input) + { + // Implicitly reads gl_Layer. 
+ assert(builtin_layer_id != 0); + added_arg_ids.insert(builtin_layer_id); + } } break; @@ -1025,6 +1828,14 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: uint32_t base_id = ops[0]; if (global_var_ids.find(base_id) != global_var_ids.end()) added_arg_ids.insert(base_id); + + uint32_t rvalue_id = ops[1]; + if (global_var_ids.find(rvalue_id) != global_var_ids.end()) + added_arg_ids.insert(rvalue_id); + + if (needs_frag_discard_checks()) + added_arg_ids.insert(builtin_helper_invocation_id); + break; } @@ -1039,10 +1850,164 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: break; } + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicStore: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + case OpImageWrite: + if (needs_frag_discard_checks()) + added_arg_ids.insert(builtin_helper_invocation_id); + break; + + // Emulate texture2D atomic operations + case OpImageTexelPointer: + { + // When using the pointer, we need to know which variable it is actually loaded from. + uint32_t base_id = ops[2]; + auto *var = maybe_get_backing_variable(base_id); + if (var && atomic_image_vars.count(var->self)) + { + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + } + break; + } + + case OpExtInst: + { + uint32_t extension_set = ops[2]; + if (get(extension_set).ext == SPIRExtension::GLSL) + { + auto op_450 = static_cast(ops[3]); + switch (op_450) + { + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + { + // For these, we really need the stage-in block. 
It is theoretically possible to pass the + // interpolant object, but a) doing so would require us to create an entirely new variable + // with Interpolant type, and b) if we have a struct or array, handling all the members and + // elements could get unwieldy fast. + added_arg_ids.insert(stage_in_var_id); + break; + } + + case GLSLstd450Modf: + case GLSLstd450Frexp: + { + uint32_t base_id = ops[5]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + break; + } + + default: + break; + } + } + break; + } + + case OpGroupNonUniformInverseBallot: + { + added_arg_ids.insert(builtin_subgroup_invocation_id_id); + break; + } + + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + { + added_arg_ids.insert(builtin_subgroup_size_id); + break; + } + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast(ops[3]); + switch (operation) + { + case GroupOperationReduce: + added_arg_ids.insert(builtin_subgroup_size_id); + break; + case GroupOperationInclusiveScan: + case GroupOperationExclusiveScan: + added_arg_ids.insert(builtin_subgroup_invocation_id_id); + break; + default: + break; + } + break; + } + + case OpDemoteToHelperInvocation: + if (needs_manual_helper_invocation_updates() && + (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation)) + added_arg_ids.insert(builtin_helper_invocation_id); + break; + + case OpIsHelperInvocationEXT: + if (needs_manual_helper_invocation_updates()) + added_arg_ids.insert(builtin_helper_invocation_id); + break; + + case OpRayQueryInitializeKHR: + case OpRayQueryProceedKHR: + case OpRayQueryTerminateKHR: + case OpRayQueryGenerateIntersectionKHR: + case OpRayQueryConfirmIntersectionKHR: + { + // Ray query accesses memory directly, need check pass down object if using Private storage class. 
+ uint32_t base_id = ops[0]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + break; + } + + case OpRayQueryGetRayTMinKHR: + case OpRayQueryGetRayFlagsKHR: + case OpRayQueryGetWorldRayOriginKHR: + case OpRayQueryGetWorldRayDirectionKHR: + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: + case OpRayQueryGetIntersectionTypeKHR: + case OpRayQueryGetIntersectionTKHR: + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: + case OpRayQueryGetIntersectionInstanceIdKHR: + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: + case OpRayQueryGetIntersectionGeometryIndexKHR: + case OpRayQueryGetIntersectionPrimitiveIndexKHR: + case OpRayQueryGetIntersectionBarycentricsKHR: + case OpRayQueryGetIntersectionFrontFaceKHR: + case OpRayQueryGetIntersectionObjectRayDirectionKHR: + case OpRayQueryGetIntersectionObjectRayOriginKHR: + case OpRayQueryGetIntersectionObjectToWorldKHR: + case OpRayQueryGetIntersectionWorldToObjectKHR: + { + // Ray query accesses memory directly, need check pass down object if using Private storage class. + uint32_t base_id = ops[2]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + break; + } + default: break; } + if (needs_manual_helper_invocation_updates() && b.terminator == SPIRBlock::Kill && + (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation)) + added_arg_ids.insert(builtin_helper_invocation_id); + // TODO: Add all other operations which can affect memory. // We should consider a more unified system here to reduce boiler-plate. // This kind of analysis is done in several places ... 
@@ -1054,8 +2019,11 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: // Add the global variables as arguments to the function if (func_id != ir.default_entry_point) { - bool added_in = false; - bool added_out = false; + bool control_point_added_in = false; + bool control_point_added_out = false; + bool patch_added_in = false; + bool patch_added_out = false; + for (uint32_t arg_id : added_arg_ids) { auto &var = get(arg_id); @@ -1063,42 +2031,77 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: auto *p_type = &get(type_id); BuiltIn bi_type = BuiltIn(get_decoration(arg_id, DecorationBuiltIn)); - if (((is_tessellation_shader() && var.storage == StorageClassInput) || - (get_execution_model() == ExecutionModelTessellationControl && var.storage == StorageClassOutput)) && - !(has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type)) && - (!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || - bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || - p_type->basetype == SPIRType::Struct)) + bool is_patch = has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type); + bool is_block = has_decoration(p_type->self, DecorationBlock); + bool is_control_point_storage = + !is_patch && ((is_tessellation_shader() && var.storage == StorageClassInput) || + (is_tesc_shader() && var.storage == StorageClassOutput)); + bool is_patch_block_storage = is_patch && is_block && var.storage == StorageClassOutput; + bool is_builtin = is_builtin_variable(var); + bool variable_is_stage_io = + !is_builtin || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || + p_type->basetype == SPIRType::Struct; + bool is_redirected_to_global_stage_io = (is_control_point_storage || is_patch_block_storage) && + variable_is_stage_io; + + // If output is masked it is not considered part of the global stage 
IO interface. + if (is_redirected_to_global_stage_io && var.storage == StorageClassOutput) + is_redirected_to_global_stage_io = !is_stage_output_variable_masked(var); + + if (is_redirected_to_global_stage_io) { - // Tessellation control shaders see inputs and per-vertex outputs as arrays. - // Similarly, tessellation evaluation shaders see per-vertex inputs as arrays. + // Tessellation control shaders see inputs and per-point outputs as arrays. + // Similarly, tessellation evaluation shaders see per-point inputs as arrays. // We collected them into a structure; we must pass the array of this // structure to the function. std::string name; + if (is_patch) + name = var.storage == StorageClassInput ? patch_stage_in_var_name : patch_stage_out_var_name; + else + name = var.storage == StorageClassInput ? "gl_in" : "gl_out"; + + if (var.storage == StorageClassOutput && has_decoration(p_type->self, DecorationBlock)) + { + // If we're redirecting a block, we might still need to access the original block + // variable if we're masking some members. + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(p_type->member_types.size()); mbr_idx++) + { + if (is_stage_output_block_member_masked(var, mbr_idx, true)) + { + func.add_parameter(var.basetype, var.self, true); + break; + } + } + } + if (var.storage == StorageClassInput) { + auto &added_in = is_patch ? patch_added_in : control_point_added_in; if (added_in) continue; - name = input_wg_var_name; - arg_id = stage_in_ptr_var_id; + arg_id = is_patch ? patch_stage_in_var_id : stage_in_ptr_var_id; added_in = true; } else if (var.storage == StorageClassOutput) { + auto &added_out = is_patch ? patch_added_out : control_point_added_out; if (added_out) continue; - name = "gl_out"; - arg_id = stage_out_ptr_var_id; + arg_id = is_patch ? 
patch_stage_out_var_id : stage_out_ptr_var_id; added_out = true; } + type_id = get(arg_id).basetype; uint32_t next_id = ir.increase_bound_by(1); func.add_parameter(type_id, next_id, true); set(next_id, type_id, StorageClassFunction, 0, arg_id); set_name(next_id, name); + if (is_tese_shader() && msl_options.raw_buffer_tese_input && var.storage == StorageClassInput) + set_decoration(next_id, DecorationNonWritable); } - else if (is_builtin_variable(var) && p_type->basetype == SPIRType::Struct) + else if (is_builtin && has_decoration(p_type->self, DecorationBlock)) { // Get the pointee type type_id = get_pointee_type_id(type_id); @@ -1108,7 +2111,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: for (auto &mbr_type_id : p_type->member_types) { BuiltIn builtin = BuiltInMax; - bool is_builtin = is_member_builtin(*p_type, mbr_idx, &builtin); + is_builtin = is_member_builtin(*p_type, mbr_idx, &builtin); if (is_builtin && has_active_builtin(builtin, var.storage)) { // Add a arg variable with the same type and decorations as the member @@ -1122,6 +2125,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: ptr.self = mbr_type_id; ptr.storage = var.storage; ptr.pointer = true; + ptr.pointer_depth++; ptr.parent_type = mbr_type_id; func.add_parameter(mbr_type_id, var_id, true); @@ -1159,11 +2163,25 @@ void CompilerMSL::mark_packable_structs() (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))) mark_as_packable(type); } + + if (var.storage == StorageClassWorkgroup) + { + auto *type = &this->get(var.basetype); + if (type->basetype == SPIRType::Struct) + mark_as_workgroup_struct(*type); + } + }); + + // Physical storage buffer pointers can appear outside of the context of a variable, if the address + // is calculated from a ulong or uvec2 and cast to a pointer, so check if they need to be packed too. 
+ ir.for_each_typed_id([&](uint32_t, SPIRType &type) { + if (type.basetype == SPIRType::Struct && type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + mark_as_packable(type); }); } // If the specified type is a struct, it and any nested structs -// are marked as packable with the SPIRVCrossDecorationPacked decoration, +// are marked as packable with the SPIRVCrossDecorationBufferBlockRepacked decoration, void CompilerMSL::mark_as_packable(SPIRType &type) { // If this is not the base type (eg. it's a pointer or array), tunnel down @@ -1173,12 +2191,13 @@ void CompilerMSL::mark_as_packable(SPIRType &type) return; } - if (type.basetype == SPIRType::Struct) + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + if (type.basetype == SPIRType::Struct && !has_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked)) { - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked); // Recurse - size_t mbr_cnt = type.member_types.size(); + uint32_t mbr_cnt = uint32_t(type.member_types.size()); for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) { uint32_t mbr_type_id = type.member_types[mbr_idx]; @@ -1193,36 +2212,215 @@ void CompilerMSL::mark_as_packable(SPIRType &type) } } -// If a vertex attribute exists at the location, it is marked as being used by this shader -void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, StorageClass storage) -{ - if ((get_execution_model() == ExecutionModelVertex || is_tessellation_shader()) && (storage == StorageClassInput)) - vtx_attrs_in_use.insert(location); -} - -uint32_t CompilerMSL::get_target_components_for_fragment_location(uint32_t location) const +// If the specified type is a struct, it and any nested structs +// are marked as used with workgroup storage using the SPIRVCrossDecorationWorkgroupStruct decoration. 
+void CompilerMSL::mark_as_workgroup_struct(SPIRType &type) { - auto itr = fragment_output_components.find(location); - if (itr == end(fragment_output_components)) - return 4; - else - return itr->second; -} + // If this is not the base type (eg. it's a pointer or array), tunnel down + if (type.parent_type) + { + mark_as_workgroup_struct(get(type.parent_type)); + return; + } -uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t components) -{ - uint32_t new_type_id = ir.increase_bound_by(1); - auto &type = set(new_type_id, get(type_id)); - type.vecsize = components; - type.self = new_type_id; - type.parent_type = type_id; - type.pointer = false; + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + if (type.basetype == SPIRType::Struct && !has_extended_decoration(type.self, SPIRVCrossDecorationWorkgroupStruct)) + { + set_extended_decoration(type.self, SPIRVCrossDecorationWorkgroupStruct); - return new_type_id; + // Recurse + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + uint32_t mbr_type_id = type.member_types[mbr_idx]; + auto &mbr_type = get(mbr_type_id); + mark_as_workgroup_struct(mbr_type); + if (mbr_type.type_alias) + { + auto &mbr_type_alias = get(mbr_type.type_alias); + mark_as_workgroup_struct(mbr_type_alias); + } + } + } +} + +// If a shader input exists at the location, it is marked as being used by this shader +void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, + StorageClass storage, bool fallback) +{ + uint32_t count = type_to_location_count(type); + switch (storage) + { + case StorageClassInput: + for (uint32_t i = 0; i < count; i++) + { + location_inputs_in_use.insert(location + i); + if (fallback) + location_inputs_in_use_fallback.insert(location + i); + } + break; + case StorageClassOutput: + for (uint32_t i = 0; i < count; i++) + { + 
location_outputs_in_use.insert(location + i); + if (fallback) + location_outputs_in_use_fallback.insert(location + i); + } + break; + default: + return; + } +} + +uint32_t CompilerMSL::get_target_components_for_fragment_location(uint32_t location) const +{ + auto itr = fragment_output_components.find(location); + if (itr == end(fragment_output_components)) + return 4; + else + return itr->second; +} + +uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t components, SPIRType::BaseType basetype) +{ + uint32_t new_type_id = ir.increase_bound_by(1); + auto &old_type = get(type_id); + auto *type = &set(new_type_id, old_type); + type->vecsize = components; + if (basetype != SPIRType::Unknown) + type->basetype = basetype; + type->self = new_type_id; + type->parent_type = type_id; + type->array.clear(); + type->array_size_literal.clear(); + type->pointer = false; + + if (is_array(old_type)) + { + uint32_t array_type_id = ir.increase_bound_by(1); + type = &set(array_type_id, *type); + type->parent_type = new_type_id; + type->array = old_type.array; + type->array_size_literal = old_type.array_size_literal; + new_type_id = array_type_id; + } + + if (old_type.pointer) + { + uint32_t ptr_type_id = ir.increase_bound_by(1); + type = &set(ptr_type_id, *type); + type->self = new_type_id; + type->parent_type = new_type_id; + type->storage = old_type.storage; + type->pointer = true; + type->pointer_depth++; + new_type_id = ptr_type_id; + } + + return new_type_id; +} + +uint32_t CompilerMSL::build_msl_interpolant_type(uint32_t type_id, bool is_noperspective) +{ + uint32_t new_type_id = ir.increase_bound_by(1); + SPIRType &type = set(new_type_id, get(type_id)); + type.basetype = SPIRType::Interpolant; + type.parent_type = type_id; + // In Metal, the pull-model interpolant type encodes perspective-vs-no-perspective in the type itself. + // Add this decoration so we know which argument to pass to the template. 
+ if (is_noperspective) + set_decoration(new_type_id, DecorationNoPerspective); + return new_type_id; +} + +bool CompilerMSL::add_component_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, + SPIRVariable &var, + const SPIRType &type, + InterfaceBlockMeta &meta) +{ + // Deal with Component decorations. + const InterfaceBlockMeta::LocationMeta *location_meta = nullptr; + uint32_t location = ~0u; + if (has_decoration(var.self, DecorationLocation)) + { + location = get_decoration(var.self, DecorationLocation); + auto location_meta_itr = meta.location_meta.find(location); + if (location_meta_itr != end(meta.location_meta)) + location_meta = &location_meta_itr->second; + } + + // Check if we need to pad fragment output to match a certain number of components. + if (location_meta) + { + bool pad_fragment_output = has_decoration(var.self, DecorationLocation) && + msl_options.pad_fragment_output_components && + get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput; + + auto &entry_func = get(ir.default_entry_point); + uint32_t start_component = get_decoration(var.self, DecorationComponent); + uint32_t type_components = type.vecsize; + uint32_t num_components = location_meta->num_components; + + if (pad_fragment_output) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + num_components = std::max(num_components, get_target_components_for_fragment_location(locn)); + } + + // We have already declared an IO block member as m_location_N. + // Just emit an early-declared variable and fixup as needed. + // Arrays need to be unrolled here since each location might need a different number of components. 
+ entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + + if (var.storage == StorageClassInput) + { + entry_func.fixup_hooks_in.push_back([=, &type, &var]() { + if (!type.array.empty()) + { + uint32_t array_size = to_array_size_literal(type); + for (uint32_t loc_off = 0; loc_off < array_size; loc_off++) + { + statement(to_name(var.self), "[", loc_off, "]", " = ", ib_var_ref, + ".m_location_", location + loc_off, + vector_swizzle(type_components, start_component), ";"); + } + } + else + { + statement(to_name(var.self), " = ", ib_var_ref, ".m_location_", location, + vector_swizzle(type_components, start_component), ";"); + } + }); + } + else + { + entry_func.fixup_hooks_out.push_back([=, &type, &var]() { + if (!type.array.empty()) + { + uint32_t array_size = to_array_size_literal(type); + for (uint32_t loc_off = 0; loc_off < array_size; loc_off++) + { + statement(ib_var_ref, ".m_location_", location + loc_off, + vector_swizzle(type_components, start_component), " = ", + to_name(var.self), "[", loc_off, "];"); + } + } + else + { + statement(ib_var_ref, ".m_location_", location, + vector_swizzle(type_components, start_component), " = ", to_name(var.self), ";"); + } + }); + } + return true; + } + else + return false; } void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, bool strip_array) + SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta) { bool is_builtin = is_builtin_variable(var); BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); @@ -1237,16 +2435,26 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co var.basetype = type_id; type_id = get_pointee_type_id(var.basetype); - if (strip_array && is_array(get(type_id))) + if (meta.strip_array && is_array(get(type_id))) type_id = get(type_id).parent_type; auto &type = get(type_id); uint32_t target_components = 0; uint32_t 
type_components = type.vecsize; + bool padded_output = false; + bool padded_input = false; + uint32_t start_component = 0; - // Check if we need to pad fragment output to match a certain number of components. - if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components && - get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput) + auto &entry_func = get(ir.default_entry_point); + + if (add_component_variable_to_interface_block(storage, ib_var_ref, var, type, meta)) + return; + + bool pad_fragment_output = has_decoration(var.self, DecorationLocation) && + msl_options.pad_fragment_output_components && + get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput; + + if (pad_fragment_output) { uint32_t locn = get_decoration(var.self, DecorationLocation); target_components = get_target_components_for_fragment_location(locn); @@ -1258,7 +2466,10 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co } } - ib_type.member_types.push_back(type_id); + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types.push_back(build_msl_interpolant_type(type_id, is_noperspective)); + else + ib_type.member_types.push_back(type_id); // Give the member a name string mbr_name = ensure_valid_name(to_expression(var.self), "m"); @@ -1266,54 +2477,109 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co // Update the original variable reference to include the structure reference string qual_var_name = ib_var_ref + "." + mbr_name; - auto &entry_func = get(ir.default_entry_point); + // If using pull-model interpolation, need to add a call to the correct interpolation method. 
+ if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + { + if (is_centroid) + qual_var_name += ".interpolate_at_centroid()"; + else if (is_sample) + qual_var_name += join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")"); + else + qual_var_name += ".interpolate_at_center()"; + } - if (padded_output) + if (padded_output || padded_input) { entry_func.add_local_variable(var.self); vars_needing_early_declaration.push_back(var.self); - entry_func.fixup_hooks_out.push_back([=, &var]() { - SPIRType &padded_type = this->get(type_id); - statement(qual_var_name, " = ", remap_swizzle(padded_type, type_components, to_name(var.self)), ";"); - }); + if (padded_output) + { + entry_func.fixup_hooks_out.push_back([=, &var]() { + statement(qual_var_name, vector_swizzle(type_components, start_component), " = ", to_name(var.self), + ";"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=, &var]() { + statement(to_name(var.self), " = ", qual_var_name, vector_swizzle(type_components, start_component), + ";"); + }); + } } - else if (!strip_array) + else if (!meta.strip_array) ir.meta[var.self].decoration.qualified_alias = qual_var_name; - if (var.storage == StorageClassOutput && var.initializer != 0) + if (var.storage == StorageClassOutput && var.initializer != ID(0)) { - entry_func.fixup_hooks_in.push_back( - [=, &var]() { statement(qual_var_name, " = ", to_expression(var.initializer), ";"); }); + if (padded_output || padded_input) + { + entry_func.fixup_hooks_in.push_back( + [=, &var]() { statement(to_name(var.self), " = ", to_expression(var.initializer), ";"); }); + } + else + { + if (meta.strip_array) + { + entry_func.fixup_hooks_in.push_back([=, &var]() { + uint32_t index = get_extended_decoration(var.self, SPIRVCrossDecorationInterfaceMemberIndex); + auto invocation = to_tesc_invocation_id(); + statement(to_expression(stage_out_ptr_var_id), "[", + invocation, "].", + to_member_name(ib_type, index), " = ", 
to_expression(var.initializer), "[", + invocation, "];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=, &var]() { + statement(qual_var_name, " = ", to_expression(var.initializer), ";"); + }); + } + } } // Copy the variable location from the original variable to the member if (get_decoration_bitset(var.self).get(DecorationLocation)) { uint32_t locn = get_decoration(var.self, DecorationLocation); - if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || is_tessellation_shader())) + uint32_t comp = get_decoration(var.self, DecorationComponent); + if (storage == StorageClassInput) { - type_id = ensure_correct_attribute_type(var.basetype, locn); + type_id = ensure_correct_input_type(var.basetype, locn, comp, 0, meta.strip_array); var.basetype = type_id; + type_id = get_pointee_type_id(type_id); - if (strip_array && is_array(get(type_id))) + if (meta.strip_array && is_array(get(type_id))) type_id = get(type_id).parent_type; - ib_type.member_types[ib_mbr_idx] = type_id; + if (pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = type_id; } set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + if (comp) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp); + mark_location_as_used_by_shader(locn, get(type_id), storage); + } + else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin)) + { + uint32_t locn = inputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, type, storage); } - else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin)) + else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && 
outputs_by_builtin.count(builtin)) { - uint32_t locn = vtx_attrs_by_builtin[builtin].location; + uint32_t locn = outputs_by_builtin[builtin].location; set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + mark_location_as_used_by_shader(locn, type, storage); } if (get_decoration_bitset(var.self).get(DecorationComponent)) { - uint32_t comp = get_decoration(var.self, DecorationComponent); - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp); + uint32_t component = get_decoration(var.self, DecorationComponent); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, component); } if (get_decoration_bitset(var.self).get(DecorationIndex)) @@ -1331,25 +2597,32 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co } // Copy interpolation decorations if needed - if (is_flat) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); - if (is_noperspective) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); - if (is_centroid) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); - if (is_sample) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + if (storage != StorageClassInput || !pull_model_inputs.count(var.self)) + { + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + } set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); } void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, bool strip_array) + SPIRType &ib_type, 
SPIRVariable &var, + InterfaceBlockMeta &meta) { auto &entry_func = get(ir.default_entry_point); - auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var); uint32_t elem_cnt = 0; + if (add_component_variable_to_interface_block(storage, ib_var_ref, var, var_type, meta)) + return; + if (is_matrix(var_type)) { if (is_array(var_type)) @@ -1382,10 +2655,33 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage if (is_builtin) set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction)); - entry_func.add_local_variable(var.self); + bool flatten_from_ib_var = false; + string flatten_from_ib_mbr_name; - // We need to declare the variable early and at entry-point scope. - vars_needing_early_declaration.push_back(var.self); + if (storage == StorageClassOutput && is_builtin && builtin == BuiltInClipDistance) + { + // Also declare [[clip_distance]] attribute here. + uint32_t clip_array_mbr_idx = uint32_t(ib_type.member_types.size()); + ib_type.member_types.push_back(get_variable_data_type_id(var)); + set_member_decoration(ib_type.self, clip_array_mbr_idx, DecorationBuiltIn, BuiltInClipDistance); + + flatten_from_ib_mbr_name = builtin_to_glsl(BuiltInClipDistance, StorageClassOutput); + set_member_name(ib_type.self, clip_array_mbr_idx, flatten_from_ib_mbr_name); + + // When we flatten, we flatten directly from the "out" struct, + // not from a function variable. + flatten_from_ib_var = true; + + if (!msl_options.enable_clip_distance_user_varying) + return; + } + else if (!meta.strip_array) + { + // Only flatten/unflatten IO composites for non-tessellation cases where arrays are not stripped. + entry_func.add_local_variable(var.self); + // We need to declare the variable early and at entry-point scope. 
+ vars_needing_early_declaration.push_back(var.self); + } for (uint32_t i = 0; i < elem_cnt; i++) { @@ -1410,7 +2706,10 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage } } - ib_type.member_types.push_back(get_pointee_type_id(type_id)); + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types.push_back(build_msl_interpolant_type(get_pointee_type_id(type_id), is_noperspective)); + else + ib_type.member_types.push_back(get_pointee_type_id(type_id)); // Give the member a name string mbr_name = ensure_valid_name(join(to_expression(var.self), "_", i), "m"); @@ -1420,21 +2719,38 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage if (get_decoration_bitset(var.self).get(DecorationLocation)) { uint32_t locn = get_decoration(var.self, DecorationLocation) + i; - if (storage == StorageClassInput && - (get_execution_model() == ExecutionModelVertex || is_tessellation_shader())) + uint32_t comp = get_decoration(var.self, DecorationComponent); + if (storage == StorageClassInput) { - var.basetype = ensure_correct_attribute_type(var.basetype, locn); - uint32_t mbr_type_id = ensure_correct_attribute_type(usable_type->self, locn); - ib_type.member_types[ib_mbr_idx] = mbr_type_id; + var.basetype = ensure_correct_input_type(var.basetype, locn, comp, 0, meta.strip_array); + uint32_t mbr_type_id = ensure_correct_input_type(usable_type->self, locn, comp, 0, meta.strip_array); + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = mbr_type_id; } set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + if (comp) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp); + mark_location_as_used_by_shader(locn, *usable_type, storage); + } + else 
if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin)) + { + uint32_t locn = inputs_by_builtin[builtin].location + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, *usable_type, storage); } - else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin)) + else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin)) { - uint32_t locn = vtx_attrs_by_builtin[builtin].location + i; + uint32_t locn = outputs_by_builtin[builtin].location + i; set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + mark_location_as_used_by_shader(locn, *usable_type, storage); + } + else if (is_builtin && (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance)) + { + // Declare the Clip/CullDistance as [[user(clip/cullN)]]. 
+ set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, i); } if (get_decoration_bitset(var.self).get(DecorationIndex)) @@ -1443,25 +2759,44 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, index); } - // Copy interpolation decorations if needed - if (is_flat) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); - if (is_noperspective) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); - if (is_centroid) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); - if (is_sample) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + if (storage != StorageClassInput || !pull_model_inputs.count(var.self)) + { + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + } set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); - if (!strip_array) + // Only flatten/unflatten IO composites for non-tessellation cases where arrays are not stripped. 
+ if (!meta.strip_array) { switch (storage) { case StorageClassInput: - entry_func.fixup_hooks_in.push_back( - [=, &var]() { statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";"); }); + entry_func.fixup_hooks_in.push_back([=, &var]() { + if (pull_model_inputs.count(var.self)) + { + string lerp_call; + if (is_centroid) + lerp_call = ".interpolate_at_centroid()"; + else if (is_sample) + lerp_call = join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")"); + else + lerp_call = ".interpolate_at_center()"; + statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, lerp_call, ";"); + } + else + { + statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";"); + } + }); break; case StorageClassOutput: @@ -1474,6 +2809,9 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage remap_swizzle(padded_type, usable_type->vecsize, join(to_name(var.self), "[", i, "]")), ";"); } + else if (flatten_from_ib_var) + statement(ib_var_ref, ".", mbr_name, " = ", ib_var_ref, ".", flatten_from_ib_mbr_name, "[", i, + "];"); else statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), "[", i, "];"); }); @@ -1486,42 +2824,17 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage } } -uint32_t CompilerMSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) -{ - auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); - uint32_t location = get_decoration(var.self, DecorationLocation); - - for (uint32_t i = 0; i < mbr_idx; i++) - { - auto &mbr_type = get(type.member_types[i]); - - // Start counting from any place we have a new location decoration. 
- if (has_member_decoration(type.self, mbr_idx, DecorationLocation)) - location = get_member_decoration(type.self, mbr_idx, DecorationLocation); - - uint32_t location_count = 1; - - if (mbr_type.columns > 1) - location_count = mbr_type.columns; - - if (!mbr_type.array.empty()) - for (uint32_t j = 0; j < uint32_t(mbr_type.array.size()); j++) - location_count *= to_array_size_literal(mbr_type, j); - - location += location_count; - } - - return location; -} - -void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, - uint32_t mbr_idx, bool strip_array) +void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass storage, + const string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var, SPIRType &var_type, + uint32_t mbr_idx, InterfaceBlockMeta &meta, + const string &mbr_name_qual, + const string &var_chain_qual, + uint32_t &location, uint32_t &var_mbr_idx) { auto &entry_func = get(ir.default_entry_point); - auto &var_type = strip_array ? 
get_variable_element_type(var) : get_variable_data_type(var); - BuiltIn builtin; + BuiltIn builtin = BuiltInMax; bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); bool is_flat = has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat); @@ -1534,13 +2847,15 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass uint32_t mbr_type_id = var_type.member_types[mbr_idx]; auto &mbr_type = get(mbr_type_id); - uint32_t elem_cnt = 0; + bool mbr_is_indexable = false; + uint32_t elem_cnt = 1; if (is_matrix(mbr_type)) { if (is_array(mbr_type)) SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables."); + mbr_is_indexable = true; elem_cnt = mbr_type.columns; } else if (is_array(mbr_type)) @@ -1548,6 +2863,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass if (mbr_type.array.size() != 1) SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables."); + mbr_is_indexable = true; elem_cnt = to_array_size_literal(mbr_type); } @@ -1557,67 +2873,154 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass while (is_array(*usable_type) || is_matrix(*usable_type)) usable_type = &get(usable_type->parent_type); + bool flatten_from_ib_var = false; + string flatten_from_ib_mbr_name; + + if (storage == StorageClassOutput && is_builtin && builtin == BuiltInClipDistance) + { + // Also declare [[clip_distance]] attribute here. 
+ uint32_t clip_array_mbr_idx = uint32_t(ib_type.member_types.size()); + ib_type.member_types.push_back(mbr_type_id); + set_member_decoration(ib_type.self, clip_array_mbr_idx, DecorationBuiltIn, BuiltInClipDistance); + + flatten_from_ib_mbr_name = builtin_to_glsl(BuiltInClipDistance, StorageClassOutput); + set_member_name(ib_type.self, clip_array_mbr_idx, flatten_from_ib_mbr_name); + + // When we flatten, we flatten directly from the "out" struct, + // not from a function variable. + flatten_from_ib_var = true; + + if (!msl_options.enable_clip_distance_user_varying) + return; + } + + // Recursively handle nested structures. + if (mbr_type.basetype == SPIRType::Struct) + { + for (uint32_t i = 0; i < elem_cnt; i++) + { + string mbr_name = append_member_name(mbr_name_qual, var_type, mbr_idx) + (mbr_is_indexable ? join("_", i) : ""); + string var_chain = join(var_chain_qual, ".", to_member_name(var_type, mbr_idx), (mbr_is_indexable ? join("[", i, "]") : "")); + uint32_t sub_mbr_cnt = uint32_t(mbr_type.member_types.size()); + for (uint32_t sub_mbr_idx = 0; sub_mbr_idx < sub_mbr_cnt; sub_mbr_idx++) + { + add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, + var, mbr_type, sub_mbr_idx, + meta, mbr_name, var_chain, + location, var_mbr_idx); + // FIXME: Recursive structs and tessellation breaks here. + var_mbr_idx++; + } + } + return; + } + for (uint32_t i = 0; i < elem_cnt; i++) { // Add a reference to the variable type to the interface struct. 
uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); - ib_type.member_types.push_back(usable_type->self); + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types.push_back(build_msl_interpolant_type(usable_type->self, is_noperspective)); + else + ib_type.member_types.push_back(usable_type->self); // Give the member a name - string mbr_name = ensure_valid_name(join(to_qualified_member_name(var_type, mbr_idx), "_", i), "m"); + string mbr_name = ensure_valid_name(append_member_name(mbr_name_qual, var_type, mbr_idx) + (mbr_is_indexable ? join("_", i) : ""), "m"); set_member_name(ib_type.self, ib_mbr_idx, mbr_name); - if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + // Once we determine the location of the first member within nested structures, + // from a var of the topmost structure, the remaining flattened members of + // the nested structures will have consecutive location values. At this point, + // we've recursively tunnelled into structs, arrays, and matrices, and are + // down to a single location for each member now. 
+ if (!is_builtin && location != UINT32_MAX) { - uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation) + i; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, *usable_type, storage); + location++; + } + else if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + { + location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation) + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, *usable_type, storage); + location++; } else if (has_decoration(var.self, DecorationLocation)) { - uint32_t locn = get_accumulated_member_location(var, mbr_idx, strip_array) + i; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + location = get_accumulated_member_location(var, mbr_idx, meta.strip_array) + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, *usable_type, storage); + location++; } - else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin)) + else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin)) { - uint32_t locn = vtx_attrs_by_builtin[builtin].location + i; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + location = inputs_by_builtin[builtin].location + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, *usable_type, storage); + location++; + } + else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && 
outputs_by_builtin.count(builtin)) + { + location = outputs_by_builtin[builtin].location + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, *usable_type, storage); + location++; + } + else if (is_builtin && (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance)) + { + // Declare the Clip/CullDistance as [[user(clip/cullN)]]. + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, i); } if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent)) - SPIRV_CROSS_THROW("DecorationComponent on matrices and arrays make little sense."); + SPIRV_CROSS_THROW("DecorationComponent on matrices and arrays is not supported."); - // Copy interpolation decorations if needed - if (is_flat) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); - if (is_noperspective) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); - if (is_centroid) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); - if (is_sample) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + if (storage != StorageClassInput || !pull_model_inputs.count(var.self)) + { + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + } set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); - set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx); + set_extended_member_decoration(ib_type.self, ib_mbr_idx, 
SPIRVCrossDecorationInterfaceMemberIndex, var_mbr_idx); // Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate. - if (!strip_array) + if (!meta.strip_array && meta.allow_local_declaration) { + string var_chain = join(var_chain_qual, ".", to_member_name(var_type, mbr_idx), (mbr_is_indexable ? join("[", i, "]") : "")); switch (storage) { case StorageClassInput: - entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() { - statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), "[", i, "] = ", ib_var_ref, - ".", mbr_name, ";"); + entry_func.fixup_hooks_in.push_back([=, &var]() { + string lerp_call; + if (pull_model_inputs.count(var.self)) + { + if (is_centroid) + lerp_call = ".interpolate_at_centroid()"; + else if (is_sample) + lerp_call = join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")"); + else + lerp_call = ".interpolate_at_center()"; + } + statement(var_chain, " = ", ib_var_ref, ".", mbr_name, lerp_call, ";"); }); break; case StorageClassOutput: - entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() { - statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), ".", - to_member_name(var_type, mbr_idx), "[", i, "];"); + entry_func.fixup_hooks_out.push_back([=]() { + if (flatten_from_ib_var) + statement(ib_var_ref, ".", mbr_name, " = ", ib_var_ref, ".", flatten_from_ib_mbr_name, "[", i, "];"); + else + statement(ib_var_ref, ".", mbr_name, " = ", var_chain, ";"); }); break; @@ -1628,11 +3031,14 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass } } -void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, uint32_t mbr_idx, - bool strip_array) +void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass storage, + const string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var, SPIRType &var_type, + uint32_t mbr_idx, InterfaceBlockMeta &meta, + const string 
&mbr_name_qual, + const string &var_chain_qual, + uint32_t &location, uint32_t &var_mbr_idx) { - auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); auto &entry_func = get(ir.default_entry_point); BuiltIn builtin = BuiltInMax; @@ -1651,35 +3057,51 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); mbr_type_id = ensure_correct_builtin_type(mbr_type_id, builtin); var_type.member_types[mbr_idx] = mbr_type_id; - ib_type.member_types.push_back(mbr_type_id); + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types.push_back(build_msl_interpolant_type(mbr_type_id, is_noperspective)); + else + ib_type.member_types.push_back(mbr_type_id); // Give the member a name - string mbr_name = ensure_valid_name(to_qualified_member_name(var_type, mbr_idx), "m"); + string mbr_name = ensure_valid_name(append_member_name(mbr_name_qual, var_type, mbr_idx), "m"); set_member_name(ib_type.self, ib_mbr_idx, mbr_name); // Update the original variable reference to include the structure reference string qual_var_name = ib_var_ref + "." + mbr_name; + // If using pull-model interpolation, need to add a call to the correct interpolation method. + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + { + if (is_centroid) + qual_var_name += ".interpolate_at_centroid()"; + else if (is_sample) + qual_var_name += join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")"); + else + qual_var_name += ".interpolate_at_center()"; + } - if (is_builtin && !strip_array) + bool flatten_stage_out = false; + string var_chain = var_chain_qual + "." + to_member_name(var_type, mbr_idx); + if (is_builtin && !meta.strip_array) { // For the builtin gl_PerVertex, we cannot treat it as a block anyways, // so redirect to qualified name. 
set_member_qualified_name(var_type.self, mbr_idx, qual_var_name); } - else if (!strip_array) + else if (!meta.strip_array && meta.allow_local_declaration) { // Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate. switch (storage) { case StorageClassInput: - entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() { - statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), " = ", qual_var_name, ";"); + entry_func.fixup_hooks_in.push_back([=]() { + statement(var_chain, " = ", qual_var_name, ";"); }); break; case StorageClassOutput: - entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() { - statement(qual_var_name, " = ", to_name(var.self), ".", to_member_name(var_type, mbr_idx), ";"); + flatten_stage_out = true; + entry_func.fixup_hooks_out.push_back([=]() { + statement(qual_var_name, " = ", var_chain, ";"); }); break; @@ -1688,41 +3110,63 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor } } - // Copy the variable location from the original variable to the member - if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + // Once we determine the location of the first member within nested structures, + // from a var of the topmost structure, the remaining flattened members of + // the nested structures will have consecutive location values. At this point, + // we've recursively tunnelled into structs, arrays, and matrices, and are + // down to a single location for each member now. 
+ if (!is_builtin && location != UINT32_MAX) { - uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation); - if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || is_tessellation_shader())) - { - mbr_type_id = ensure_correct_attribute_type(mbr_type_id, locn); - var_type.member_types[mbr_idx] = mbr_type_id; - ib_type.member_types[ib_mbr_idx] = mbr_type_id; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(mbr_type_id), storage); + location += type_to_location_count(get(mbr_type_id)); + } + else if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + { + location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation); + uint32_t comp = get_member_decoration(var_type.self, mbr_idx, DecorationComponent); + if (storage == StorageClassInput) + { + mbr_type_id = ensure_correct_input_type(mbr_type_id, location, comp, 0, meta.strip_array); + var_type.member_types[mbr_idx] = mbr_type_id; + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = mbr_type_id; } - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(mbr_type_id), storage); + location += type_to_location_count(get(mbr_type_id)); } else if (has_decoration(var.self, DecorationLocation)) { - // The block itself might have a location and in this case, all members of the block - // receive incrementing locations. 
- uint32_t locn = get_accumulated_member_location(var, mbr_idx, strip_array); - if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || is_tessellation_shader())) + location = get_accumulated_member_location(var, mbr_idx, meta.strip_array); + if (storage == StorageClassInput) { - mbr_type_id = ensure_correct_attribute_type(mbr_type_id, locn); + mbr_type_id = ensure_correct_input_type(mbr_type_id, location, 0, 0, meta.strip_array); var_type.member_types[mbr_idx] = mbr_type_id; - ib_type.member_types[ib_mbr_idx] = mbr_type_id; + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = mbr_type_id; } - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(mbr_type_id), storage); + location += type_to_location_count(get(mbr_type_id)); } - else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin)) + else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin)) { - uint32_t locn = 0; - auto builtin_itr = vtx_attrs_by_builtin.find(builtin); - if (builtin_itr != end(vtx_attrs_by_builtin)) - locn = builtin_itr->second.location; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + location = inputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(mbr_type_id), storage); + location += type_to_location_count(get(mbr_type_id)); + } + else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && 
outputs_by_builtin.count(builtin)) + { + location = outputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(mbr_type_id), storage); + location += type_to_location_count(get(mbr_type_id)); } // Copy the component location, if present. @@ -1740,18 +3184,48 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor qual_pos_var_name = qual_var_name; } - // Copy interpolation decorations if needed - if (is_flat) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); - if (is_noperspective) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); - if (is_centroid) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); - if (is_sample) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + const SPIRConstant *c = nullptr; + if (!flatten_stage_out && var.storage == StorageClassOutput && + var.initializer != ID(0) && (c = maybe_get(var.initializer))) + { + if (meta.strip_array) + { + entry_func.fixup_hooks_in.push_back([=, &var]() { + auto &type = this->get(var.basetype); + uint32_t index = get_extended_member_decoration(var.self, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex); + + auto invocation = to_tesc_invocation_id(); + auto constant_chain = join(to_expression(var.initializer), "[", invocation, "]"); + statement(to_expression(stage_out_ptr_var_id), "[", + invocation, "].", + to_member_name(ib_type, index), " = ", + constant_chain, ".", to_member_name(type, mbr_idx), ";"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(qual_var_name, " = ", constant_expression( + this->get(c->subconstants[mbr_idx])), ";"); + }); + } + } + + if (storage != StorageClassInput || !pull_model_inputs.count(var.self)) + { + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if 
(is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + } set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); - set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx); + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, var_mbr_idx); } // In Metal, the tessellation levels are stored as tightly packed half-precision floating point values. @@ -1762,30 +3236,38 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var) { - auto &entry_func = get(ir.default_entry_point); auto &var_type = get_variable_element_type(var); BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + bool triangles = is_tessellating_triangles(); + string mbr_name; - // Force the variable to have the proper name. - set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction)); + // Add a reference to the variable type to the interface struct. 
+ uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + + const auto mark_locations = [&](const SPIRType &new_var_type) { + if (get_decoration_bitset(var.self).get(DecorationLocation)) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, new_var_type, StorageClassInput); + } + else if (inputs_by_builtin.count(builtin)) + { + uint32_t locn = inputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, new_var_type, StorageClassInput); + } + }; - if (get_entry_point().flags.get(ExecutionModeTriangles)) + if (triangles) { // Triangles are tricky, because we want only one member in the struct. - - // We need to declare the variable early and at entry-point scope. - entry_func.add_local_variable(var.self); - vars_needing_early_declaration.push_back(var.self); - - string mbr_name = "gl_TessLevel"; + mbr_name = "gl_TessLevel"; // If we already added the other one, we can skip this step. if (!added_builtin_tess_level) { - // Add a reference to the variable type to the interface struct. - uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); - uint32_t type_id = build_extended_vector_type(var_type.self, 4); ib_type.member_types.push_back(type_id); @@ -1793,97 +3275,220 @@ void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_ // Give the member a name set_member_name(ib_type.self, ib_mbr_idx, mbr_name); - // There is no qualified alias since we need to flatten the internal array on return. 
- if (get_decoration_bitset(var.self).get(DecorationLocation)) - { - uint32_t locn = get_decoration(var.self, DecorationLocation); - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, StorageClassInput); - } - else if (vtx_attrs_by_builtin.count(builtin)) - { - uint32_t locn = vtx_attrs_by_builtin[builtin].location; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, StorageClassInput); - } + // We cannot decorate both, but the important part is that + // it's marked as builtin so we can get automatic attribute assignment if needed. + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + mark_locations(var_type); added_builtin_tess_level = true; } - - switch (builtin) - { - case BuiltInTessLevelOuter: - entry_func.fixup_hooks_in.push_back([=, &var]() { - statement(to_name(var.self), "[0] = ", ib_var_ref, ".", mbr_name, ".x;"); - statement(to_name(var.self), "[1] = ", ib_var_ref, ".", mbr_name, ".y;"); - statement(to_name(var.self), "[2] = ", ib_var_ref, ".", mbr_name, ".z;"); - }); - break; - - case BuiltInTessLevelInner: - entry_func.fixup_hooks_in.push_back( - [=, &var]() { statement(to_name(var.self), "[0] = ", ib_var_ref, ".", mbr_name, ".w;"); }); - break; - - default: - assert(false); - break; - } } else { - // Add a reference to the variable type to the interface struct. - uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + mbr_name = builtin_to_glsl(builtin, StorageClassFunction); uint32_t type_id = build_extended_vector_type(var_type.self, builtin == BuiltInTessLevelOuter ? 4 : 2); - // Change the type of the variable, too. 
+ uint32_t ptr_type_id = ir.increase_bound_by(1); auto &new_var_type = set(ptr_type_id, get(type_id)); new_var_type.pointer = true; + new_var_type.pointer_depth++; new_var_type.storage = StorageClassInput; new_var_type.parent_type = type_id; - var.basetype = ptr_type_id; ib_type.member_types.push_back(type_id); // Give the member a name - string mbr_name = to_expression(var.self); set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); - // Since vectors can be indexed like arrays, there is no need to unpack this. We can - // just refer to the vector directly. So give it a qualified alias. - string qual_var_name = ib_var_ref + "." + mbr_name; - ir.meta[var.self].decoration.qualified_alias = qual_var_name; + mark_locations(new_var_type); + } - if (get_decoration_bitset(var.self).get(DecorationLocation)) + add_tess_level_input(ib_var_ref, mbr_name, var); +} + +void CompilerMSL::add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var) +{ + auto &entry_func = get(ir.default_entry_point); + BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + + // Force the variable to have the proper name. + string var_name = builtin_to_glsl(builtin, StorageClassFunction); + set_name(var.self, var_name); + + // We need to declare the variable early and at entry-point scope. 
+ entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + bool triangles = is_tessellating_triangles(); + + if (builtin == BuiltInTessLevelOuter) + { + entry_func.fixup_hooks_in.push_back( + [=]() + { + statement(var_name, "[0] = ", base_ref, ".", mbr_name, "[0];"); + statement(var_name, "[1] = ", base_ref, ".", mbr_name, "[1];"); + statement(var_name, "[2] = ", base_ref, ".", mbr_name, "[2];"); + if (!triangles) + statement(var_name, "[3] = ", base_ref, ".", mbr_name, "[3];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + if (triangles) + { + if (msl_options.raw_buffer_tese_input) + statement(var_name, "[0] = ", base_ref, ".", mbr_name, ";"); + else + statement(var_name, "[0] = ", base_ref, ".", mbr_name, "[3];"); + } + else + { + statement(var_name, "[0] = ", base_ref, ".", mbr_name, "[0];"); + statement(var_name, "[1] = ", base_ref, ".", mbr_name, "[1];"); + } + }); + } +} + +bool CompilerMSL::variable_storage_requires_stage_io(spv::StorageClass storage) const +{ + if (storage == StorageClassOutput) + return !capture_output_to_buffer; + else if (storage == StorageClassInput) + return !(is_tesc_shader() && msl_options.multi_patch_workgroup) && + !(is_tese_shader() && msl_options.raw_buffer_tese_input); + else + return false; +} + +string CompilerMSL::to_tesc_invocation_id() +{ + if (msl_options.multi_patch_workgroup) + { + // n.b. builtin_invocation_id_id here is the dispatch global invocation ID, + // not the TC invocation ID. 
+ return join(to_expression(builtin_invocation_id_id), ".x % ", get_entry_point().output_vertices); + } + else + return builtin_to_glsl(BuiltInInvocationId, StorageClassInput); +} + +void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array) +{ + auto &entry_func = get(ir.default_entry_point); + bool threadgroup_storage = variable_decl_is_remapped_storage(masked_var, StorageClassWorkgroup); + + if (threadgroup_storage && msl_options.multi_patch_workgroup) + { + // We need one threadgroup block per patch, so fake this. + entry_func.fixup_hooks_in.push_back([this, &masked_var]() { + auto &type = get_variable_data_type(masked_var); + add_local_variable_name(masked_var.self); + + bool old_is_builtin = is_using_builtin_array; + is_using_builtin_array = true; + + const uint32_t max_control_points_per_patch = 32u; + uint32_t max_num_instances = + (max_control_points_per_patch + get_entry_point().output_vertices - 1u) / + get_entry_point().output_vertices; + statement("threadgroup ", type_to_glsl(type), " ", + "spvStorage", to_name(masked_var.self), "[", max_num_instances, "]", + type_to_array_glsl(type), ";"); + + // Assign a threadgroup slice to each PrimitiveID. + // We assume here that workgroup size is rounded to 32, + // since that's the maximum number of control points per patch. + // We cannot size the array based on fixed dispatch parameters, + // since Metal does not allow that. :( + // FIXME: We will likely need an option to support passing down target workgroup size, + // so we can emit appropriate size here. 
+ statement("threadgroup ", type_to_glsl(type), " ", + "(&", to_name(masked_var.self), ")", + type_to_array_glsl(type), " = spvStorage", to_name(masked_var.self), "[", + "(", to_expression(builtin_invocation_id_id), ".x / ", + get_entry_point().output_vertices, ") % ", + max_num_instances, "];"); + + is_using_builtin_array = old_is_builtin; + }); + } + else + { + entry_func.add_local_variable(masked_var.self); + } + + if (!threadgroup_storage) + { + vars_needing_early_declaration.push_back(masked_var.self); + } + else if (masked_var.initializer) + { + // Cannot directly initialize threadgroup variables. Need fixup hooks. + ID initializer = masked_var.initializer; + if (strip_array) { - uint32_t locn = get_decoration(var.self, DecorationLocation); - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, StorageClassInput); + entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() { + auto invocation = to_tesc_invocation_id(); + statement(to_expression(masked_var.self), "[", + invocation, "] = ", + to_expression(initializer), "[", + invocation, "];"); + }); } - else if (vtx_attrs_by_builtin.count(builtin)) + else { - uint32_t locn = vtx_attrs_by_builtin[builtin].location; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, StorageClassInput); + entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() { + statement(to_expression(masked_var.self), " = ", to_expression(initializer), ";"); + }); } } } void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type, - SPIRVariable &var, bool strip_array) + SPIRVariable &var, InterfaceBlockMeta &meta) { auto &entry_func = get(ir.default_entry_point); // Tessellation control I/O variables and tessellation evaluation per-point inputs are // usually declared as arrays. 
In these cases, we want to add the element type to the // interface block, since in Metal it's the interface block itself which is arrayed. - auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var); bool is_builtin = is_builtin_variable(var); auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + bool is_block = has_decoration(var_type.self, DecorationBlock); + + // If stage variables are masked out, emit them as plain variables instead. + // For builtins, we query them one by one later. + // IO blocks are not masked here, we need to mask them per-member instead. + if (storage == StorageClassOutput && is_stage_output_variable_masked(var)) + { + // If we ignore an output, we must still emit it, since it might be used by app. + // Instead, just emit it as early declaration. + emit_local_masked_variable(var, meta.strip_array); + return; + } + + if (storage == StorageClassInput && has_decoration(var.self, DecorationPerVertexKHR)) + SPIRV_CROSS_THROW("PerVertexKHR decoration is not supported in MSL."); + + // If variable names alias, they will end up with wrong names in the interface struct, because + // there might be aliases in the member name cache and there would be a mismatch in fixup_in code. + // Make sure to register the variables as unique resource names ahead of time. + // This would normally conflict with the name cache when emitting local variables, + // but this happens in the setup stage, before we hit compilation loops. + // The name cache is cleared before we actually emit code, so this is safe. 
+ add_resource_name(var.self); if (var_type.basetype == SPIRType::Struct) { - if (!is_builtin_type(var_type) && (!capture_output_to_buffer || storage == StorageClassInput) && !strip_array) + bool block_requires_flattening = + variable_storage_requires_stage_io(storage) || (is_block && var_type.array.empty()); + bool needs_local_declaration = !is_builtin && block_requires_flattening && meta.allow_local_declaration; + + if (needs_local_declaration) { // For I/O blocks or structs, we will need to pass the block itself around // to functions if they are used globally in leaf functions. @@ -1891,67 +3496,171 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st // we unflatten I/O blocks while running the shader, // and pass the actual struct type down to leaf functions. // We then unflatten inputs, and flatten outputs in the "fixup" stages. - entry_func.add_local_variable(var.self); - vars_needing_early_declaration.push_back(var.self); + emit_local_masked_variable(var, meta.strip_array); } - if (capture_output_to_buffer && storage != StorageClassInput && !has_decoration(var_type.self, DecorationBlock)) + if (!block_requires_flattening) { // In Metal tessellation shaders, the interface block itself is arrayed. This makes things // very complicated, since stage-in structures in MSL don't support nested structures. // Luckily, for stage-out when capturing output, we can avoid this and just add // composite members directly, because the stage-out structure is stored to a buffer, // not returned. 
- add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array); + add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta); } else { - // Flatten the struct members into the interface struct - for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++) + bool masked_block = false; + uint32_t location = UINT32_MAX; + uint32_t var_mbr_idx = 0; + uint32_t elem_cnt = 1; + if (is_matrix(var_type)) { - builtin = BuiltInMax; - is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); - auto &mbr_type = get(var_type.member_types[mbr_idx]); + if (is_array(var_type)) + SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables."); + + elem_cnt = var_type.columns; + } + else if (is_array(var_type)) + { + if (var_type.array.size() != 1) + SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables."); + + elem_cnt = to_array_size_literal(var_type); + } - if (!is_builtin || has_active_builtin(builtin, storage)) + for (uint32_t elem_idx = 0; elem_idx < elem_cnt; elem_idx++) + { + // Flatten the struct members into the interface struct + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++) { - if ((!is_builtin || - (storage == StorageClassInput && get_execution_model() != ExecutionModelFragment)) && - (storage == StorageClassInput || storage == StorageClassOutput) && - (is_matrix(mbr_type) || is_array(mbr_type))) + builtin = BuiltInMax; + is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); + auto &mbr_type = get(var_type.member_types[mbr_idx]); + + if (storage == StorageClassOutput && is_stage_output_block_member_masked(var, mbr_idx, meta.strip_array)) { - add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx, - strip_array); + location = UINT32_MAX; // Skip this member and resolve location again on next var member + + if (is_block) + masked_block = true; + + // Non-builtin 
block output variables are just ignored, since they will still access + // the block variable as-is. They're just not flattened. + if (is_builtin && !meta.strip_array) + { + // Emit a fake variable instead. + uint32_t ids = ir.increase_bound_by(2); + uint32_t ptr_type_id = ids + 0; + uint32_t var_id = ids + 1; + + auto ptr_type = mbr_type; + ptr_type.pointer = true; + ptr_type.pointer_depth++; + ptr_type.parent_type = var_type.member_types[mbr_idx]; + ptr_type.storage = StorageClassOutput; + + uint32_t initializer = 0; + if (var.initializer) + if (auto *c = maybe_get(var.initializer)) + initializer = c->subconstants[mbr_idx]; + + set(ptr_type_id, ptr_type); + set(var_id, ptr_type_id, StorageClassOutput, initializer); + entry_func.add_local_variable(var_id); + vars_needing_early_declaration.push_back(var_id); + set_name(var_id, builtin_to_glsl(builtin, StorageClassOutput)); + set_decoration(var_id, DecorationBuiltIn, builtin); + } } - else + else if (!is_builtin || has_active_builtin(builtin, storage)) { - add_plain_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx, - strip_array); + bool is_composite_type = is_matrix(mbr_type) || is_array(mbr_type) || mbr_type.basetype == SPIRType::Struct; + bool attribute_load_store = + storage == StorageClassInput && get_execution_model() != ExecutionModelFragment; + bool storage_is_stage_io = variable_storage_requires_stage_io(storage); + + // Clip/CullDistance always need to be declared as user attributes. 
+ if (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance) + is_builtin = false; + + const string var_name = to_name(var.self); + string mbr_name_qual = var_name; + string var_chain_qual = var_name; + if (elem_cnt > 1) + { + mbr_name_qual += join("_", elem_idx); + var_chain_qual += join("[", elem_idx, "]"); + } + + if ((!is_builtin || attribute_load_store) && storage_is_stage_io && is_composite_type) + { + add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, + var, var_type, mbr_idx, meta, + mbr_name_qual, var_chain_qual, + location, var_mbr_idx); + } + else + { + add_plain_member_variable_to_interface_block(storage, ib_var_ref, ib_type, + var, var_type, mbr_idx, meta, + mbr_name_qual, var_chain_qual, + location, var_mbr_idx); + } } + var_mbr_idx++; + } + } + + // If we're redirecting a block, we might still need to access the original block + // variable if we're masking some members. + if (masked_block && !needs_local_declaration && (!is_builtin_variable(var) || is_tesc_shader())) + { + if (is_builtin_variable(var)) + { + // Ensure correct names for the block members if we're actually going to + // declare gl_PerVertex. 
+ for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++) + { + set_member_name(var_type.self, mbr_idx, builtin_to_glsl( + BuiltIn(get_member_decoration(var_type.self, mbr_idx, DecorationBuiltIn)), + StorageClassOutput)); + } + + set_name(var_type.self, "gl_PerVertex"); + set_name(var.self, "gl_out_masked"); + stage_out_masked_builtin_type_id = var_type.self; } + emit_local_masked_variable(var, meta.strip_array); } } } - else if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput && - !strip_array && is_builtin && (builtin == BuiltInTessLevelOuter || builtin == BuiltInTessLevelInner)) + else if (is_tese_shader() && storage == StorageClassInput && !meta.strip_array && is_builtin && + (builtin == BuiltInTessLevelOuter || builtin == BuiltInTessLevelInner)) { add_tess_level_input_to_interface_block(ib_var_ref, ib_type, var); } else if (var_type.basetype == SPIRType::Boolean || var_type.basetype == SPIRType::Char || - type_is_integral(var_type) || type_is_floating_point(var_type) || var_type.basetype == SPIRType::Boolean) + type_is_integral(var_type) || type_is_floating_point(var_type)) { if (!is_builtin || has_active_builtin(builtin, storage)) { + bool is_composite_type = is_matrix(var_type) || is_array(var_type); + bool storage_is_stage_io = variable_storage_requires_stage_io(storage); + bool attribute_load_store = storage == StorageClassInput && get_execution_model() != ExecutionModelFragment; + + // Clip/CullDistance always needs to be declared as user attributes. + if (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance) + is_builtin = false; + // MSL does not allow matrices or arrays in input or output variables, so need to handle it specially. 
- if ((!is_builtin || (storage == StorageClassInput && get_execution_model() != ExecutionModelFragment)) && - (storage == StorageClassInput || (storage == StorageClassOutput && !capture_output_to_buffer)) && - (is_matrix(var_type) || is_array(var_type))) + if ((!is_builtin || attribute_load_store) && storage_is_stage_io && is_composite_type) { - add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array); + add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta); } else { - add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array); + add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta); } } } @@ -1961,62 +3670,45 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st // for per-vertex variables in a tessellation control shader. void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t ib_type_id) { - // Only needed for tessellation shaders. - if (get_execution_model() != ExecutionModelTessellationControl && - !(get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput)) + // Only needed for tessellation shaders and pull-model interpolants. + // Need to redirect interface indices back to variables themselves. + // For structs, each member of the struct need a separate instance. 
+ if (!is_tesc_shader() && !(is_tese_shader() && storage == StorageClassInput) && + !(get_execution_model() == ExecutionModelFragment && storage == StorageClassInput && + !pull_model_inputs.empty())) return; - bool in_array = false; - for (uint32_t i = 0; i < ir.meta[ib_type_id].members.size(); i++) + auto mbr_cnt = uint32_t(ir.meta[ib_type_id].members.size()); + for (uint32_t i = 0; i < mbr_cnt; i++) { - auto &mbr_dec = ir.meta[ib_type_id].members[i]; - uint32_t var_id = mbr_dec.extended.ib_orig_id; + uint32_t var_id = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceOrigID); if (!var_id) continue; auto &var = get(var_id); - // Unfortunately, all this complexity is needed to handle flattened structs and/or - // arrays. - if (storage == StorageClassInput) - { - auto &type = get_variable_element_type(var); - if (is_array(type) || is_matrix(type)) - { - if (in_array) - continue; - in_array = true; - set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); - } - else - { - if (type.basetype == SPIRType::Struct) - { - uint32_t mbr_idx = - get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex); - auto &mbr_type = get(type.member_types[mbr_idx]); + auto &type = get_variable_element_type(var); - if (is_array(mbr_type) || is_matrix(mbr_type)) - { - if (in_array) - continue; - in_array = true; - set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); - } - else - { - in_array = false; - set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); - } - } - else - { - in_array = false; - set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); - } - } + bool flatten_composites = variable_storage_requires_stage_io(var.storage); + bool is_block = has_decoration(type.self, DecorationBlock); + + uint32_t mbr_idx = uint32_t(-1); + if (type.basetype == SPIRType::Struct && (flatten_composites || is_block)) + 
mbr_idx = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex); + + if (mbr_idx != uint32_t(-1)) + { + // Only set the lowest InterfaceMemberIndex for each variable member. + // IB struct members will be emitted in-order w.r.t. interface member index. + if (!has_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex)) + set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); } else - set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + { + // Only set the lowest InterfaceMemberIndex for each variable. + // IB struct members will be emitted in-order w.r.t. interface member index. + if (!has_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex)) + set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + } } } @@ -2029,6 +3721,16 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) bool incl_builtins = storage == StorageClassOutput || is_tessellation_shader(); bool has_seen_barycentric = false; + InterfaceBlockMeta meta; + + // Varying interfaces between stages which use "user()" attribute can be dealt with + // without explicit packing and unpacking of components. For any variables which link against the runtime + // in some way (vertex attributes, fragment output, etc), we'll need to deal with it somehow. 
+ bool pack_components = + (storage == StorageClassInput && get_execution_model() == ExecutionModelVertex) || + (storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment) || + (storage == StorageClassOutput && get_execution_model() == ExecutionModelVertex && capture_output_to_buffer); + ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { if (var.storage != storage) return; @@ -2036,29 +3738,81 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) auto &type = this->get(var.basetype); bool is_builtin = is_builtin_variable(var); - auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); + bool is_block = has_decoration(type.self, DecorationBlock); + + auto bi_type = BuiltInMax; + bool builtin_is_gl_in_out = false; + if (is_builtin && !is_block) + { + bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); + builtin_is_gl_in_out = bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance; + } + + if (is_builtin && is_block) + builtin_is_gl_in_out = true; + + uint32_t location = get_decoration(var_id, DecorationLocation); + + bool builtin_is_stage_in_out = builtin_is_gl_in_out || + bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex || + bi_type == BuiltInBaryCoordKHR || bi_type == BuiltInBaryCoordNoPerspKHR || + bi_type == BuiltInFragDepth || + bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask; // These builtins are part of the stage in/out structs. 
bool is_interface_block_builtin = - (bi_type == BuiltInPosition || bi_type == BuiltInPointSize || bi_type == BuiltInClipDistance || - bi_type == BuiltInCullDistance || bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex || - bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV || bi_type == BuiltInFragDepth || - bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask) || - (get_execution_model() == ExecutionModelTessellationEvaluation && - (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner)); + builtin_is_stage_in_out || (is_tese_shader() && !msl_options.raw_buffer_tese_input && + (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner)); bool is_active = interface_variable_exists_in_entry_point(var.self); if (is_builtin && is_active) { // Only emit the builtin if it's active in this entry point. Interface variable list might lie. - is_active = has_active_builtin(bi_type, storage); + if (is_block) + { + // If any builtin is active, the block is active. + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t i = 0; !is_active && i < mbr_cnt; i++) + is_active = has_active_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)), storage); + } + else + { + is_active = has_active_builtin(bi_type, storage); + } } bool filter_patch_decoration = (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch; bool hidden = is_hidden_variable(var, incl_builtins); + + // ClipDistance is never hidden, we need to emulate it when used as an input. + if (bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance) + hidden = false; + + // It's not enough to simply avoid marking fragment outputs if the pipeline won't + // accept them. We can't put them in the struct at all, or otherwise the compiler + // complains that the outputs weren't explicitly marked. + // Frag depth and stencil outputs are incompatible with explicit early fragment tests. 
+ // In GLSL, depth and stencil outputs are just ignored when explicit early fragment tests are required. + // In Metal, it's a compilation error, so we need to exclude them from the output struct. + if (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput && !patch && + ((is_builtin && ((bi_type == BuiltInFragDepth && (!msl_options.enable_frag_depth_builtin || uses_explicit_early_fragment_test())) || + (bi_type == BuiltInFragStencilRefEXT && (!msl_options.enable_frag_stencil_ref_builtin || uses_explicit_early_fragment_test())))) || + (!is_builtin && !(msl_options.enable_frag_output_mask & (1 << location))))) + { + hidden = true; + disabled_frag_outputs.push_back(var_id); + // If a builtin, force it to have the proper name, and mark it as not part of the output struct. + if (is_builtin) + { + set_name(var_id, builtin_to_glsl(bi_type, StorageClassFunction)); + mask_stage_output_by_builtin(bi_type); + } + } + // Barycentric inputs must be emitted in stage-in, because they can have interpolation arguments. - if (is_active && (bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV)) + if (is_active && (bi_type == BuiltInBaryCoordKHR || bi_type == BuiltInBaryCoordNoPerspKHR)) { if (has_seen_barycentric) SPIRV_CROSS_THROW("Cannot declare both BaryCoordNV and BaryCoordNoPerspNV in same shader in MSL."); @@ -2070,13 +3824,64 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) (!is_builtin || is_interface_block_builtin)) { vars.push_back(&var); + + if (!is_builtin) + { + // Need to deal specially with DecorationComponent. + // Multiple variables can alias the same Location, and try to make sure each location is declared only once. + // We will swizzle data in and out to make this work. + // This is only relevant for vertex inputs and fragment outputs. + // Technically tessellation as well, but it is too complicated to support. 
+ uint32_t component = get_decoration(var_id, DecorationComponent); + if (component != 0) + { + if (is_tessellation_shader()) + SPIRV_CROSS_THROW("Component decoration is not supported in tessellation shaders."); + else if (pack_components) + { + uint32_t array_size = 1; + if (!type.array.empty()) + array_size = to_array_size_literal(type); + + for (uint32_t location_offset = 0; location_offset < array_size; location_offset++) + { + auto &location_meta = meta.location_meta[location + location_offset]; + location_meta.num_components = std::max(location_meta.num_components, component + type.vecsize); + + // For variables sharing location, decorations and base type must match. + location_meta.base_type_id = type.self; + location_meta.flat = has_decoration(var.self, DecorationFlat); + location_meta.noperspective = has_decoration(var.self, DecorationNoPerspective); + location_meta.centroid = has_decoration(var.self, DecorationCentroid); + location_meta.sample = has_decoration(var.self, DecorationSample); + } + } + } + } + } + + if (is_tese_shader() && msl_options.raw_buffer_tese_input && patch && storage == StorageClassInput && + (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner)) + { + // In this case, we won't add the builtin to the interface struct, + // but we still need the hook to run to populate the arrays. + string base_ref = join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "]"); + const char *mbr_name = + bi_type == BuiltInTessLevelOuter ? "edgeTessellationFactor" : "insideTessellationFactor"; + add_tess_level_input(base_ref, mbr_name, var); + if (inputs_by_builtin.count(bi_type)) + { + uint32_t locn = inputs_by_builtin[bi_type].location; + mark_location_as_used_by_shader(locn, type, StorageClassInput); + } } }); // If no variables qualify, leave. // For patch input in a tessellation evaluation shader, the per-vertex stage inputs // are included in a special patch control point array. 
- if (vars.empty() && !(storage == StorageClassInput && patch && stage_in_var_id)) + if (vars.empty() && + !(!msl_options.raw_buffer_tese_input && storage == StorageClassInput && patch && stage_in_var_id)) return 0; // Add a new typed variable for this interface structure. @@ -2099,30 +3904,74 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) { case StorageClassInput: ib_var_ref = patch ? patch_stage_in_var_name : stage_in_var_name; - if (get_execution_model() == ExecutionModelTessellationControl) + switch (get_execution_model()) { - // Add a hook to populate the shared workgroup memory containing - // the gl_in array. + case ExecutionModelTessellationControl: + // Add a hook to populate the shared workgroup memory containing the gl_in array. entry_func.fixup_hooks_in.push_back([=]() { - // Can't use PatchVertices yet; the hook for that may not have run yet. - statement("if (", to_expression(builtin_invocation_id_id), " < ", "spvIndirectParams[0])"); - statement(" ", input_wg_var_name, "[", to_expression(builtin_invocation_id_id), "] = ", ib_var_ref, - ";"); - statement("threadgroup_barrier(mem_flags::mem_threadgroup);"); - statement("if (", to_expression(builtin_invocation_id_id), " >= ", get_entry_point().output_vertices, - ")"); - statement(" return;"); + // Can't use PatchVertices, PrimitiveId, or InvocationId yet; the hooks for those may not have run yet. + if (msl_options.multi_patch_workgroup) + { + // n.b. builtin_invocation_id_id here is the dispatch global invocation ID, + // not the TC invocation ID. + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_in = &", + input_buffer_var_name, "[min(", to_expression(builtin_invocation_id_id), ".x / ", + get_entry_point().output_vertices, + ", spvIndirectParams[1] - 1) * spvIndirectParams[0]];"); + } + else + { + // It's safe to use InvocationId here because it's directly mapped to a + // Metal builtin, and therefore doesn't need a hook. 
+ statement("if (", to_expression(builtin_invocation_id_id), " < spvIndirectParams[0])"); + statement(" ", input_wg_var_name, "[", to_expression(builtin_invocation_id_id), + "] = ", ib_var_ref, ";"); + statement("threadgroup_barrier(mem_flags::mem_threadgroup);"); + statement("if (", to_expression(builtin_invocation_id_id), + " >= ", get_entry_point().output_vertices, ")"); + statement(" return;"); + } }); - } - break; - - case StorageClassOutput: - { - ib_var_ref = patch ? patch_stage_out_var_name : stage_out_var_name; - - // Add the output interface struct as a local variable to the entry function. - // If the entry point should return the output struct, set the entry function - // to return the output interface struct, otherwise to return nothing. + break; + case ExecutionModelTessellationEvaluation: + if (!msl_options.raw_buffer_tese_input) + break; + if (patch) + { + entry_func.fixup_hooks_in.push_back( + [=]() + { + statement("const device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", patch_input_buffer_var_name, "[", to_expression(builtin_primitive_id_id), + "];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back( + [=]() + { + statement("const device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_in = &", + input_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ", + get_entry_point().output_vertices, "];"); + }); + } + break; + default: + break; + } + break; + + case StorageClassOutput: + { + ib_var_ref = patch ? patch_stage_out_var_name : stage_out_var_name; + + // Add the output interface struct as a local variable to the entry function. + // If the entry point should return the output struct, set the entry function + // to return the output interface struct, otherwise to return nothing. + // Watch out for the rare case where the terminator of the last entry point block is a + // Kill, instead of a Return. 
Based on SPIR-V's block-domination rules, we assume that + // any block that has a Kill will also have a terminating Return, except the last block. // Indicate the output var requires early initialization. bool ep_should_return_output = !get_is_rasterization_disabled(); uint32_t rtn_id = ep_should_return_output ? ib_var_id : 0; @@ -2132,7 +3981,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) for (auto &blk_id : entry_func.blocks) { auto &blk = get(blk_id); - if (blk.terminator == SPIRBlock::Return) + if (blk.terminator == SPIRBlock::Return || (blk.terminator == SPIRBlock::Kill && blk_id == entry_func.blocks.back())) blk.return_value = rtn_id; } vars_needing_early_declaration.push_back(ib_var_id); @@ -2152,26 +4001,72 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) { // The first member of the indirect buffer is always the number of vertices // to draw. - statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ", - output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id), " - ", - to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ", - to_expression(builtin_vertex_idx_id), " - ", to_expression(builtin_base_vertex_id), - "];"); + // We zero-base the InstanceID & VertexID variables for HLSL emulation elsewhere, so don't do it twice + if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation) + { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", output_buffer_var_name, "[", to_expression(builtin_invocation_id_id), + ".y * ", to_expression(builtin_stage_input_size_id), ".x + ", + to_expression(builtin_invocation_id_id), ".x];"); + } + else if (msl_options.enable_base_index_zero) + { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", output_buffer_var_name, "[", to_expression(builtin_instance_idx_id), + " * 
spvIndirectParams[0] + ", to_expression(builtin_vertex_idx_id), "];"); + } + else + { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id), + " - ", to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ", + to_expression(builtin_vertex_idx_id), " - ", + to_expression(builtin_base_vertex_id), "];"); + } } }); break; case ExecutionModelTessellationControl: - if (patch) - entry_func.fixup_hooks_in.push_back([=]() { - statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ", - patch_output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "];"); - }); + if (msl_options.multi_patch_workgroup) + { + // We cannot use PrimitiveId here, because the hook may not have run yet. + if (patch) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", patch_output_buffer_var_name, "[", to_expression(builtin_invocation_id_id), + ".x / ", get_entry_point().output_vertices, "];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &", + output_buffer_var_name, "[", to_expression(builtin_invocation_id_id), ".x - ", + to_expression(builtin_invocation_id_id), ".x % ", + get_entry_point().output_vertices, "];"); + }); + } + } else - entry_func.fixup_hooks_in.push_back([=]() { - statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &", - output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ", - get_entry_point().output_vertices, "];"); - }); + { + if (patch) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", patch_output_buffer_var_name, "[", 
to_expression(builtin_primitive_id_id), + "];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &", + output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ", + get_entry_point().output_vertices, "];"); + }); + } + } break; default: break; @@ -2189,24 +4084,176 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) for (auto *p_var : vars) { - bool strip_array = - (get_execution_model() == ExecutionModelTessellationControl || - (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput)) && - !patch; - add_variable_to_interface_block(storage, ib_var_ref, ib_type, *p_var, strip_array); + bool strip_array = (is_tesc_shader() || (is_tese_shader() && storage == StorageClassInput)) && !patch; + + // Fixing up flattened stores in TESC is impossible since the memory is group shared either via + // device (not masked) or threadgroup (masked) storage classes and it's race condition city. + meta.strip_array = strip_array; + meta.allow_local_declaration = !strip_array && !(is_tesc_shader() && storage == StorageClassOutput); + add_variable_to_interface_block(storage, ib_var_ref, ib_type, *p_var, meta); + } + + if (((is_tesc_shader() && msl_options.multi_patch_workgroup) || + (is_tese_shader() && msl_options.raw_buffer_tese_input)) && + storage == StorageClassInput) + { + // For tessellation inputs, add all outputs from the previous stage to ensure + // the struct containing them is the correct size and layout. + for (auto &input : inputs_by_location) + { + if (location_inputs_in_use.count(input.first.location) != 0) + continue; + + if (patch != (input.second.rate == MSL_SHADER_VARIABLE_RATE_PER_PATCH)) + continue; + + // Tessellation levels have their own struct, so there's no need to add them here. 
+ if (input.second.builtin == BuiltInTessLevelOuter || input.second.builtin == BuiltInTessLevelInner) + continue; + + // Create a fake variable to put at the location. + uint32_t offset = ir.increase_bound_by(4); + uint32_t type_id = offset; + uint32_t array_type_id = offset + 1; + uint32_t ptr_type_id = offset + 2; + uint32_t var_id = offset + 3; + + SPIRType type; + switch (input.second.format) + { + case MSL_SHADER_VARIABLE_FORMAT_UINT16: + case MSL_SHADER_VARIABLE_FORMAT_ANY16: + type.basetype = SPIRType::UShort; + type.width = 16; + break; + case MSL_SHADER_VARIABLE_FORMAT_ANY32: + default: + type.basetype = SPIRType::UInt; + type.width = 32; + break; + } + type.vecsize = input.second.vecsize; + set(type_id, type); + + type.array.push_back(0); + type.array_size_literal.push_back(true); + type.parent_type = type_id; + set(array_type_id, type); + + type.pointer = true; + type.pointer_depth++; + type.parent_type = array_type_id; + type.storage = storage; + auto &ptr_type = set(ptr_type_id, type); + ptr_type.self = array_type_id; + + auto &fake_var = set(var_id, ptr_type_id, storage); + set_decoration(var_id, DecorationLocation, input.first.location); + if (input.first.component) + set_decoration(var_id, DecorationComponent, input.first.component); + + meta.strip_array = true; + meta.allow_local_declaration = false; + add_variable_to_interface_block(storage, ib_var_ref, ib_type, fake_var, meta); + } + } + + if (capture_output_to_buffer && storage == StorageClassOutput) + { + // For captured output, add all inputs from the next stage to ensure + // the struct containing them is the correct size and layout. This is + // necessary for certain implicit builtins that may nonetheless be read, + // even when they aren't written. + for (auto &output : outputs_by_location) + { + if (location_outputs_in_use.count(output.first.location) != 0) + continue; + + // Create a fake variable to put at the location. 
+ uint32_t offset = ir.increase_bound_by(4); + uint32_t type_id = offset; + uint32_t array_type_id = offset + 1; + uint32_t ptr_type_id = offset + 2; + uint32_t var_id = offset + 3; + + SPIRType type; + switch (output.second.format) + { + case MSL_SHADER_VARIABLE_FORMAT_UINT16: + case MSL_SHADER_VARIABLE_FORMAT_ANY16: + type.basetype = SPIRType::UShort; + type.width = 16; + break; + case MSL_SHADER_VARIABLE_FORMAT_ANY32: + default: + type.basetype = SPIRType::UInt; + type.width = 32; + break; + } + type.vecsize = output.second.vecsize; + set(type_id, type); + + if (is_tesc_shader()) + { + type.array.push_back(0); + type.array_size_literal.push_back(true); + type.parent_type = type_id; + set(array_type_id, type); + } + + type.pointer = true; + type.pointer_depth++; + type.parent_type = is_tesc_shader() ? array_type_id : type_id; + type.storage = storage; + auto &ptr_type = set(ptr_type_id, type); + ptr_type.self = type.parent_type; + + auto &fake_var = set(var_id, ptr_type_id, storage); + set_decoration(var_id, DecorationLocation, output.first.location); + if (output.first.component) + set_decoration(var_id, DecorationComponent, output.first.component); + + meta.strip_array = true; + meta.allow_local_declaration = false; + add_variable_to_interface_block(storage, ib_var_ref, ib_type, fake_var, meta); + } + } + + // When multiple variables need to access same location, + // unroll locations one by one and we will flatten output or input as necessary. 
+ for (auto &loc : meta.location_meta) + { + uint32_t location = loc.first; + auto &location_meta = loc.second; + + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + uint32_t type_id = build_extended_vector_type(location_meta.base_type_id, location_meta.num_components); + ib_type.member_types.push_back(type_id); + + set_member_name(ib_type.self, ib_mbr_idx, join("m_location_", location)); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(type_id), storage); + + if (location_meta.flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (location_meta.noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (location_meta.centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (location_meta.sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); } // Sort the members of the structure by their locations. - MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Location); + MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::LocationThenBuiltInType); member_sorter.sort(); // The member indices were saved to the original variables, but after the members // were sorted, those indices are now likely incorrect. Fix those up now. - if (!patch) - fix_up_interface_member_indices(storage, ib_type_id); + fix_up_interface_member_indices(storage, ib_type_id); // For patch inputs, add one more member, holding the array of control point data. 
- if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput && patch && + if (is_tese_shader() && !msl_options.raw_buffer_tese_input && storage == StorageClassInput && patch && stage_in_var_id) { uint32_t pcp_type_id = ir.increase_bound_by(1); @@ -2220,6 +4267,9 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) set_member_name(ib_type.self, mbr_idx, "gl_in"); } + if (storage == StorageClassInput) + set_decoration(ib_var_id, DecorationNonWritable); + return ib_var_id; } @@ -2231,7 +4281,7 @@ uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageCla uint32_t ib_ptr_var_id; uint32_t next_id = ir.increase_bound_by(3); auto &ib_type = expression_type(ib_var_id); - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader() || (is_tese_shader() && msl_options.raw_buffer_tese_input)) { // Tessellation control per-vertex I/O is presented as an array, so we must // do the same with our struct here. @@ -2239,7 +4289,13 @@ uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageCla auto &ib_ptr_type = set(ib_ptr_type_id, ib_type); ib_ptr_type.parent_type = ib_ptr_type.type_alias = ib_type.self; ib_ptr_type.pointer = true; - ib_ptr_type.storage = storage == StorageClassInput ? StorageClassWorkgroup : StorageClassStorageBuffer; + ib_ptr_type.pointer_depth++; + ib_ptr_type.storage = storage == StorageClassInput ? + ((is_tesc_shader() && msl_options.multi_patch_workgroup) || + (is_tese_shader() && msl_options.raw_buffer_tese_input) ? + StorageClassStorageBuffer : + StorageClassWorkgroup) : + StorageClassStorageBuffer; ir.meta[ib_ptr_type_id] = ir.meta[ib_type.self]; // To ensure that get_variable_data_type() doesn't strip off the pointer, // which we need, use another pointer. 
@@ -2252,7 +4308,9 @@ uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageCla ib_ptr_var_id = next_id; set(ib_ptr_var_id, ib_ptr_ptr_type_id, StorageClassFunction, 0); - set_name(ib_ptr_var_id, storage == StorageClassInput ? input_wg_var_name : "gl_out"); + set_name(ib_ptr_var_id, storage == StorageClassInput ? "gl_in" : "gl_out"); + if (storage == StorageClassInput) + set_decoration(ib_ptr_var_id, DecorationNonWritable); } else { @@ -2301,6 +4359,7 @@ uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn buil auto &ptr_type = set(ptr_type_id); ptr_type = base_type; ptr_type.pointer = true; + ptr_type.pointer_depth++; ptr_type.storage = type.storage; ptr_type.parent_type = base_type_id; return ptr_type_id; @@ -2309,395 +4368,821 @@ uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn buil return type_id; } -// Ensure that the type is compatible with the vertex attribute. +// Ensure that the type is compatible with the shader input. // If it is, simply return the given type ID. // Otherwise, create a new type, and return its ID. -uint32_t CompilerMSL::ensure_correct_attribute_type(uint32_t type_id, uint32_t location) +uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t component, uint32_t num_components, bool strip_array) { auto &type = get(type_id); - auto p_va = vtx_attrs_by_location.find(location); - if (p_va == end(vtx_attrs_by_location)) + uint32_t max_array_dimensions = strip_array ? 1 : 0; + + // Struct and array types must match exactly. 
+ if (type.basetype == SPIRType::Struct || type.array.size() > max_array_dimensions) return type_id; + auto p_va = inputs_by_location.find({location, component}); + if (p_va == end(inputs_by_location)) + { + if (num_components > type.vecsize) + return build_extended_vector_type(type_id, num_components); + else + return type_id; + } + + if (num_components == 0) + num_components = p_va->second.vecsize; + switch (p_va->second.format) { - case MSL_VERTEX_FORMAT_UINT8: + case MSL_SHADER_VARIABLE_FORMAT_UINT8: { switch (type.basetype) { case SPIRType::UByte: case SPIRType::UShort: case SPIRType::UInt: - return type_id; + if (num_components > type.vecsize) + return build_extended_vector_type(type_id, num_components); + else + return type_id; + case SPIRType::Short: + return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize, + SPIRType::UShort); case SPIRType::Int: - break; + return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize, + SPIRType::UInt); + default: SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader"); } - uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1); - uint32_t base_type_id = next_id++; - auto &base_type = set(base_type_id); - base_type = type; - base_type.basetype = type.basetype == SPIRType::Short ? 
SPIRType::UShort : SPIRType::UInt; - base_type.pointer = false; - - if (!type.pointer) - return base_type_id; - - uint32_t ptr_type_id = next_id++; - auto &ptr_type = set(ptr_type_id); - ptr_type = base_type; - ptr_type.pointer = true; - ptr_type.storage = type.storage; - ptr_type.parent_type = base_type_id; - return ptr_type_id; } - case MSL_VERTEX_FORMAT_UINT16: + case MSL_SHADER_VARIABLE_FORMAT_UINT16: { switch (type.basetype) { case SPIRType::UShort: case SPIRType::UInt: - return type_id; + if (num_components > type.vecsize) + return build_extended_vector_type(type_id, num_components); + else + return type_id; + case SPIRType::Int: - break; + return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize, + SPIRType::UInt); + default: SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader"); } - uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1); - uint32_t base_type_id = next_id++; - auto &base_type = set(base_type_id); - base_type = type; - base_type.basetype = SPIRType::UInt; - base_type.pointer = false; - - if (!type.pointer) - return base_type_id; - - uint32_t ptr_type_id = next_id++; - auto &ptr_type = set(ptr_type_id); - ptr_type = base_type; - ptr_type.pointer = true; - ptr_type.storage = type.storage; - ptr_type.parent_type = base_type_id; - return ptr_type_id; } default: - case MSL_VERTEX_FORMAT_OTHER: + if (num_components > type.vecsize) + type_id = build_extended_vector_type(type_id, num_components); break; } return type_id; } +void CompilerMSL::mark_struct_members_packed(const SPIRType &type) +{ + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + if (has_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked)) + return; + + set_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked); + + // Problem case! Struct needs to be placed at an awkward alignment. 
+ // Mark every member of the child struct as packed. + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < mbr_cnt; i++) + { + auto &mbr_type = get(type.member_types[i]); + if (mbr_type.basetype == SPIRType::Struct) + { + // Recursively mark structs as packed. + auto *struct_type = &mbr_type; + while (!struct_type->array.empty()) + struct_type = &get(struct_type->parent_type); + mark_struct_members_packed(*struct_type); + } + else if (!is_scalar(mbr_type)) + set_extended_member_decoration(type.self, i, SPIRVCrossDecorationPhysicalTypePacked); + } +} + +void CompilerMSL::mark_scalar_layout_structs(const SPIRType &type) +{ + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < mbr_cnt; i++) + { + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + auto &mbr_type = get(type.member_types[i]); + if (mbr_type.basetype == SPIRType::Struct && !(mbr_type.pointer && mbr_type.storage == StorageClassPhysicalStorageBuffer)) + { + auto *struct_type = &mbr_type; + while (!struct_type->array.empty()) + struct_type = &get(struct_type->parent_type); + + if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPhysicalTypePacked)) + continue; + + uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, i); + uint32_t msl_size = get_declared_struct_member_size_msl(type, i); + uint32_t spirv_offset = type_struct_member_offset(type, i); + uint32_t spirv_offset_next; + if (i + 1 < mbr_cnt) + spirv_offset_next = type_struct_member_offset(type, i + 1); + else + spirv_offset_next = spirv_offset + msl_size; + + // Both are complicated cases. In scalar layout, a struct of float3 might just consume 12 bytes, + // and the next member will be placed at offset 12. 
+ bool struct_is_misaligned = (spirv_offset % msl_alignment) != 0; + bool struct_is_too_large = spirv_offset + msl_size > spirv_offset_next; + uint32_t array_stride = 0; + bool struct_needs_explicit_padding = false; + + // Verify that if a struct is used as an array that ArrayStride matches the effective size of the struct. + if (!mbr_type.array.empty()) + { + array_stride = type_struct_member_array_stride(type, i); + uint32_t dimensions = uint32_t(mbr_type.array.size() - 1); + for (uint32_t dim = 0; dim < dimensions; dim++) + { + uint32_t array_size = to_array_size_literal(mbr_type, dim); + array_stride /= max(array_size, 1u); + } + + // Set expected struct size based on ArrayStride. + struct_needs_explicit_padding = true; + + // If struct size is larger than array stride, we might be able to fit, if we tightly pack. + if (get_declared_struct_size_msl(*struct_type) > array_stride) + struct_is_too_large = true; + } + + if (struct_is_misaligned || struct_is_too_large) + mark_struct_members_packed(*struct_type); + mark_scalar_layout_structs(*struct_type); + + if (struct_needs_explicit_padding) + { + msl_size = get_declared_struct_size_msl(*struct_type, true, true); + if (array_stride < msl_size) + { + SPIRV_CROSS_THROW("Cannot express an array stride smaller than size of struct type."); + } + else + { + if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget)) + { + if (array_stride != + get_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget)) + SPIRV_CROSS_THROW( + "A struct is used with different array strides. Cannot express this in MSL."); + } + else + set_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget, array_stride); + } + } + } + } +} + // Sort the members of the struct type by offset, and pack and then pad members where needed // to align MSL members with SPIR-V offsets. The struct members are iterated twice. 
Packing // occurs first, followed by padding, because packing a member reduces both its size and its // natural alignment, possibly requiring a padding member to be added ahead of it. -void CompilerMSL::align_struct(SPIRType &ib_type) +void CompilerMSL::align_struct(SPIRType &ib_type, unordered_set &aligned_structs) { - uint32_t &ib_type_id = ib_type.self; + // We align structs recursively, so stop any redundant work. + ID &ib_type_id = ib_type.self; + if (aligned_structs.count(ib_type_id)) + return; + aligned_structs.insert(ib_type_id); // Sort the members of the interface structure by their offset. // They should already be sorted per SPIR-V spec anyway. MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Offset); member_sorter.sort(); - uint32_t mbr_cnt = uint32_t(ib_type.member_types.size()); + auto mbr_cnt = uint32_t(ib_type.member_types.size()); + + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + // Pack any dependent struct types before we pack a parent struct. + auto &mbr_type = get(ib_type.member_types[mbr_idx]); + if (mbr_type.basetype == SPIRType::Struct) + align_struct(mbr_type, aligned_structs); + } // Test the alignment of each member, and if a member should be closer to the previous // member than the default spacing expects, it is likely that the previous member is in // a packed format. If so, and the previous member is packable, pack it. - // For example...this applies to any 3-element vector that is followed by a scalar. - uint32_t curr_offset = 0; + // For example ... this applies to any 3-element vector that is followed by a scalar. 
+ uint32_t msl_offset = 0; for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) { - if (is_member_packable(ib_type, mbr_idx)) - { - set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPacked); - set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPackedType, - get_member_packed_type(ib_type, mbr_idx)); - } + // This checks the member in isolation, if the member needs some kind of type remapping to conform to SPIR-V + // offsets, array strides and matrix strides. + ensure_member_packing_rules_msl(ib_type, mbr_idx); - // Align current offset to the current member's default alignment. - size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1; - uint32_t aligned_curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask); + // Align current offset to the current member's default alignment. If the member was packed, it will observe + // the updated alignment here. + uint32_t msl_align_mask = get_declared_struct_member_alignment_msl(ib_type, mbr_idx) - 1; + uint32_t aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask; // Fetch the member offset as declared in the SPIRV. - uint32_t mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset); - if (mbr_offset > aligned_curr_offset) + uint32_t spirv_mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset); + if (spirv_mbr_offset > aligned_msl_offset) { // Since MSL and SPIR-V have slightly different struct member alignment and - // size rules, we'll pad to standard C-packing rules. If the member is farther + // size rules, we'll pad to standard C-packing rules with a char[] array. If the member is farther // away than C-packing, expects, add an inert padding member before the the member. 
- MSLStructMemberKey key = get_struct_member_key(ib_type_id, mbr_idx); - struct_member_padding[key] = mbr_offset - curr_offset; + uint32_t padding_bytes = spirv_mbr_offset - aligned_msl_offset; + set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPaddingTarget, padding_bytes); + + // Re-align as a sanity check that aligning post-padding matches up. + msl_offset += padding_bytes; + aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask; + } + else if (spirv_mbr_offset < aligned_msl_offset) + { + // This should not happen, but deal with unexpected scenarios. + // It *might* happen if a sub-struct has a larger alignment requirement in MSL than SPIR-V. + SPIRV_CROSS_THROW("Cannot represent buffer block correctly in MSL."); } + assert(aligned_msl_offset == spirv_mbr_offset); + // Increment the current offset to be positioned immediately after the current member. // Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here. if (mbr_idx + 1 < mbr_cnt) - curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size_msl(ib_type, mbr_idx)); + msl_offset = aligned_msl_offset + get_declared_struct_member_size_msl(ib_type, mbr_idx); } } -// Returns whether the specified struct member supports a packable type -// variation that is smaller than the unpacked variation of that type. 
-bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index, uint32_t base_offset) +bool CompilerMSL::validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const { - // We've already marked it as packable - if (has_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPacked)) - return true; - - auto &mbr_type = get(ib_type.member_types[index]); - - uint32_t component_size = mbr_type.width / 8; - uint32_t unpacked_mbr_size; - if (mbr_type.vecsize == 3) - unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns; - else - unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns; + auto &mbr_type = get(type.member_types[index]); + uint32_t spirv_offset = get_member_decoration(type.self, index, DecorationOffset); - // Special case for packing. Check for float[] or vec2[] in std140 layout. Here we actually need to pad out instead, - // but we will use the same mechanism. - if (is_array(mbr_type) && (is_scalar(mbr_type) || is_vector(mbr_type)) && mbr_type.vecsize <= 2 && - type_struct_member_array_stride(ib_type, index) == 4 * component_size) + if (index + 1 < type.member_types.size()) { - return true; + // First, we will check offsets. If SPIR-V offset + MSL size > SPIR-V offset of next member, + // we *must* perform some kind of remapping, no way getting around it. + // We can always pad after this member if necessary, so that case is fine. 
+ uint32_t spirv_offset_next = get_member_decoration(type.self, index + 1, DecorationOffset); + assert(spirv_offset_next >= spirv_offset); + uint32_t maximum_size = spirv_offset_next - spirv_offset; + uint32_t msl_mbr_size = get_declared_struct_member_size_msl(type, index); + if (msl_mbr_size > maximum_size) + return false; } - uint32_t mbr_offset_curr = base_offset + get_member_decoration(ib_type.self, index, DecorationOffset); - if (mbr_type.basetype == SPIRType::Struct) + if (!mbr_type.array.empty()) { - // If this is a struct type, check if any of its members need packing. - for (uint32_t i = 0; i < mbr_type.member_types.size(); i++) - { - if (is_member_packable(mbr_type, i, mbr_offset_curr)) - { - set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPacked); - set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPackedType, - get_member_packed_type(mbr_type, i)); - } - } - size_t declared_struct_size = get_declared_struct_size(mbr_type); - size_t alignment = get_declared_struct_member_alignment(ib_type, index); - declared_struct_size = (declared_struct_size + alignment - 1) & ~(alignment - 1); - // Check for array of struct, where the SPIR-V declares an array stride which is larger than the struct itself. - // This can happen for struct A { float a }; A a[]; in std140 layout. - // TODO: Emit a padded struct which can be used for this purpose. - if (is_array(mbr_type)) - { - size_t array_stride = type_struct_member_array_stride(ib_type, index); - if (array_stride > declared_struct_size) - return true; - if (array_stride < declared_struct_size) - { - // If the stride is *less* (i.e. more tightly packed), then - // we need to pack the members of the struct itself. 
- for (uint32_t i = 0; i < mbr_type.member_types.size(); i++) - { - if (is_member_packable(mbr_type, i, mbr_offset_curr + array_stride)) - { - set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPacked); - set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPackedType, - get_member_packed_type(mbr_type, i)); - } - } - } - } - else - { - // Pack if there is not enough space between this member and next. - if (index < ib_type.member_types.size() - 1) - { - uint32_t mbr_offset_next = - base_offset + get_member_decoration(ib_type.self, index + 1, DecorationOffset); - if (declared_struct_size > mbr_offset_next - mbr_offset_curr) - { - for (uint32_t i = 0; i < mbr_type.member_types.size(); i++) - { - if (is_member_packable(mbr_type, i, mbr_offset_next)) - { - set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPacked); - set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPackedType, - get_member_packed_type(mbr_type, i)); - } - } - } - } - } - } - - // TODO: Another sanity check for matrices. We currently do not support std140 matrices which need to be padded out per column. - //if (is_matrix(mbr_type) && mbr_type.vecsize <= 2 && type_struct_member_matrix_stride(ib_type, index) == 16) - // SPIRV_CROSS_THROW("Currently cannot support matrices with small vector size in std140 layout."); - - // Pack if the member's offset doesn't conform to the type's usual - // alignment. For example, a float3 at offset 4. - if (mbr_offset_curr % get_declared_struct_member_alignment(ib_type, index)) - return true; - - // Only vectors or 3-row matrices need to be packed. - if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3)) - return false; + // If we have an array type, array stride must match exactly with SPIR-V. - if (is_array(mbr_type)) - { - // If member is an array, and the array stride is larger than the type needs, don't pack it. - // Take into consideration multi-dimentional arrays. 
- uint32_t md_elem_cnt = 1; - size_t last_elem_idx = mbr_type.array.size() - 1; - for (uint32_t i = 0; i < last_elem_idx; i++) - md_elem_cnt *= max(to_array_size_literal(mbr_type, i), 1u); + // An exception to this requirement is if we have one array element. + // This comes from DX scalar layout workaround. + // If app tries to be cheeky and access the member out of bounds, this will not work, but this is the best we can do. + // In OpAccessChain with logical memory models, access chains must be in-bounds in SPIR-V specification. + bool relax_array_stride = mbr_type.array.back() == 1 && mbr_type.array_size_literal.back(); - uint32_t unpacked_array_stride = unpacked_mbr_size * md_elem_cnt; - uint32_t array_stride = type_struct_member_array_stride(ib_type, index); - return unpacked_array_stride > array_stride; - } - else - { - // Pack if there is not enough space between this member and next. - // If last member, only pack if it's a row-major matrix. - if (index < ib_type.member_types.size() - 1) + if (!relax_array_stride) { - uint32_t mbr_offset_next = base_offset + get_member_decoration(ib_type.self, index + 1, DecorationOffset); - return unpacked_mbr_size > mbr_offset_next - mbr_offset_curr; + uint32_t spirv_array_stride = type_struct_member_array_stride(type, index); + uint32_t msl_array_stride = get_declared_struct_member_array_stride_msl(type, index); + if (spirv_array_stride != msl_array_stride) + return false; } - else - return is_matrix(mbr_type); } -} -uint32_t CompilerMSL::get_member_packed_type(SPIRType &type, uint32_t index) -{ - auto &mbr_type = get(type.member_types[index]); - if (is_matrix(mbr_type) && has_member_decoration(type.self, index, DecorationRowMajor)) + if (is_matrix(mbr_type)) { - // Packed row-major matrices are stored transposed. But, we don't know if - // we're dealing with a row-major matrix at the time we need to load it. 
- // So, we'll set a packed type with the columns and rows transposed, so we'll - // know to use the correct constructor. - uint32_t new_type_id = ir.increase_bound_by(1); - auto &transpose_type = set(new_type_id); - transpose_type = mbr_type; - transpose_type.vecsize = mbr_type.columns; - transpose_type.columns = mbr_type.vecsize; - return new_type_id; + // Need to check MatrixStride as well. + uint32_t spirv_matrix_stride = type_struct_member_matrix_stride(type, index); + uint32_t msl_matrix_stride = get_declared_struct_member_matrix_stride_msl(type, index); + if (spirv_matrix_stride != msl_matrix_stride) + return false; } - return type.member_types[index]; -} -// Returns a combination of type ID and member index for use as hash key -MSLStructMemberKey CompilerMSL::get_struct_member_key(uint32_t type_id, uint32_t index) -{ - MSLStructMemberKey k = type_id; - k <<= 32; - k += index; - return k; + // Now, we check alignment. + uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, index); + if ((spirv_offset % msl_alignment) != 0) + return false; + + // We're in the clear. + return true; +} + +// Here we need to verify that the member type we declare conforms to Offset, ArrayStride or MatrixStride restrictions. +// If there is a mismatch, we need to emit remapped types, either normal types, or "packed_X" types. +// In odd cases we need to emit packed and remapped types, for e.g. weird matrices or arrays with weird array strides. +void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index) +{ + if (validate_member_packing_rules_msl(ib_type, index)) + return; + + // We failed validation. + // This case will be nightmare-ish to deal with. This could possibly happen if struct alignment does not quite + // match up with what we want. Scalar block layout comes to mind here where we might have to work around the rule + // that struct alignment == max alignment of all members and struct size depends on this alignment. 
+ // Can't repack structs, but can repack pointers to structs. + auto &mbr_type = get(ib_type.member_types[index]); + bool is_buff_ptr = mbr_type.pointer && mbr_type.storage == StorageClassPhysicalStorageBuffer; + if (mbr_type.basetype == SPIRType::Struct && !is_buff_ptr) + SPIRV_CROSS_THROW("Cannot perform any repacking for structs when it is used as a member of another struct."); + + // Perform remapping here. + // There is nothing to be gained by using packed scalars, so don't attempt it. + if (!is_scalar(ib_type)) + set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked); + + // Try validating again, now with packed. + if (validate_member_packing_rules_msl(ib_type, index)) + return; + + // We're in deep trouble, and we need to create a new PhysicalType which matches up with what we expect. + // A lot of work goes here ... + // We will need remapping on Load and Store to translate the types between Logical and Physical. + + // First, we check if we have small vector std140 array. + // We detect this if we have an array of vectors, and array stride is greater than number of elements. + if (!mbr_type.array.empty() && !is_matrix(mbr_type)) + { + uint32_t array_stride = type_struct_member_array_stride(ib_type, index); + + // Hack off array-of-arrays until we find the array stride per element we must have to make it work. + uint32_t dimensions = uint32_t(mbr_type.array.size() - 1); + for (uint32_t dim = 0; dim < dimensions; dim++) + array_stride /= max(to_array_size_literal(mbr_type, dim), 1u); + + // Pointers are 8 bytes + uint32_t mbr_width_in_bytes = is_buff_ptr ? 
8 : (mbr_type.width / 8); + uint32_t elems_per_stride = array_stride / mbr_width_in_bytes; + + if (elems_per_stride == 3) + SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios."); + else if (elems_per_stride > 4) + SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL."); + + auto physical_type = mbr_type; + physical_type.vecsize = elems_per_stride; + physical_type.parent_type = 0; + + // If this is a physical buffer pointer, replace type with a ulongn vector. + if (is_buff_ptr) + { + physical_type.width = 64; + physical_type.basetype = to_unsigned_basetype(physical_type.width); + physical_type.pointer = false; + physical_type.pointer_depth = false; + physical_type.forward_pointer = false; + } + + uint32_t type_id = ir.increase_bound_by(1); + set(type_id, physical_type); + set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id); + set_decoration(type_id, DecorationArrayStride, array_stride); + + // Remove packed_ for vectors of size 1, 2 and 4. + unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked); + } + else if (is_matrix(mbr_type)) + { + // MatrixStride might be std140-esque. 
+ uint32_t matrix_stride = type_struct_member_matrix_stride(ib_type, index); + + uint32_t elems_per_stride = matrix_stride / (mbr_type.width / 8); + + if (elems_per_stride == 3) + SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios."); + else if (elems_per_stride > 4) + SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL."); + + bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor); + + auto physical_type = mbr_type; + physical_type.parent_type = 0; + if (row_major) + physical_type.columns = elems_per_stride; + else + physical_type.vecsize = elems_per_stride; + uint32_t type_id = ir.increase_bound_by(1); + set(type_id, physical_type); + set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id); + + // Remove packed_ for vectors of size 1, 2 and 4. + unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked); + } + else + SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL."); + + // Try validating again, now with physical type remapping. + if (validate_member_packing_rules_msl(ib_type, index)) + return; + + // We might have a particular odd scalar layout case where the last element of an array + // does not take up as much space as the ArrayStride or MatrixStride. This can happen with DX cbuffers. + // The "proper" workaround for this is extremely painful and essentially impossible in the edge case of float3[], + // so we hack around it by declaring the offending array or matrix with one less array size/col/row, + // and rely on padding to get the correct value. We will technically access arrays out of bounds into the padding region, + // but it should spill over gracefully without too much trouble. We rely on behavior like this for unsized arrays anyways. + + // E.g. 
we might observe a physical layout of: + // { float2 a[2]; float b; } in cbuffer layout where ArrayStride of a is 16, but offset of b is 24, packed right after a[1] ... + uint32_t type_id = get_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID); + auto &type = get(type_id); + + // Modify the physical type in-place. This is safe since each physical type workaround is a copy. + if (is_array(type)) + { + if (type.array.back() > 1) + { + if (!type.array_size_literal.back()) + SPIRV_CROSS_THROW("Cannot apply scalar layout workaround with spec constant array size."); + type.array.back() -= 1; + } + else + { + // We have an array of size 1, so we cannot decrement that. Our only option now is to + // force a packed layout instead, and drop the physical type remap since ArrayStride is meaningless now. + unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID); + set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked); + } + } + else if (is_matrix(type)) + { + bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor); + if (!row_major) + { + // Slice off one column. If we only have 2 columns, this might turn the matrix into a vector with one array element instead. + if (type.columns > 2) + { + type.columns--; + } + else if (type.columns == 2) + { + type.columns = 1; + assert(type.array.empty()); + type.array.push_back(1); + type.array_size_literal.push_back(true); + } + } + else + { + // Slice off one row. If we only have 2 rows, this might turn the matrix into a vector with one array element instead. + if (type.vecsize > 2) + { + type.vecsize--; + } + else if (type.vecsize == 2) + { + type.vecsize = type.columns; + type.columns = 1; + assert(type.array.empty()); + type.array.push_back(1); + type.array_size_literal.push_back(true); + } + } + } + + // This better validate now, or we must fail gracefully. 
+ if (!validate_member_packing_rules_msl(ib_type, index)) + SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL."); } void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) { - if (!has_extended_decoration(lhs_expression, SPIRVCrossDecorationPacked) || - get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType) == 0) + auto &type = expression_type(rhs_expression); + + bool lhs_remapped_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID); + bool lhs_packed_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypePacked); + auto *lhs_e = maybe_get(lhs_expression); + auto *rhs_e = maybe_get(rhs_expression); + + bool transpose = lhs_e && lhs_e->need_transpose; + + // No physical type remapping, and no packed type, so can just emit a store directly. + if (!lhs_remapped_type && !lhs_packed_type) + { + // We might not be dealing with remapped physical types or packed types, + // but we might be doing a clean store to a row-major matrix. + // In this case, we just flip transpose states, and emit the store, a transpose must be in the RHS expression, if any. + if (is_matrix(type) && lhs_e && lhs_e->need_transpose) + { + lhs_e->need_transpose = false; + + if (rhs_e && rhs_e->need_transpose) + { + // Direct copy, but might need to unpack RHS. + // Skip the transpose, as we will transpose when writing to LHS and transpose(transpose(T)) == T. + rhs_e->need_transpose = false; + statement(to_expression(lhs_expression), " = ", to_unpacked_row_major_matrix_expression(rhs_expression), + ";"); + rhs_e->need_transpose = true; + } + else + statement(to_expression(lhs_expression), " = transpose(", to_unpacked_expression(rhs_expression), ");"); + + lhs_e->need_transpose = true; + register_write(lhs_expression); + } + else if (lhs_e && lhs_e->need_transpose) + { + lhs_e->need_transpose = false; + + // Storing a column to a row-major matrix. Unroll the write. 
+ for (uint32_t c = 0; c < type.vecsize; c++) + { + auto lhs_expr = to_dereferenced_expression(lhs_expression); + auto column_index = lhs_expr.find_last_of('['); + if (column_index != string::npos) + { + statement(lhs_expr.insert(column_index, join('[', c, ']')), " = ", + to_extract_component_expression(rhs_expression, c), ";"); + } + } + lhs_e->need_transpose = true; + register_write(lhs_expression); + } + else + CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression); + } + else if (!lhs_remapped_type && !is_matrix(type) && !transpose) { + // Even if the target type is packed, we can directly store to it. We cannot store to packed matrices directly, + // since they are declared as array of vectors instead, and we need the fallback path below. CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression); } else { - // Special handling when storing to a float[] or float2[] in std140 layout. + // Special handling when storing to a remapped physical type. + // This is mostly to deal with std140 padded matrices or vectors. + + TypeID physical_type_id = lhs_remapped_type ? + ID(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID)) : + type.self; - uint32_t type_id = get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType); - auto &type = get(type_id); - string lhs = to_dereferenced_expression(lhs_expression); - string rhs = to_pointer_expression(rhs_expression); - uint32_t stride = get_decoration(type_id, DecorationArrayStride); + auto &physical_type = get(physical_type_id); + + string cast_addr_space = "thread"; + auto *p_var_lhs = maybe_get_backing_variable(lhs_expression); + if (p_var_lhs) + cast_addr_space = get_type_address_space(get(p_var_lhs->basetype), lhs_expression); if (is_matrix(type)) { + const char *packed_pfx = lhs_packed_type ? "packed_" : ""; + // Packed matrices are stored as arrays of packed vectors, so we need // to assign the vectors one at a time. 
// For row-major matrices, we need to transpose the *right-hand* side, - // not the left-hand side. Otherwise, the changes will be lost. - auto *lhs_e = maybe_get(lhs_expression); - auto *rhs_e = maybe_get(rhs_expression); - bool transpose = lhs_e && lhs_e->need_transpose; + // not the left-hand side. + + // Lots of cases to cover here ... + + bool rhs_transpose = rhs_e && rhs_e->need_transpose; + SPIRType write_type = type; + string cast_expr; + + // We're dealing with transpose manually. + if (rhs_transpose) + rhs_e->need_transpose = false; + if (transpose) { + // We're dealing with transpose manually. lhs_e->need_transpose = false; - if (rhs_e) rhs_e->need_transpose = !rhs_e->need_transpose; - lhs = to_dereferenced_expression(lhs_expression); - rhs = to_pointer_expression(rhs_expression); + write_type.vecsize = type.columns; + write_type.columns = 1; + + if (physical_type.columns != type.columns) + cast_expr = join("(", cast_addr_space, " ", packed_pfx, type_to_glsl(write_type), "&)"); + + if (rhs_transpose) + { + // If RHS is also transposed, we can just copy row by row. + for (uint32_t i = 0; i < type.vecsize; i++) + { + statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", + to_unpacked_row_major_matrix_expression(rhs_expression), "[", i, "];"); + } + } + else + { + auto vector_type = expression_type(rhs_expression); + vector_type.vecsize = vector_type.columns; + vector_type.columns = 1; + + // Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad, + // so pick out individual components instead. 
+ for (uint32_t i = 0; i < type.vecsize; i++) + { + string rhs_row = type_to_glsl_constructor(vector_type) + "("; + for (uint32_t j = 0; j < vector_type.vecsize; j++) + { + rhs_row += join(to_enclosed_unpacked_expression(rhs_expression), "[", j, "][", i, "]"); + if (j + 1 < vector_type.vecsize) + rhs_row += ", "; + } + rhs_row += ")"; + + statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", rhs_row, ";"); + } + } + + // We're dealing with transpose manually. + lhs_e->need_transpose = true; } - for (uint32_t i = 0; i < type.columns; i++) - statement(enclose_expression(lhs), "[", i, "] = ", enclose_expression(rhs), "[", i, "];"); - if (transpose) + else { - lhs_e->need_transpose = true; - if (rhs_e) rhs_e->need_transpose = !rhs_e->need_transpose; + write_type.columns = 1; + + if (physical_type.vecsize != type.vecsize) + cast_expr = join("(", cast_addr_space, " ", packed_pfx, type_to_glsl(write_type), "&)"); + + if (rhs_transpose) + { + auto vector_type = expression_type(rhs_expression); + vector_type.columns = 1; + + // Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad, + // so pick out individual components instead. + for (uint32_t i = 0; i < type.columns; i++) + { + string rhs_row = type_to_glsl_constructor(vector_type) + "("; + for (uint32_t j = 0; j < vector_type.vecsize; j++) + { + // Need to explicitly unpack expression since we've mucked with transpose state. + auto unpacked_expr = to_unpacked_row_major_matrix_expression(rhs_expression); + rhs_row += join(unpacked_expr, "[", j, "][", i, "]"); + if (j + 1 < vector_type.vecsize) + rhs_row += ", "; + } + rhs_row += ")"; + + statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", rhs_row, ";"); + } + } + else + { + // Copy column-by-column. 
+ for (uint32_t i = 0; i < type.columns; i++) + { + statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", + to_enclosed_unpacked_expression(rhs_expression), "[", i, "];"); + } + } + } + + // We're dealing with transpose manually. + if (rhs_transpose) + rhs_e->need_transpose = true; + } + else if (transpose) + { + lhs_e->need_transpose = false; + + SPIRType write_type = type; + write_type.vecsize = 1; + write_type.columns = 1; + + // Storing a column to a row-major matrix. Unroll the write. + for (uint32_t c = 0; c < type.vecsize; c++) + { + auto lhs_expr = to_enclosed_expression(lhs_expression); + auto column_index = lhs_expr.find_last_of('['); + if (column_index != string::npos) + { + statement("((", cast_addr_space, " ", type_to_glsl(write_type), "*)&", + lhs_expr.insert(column_index, join('[', c, ']', ")")), " = ", + to_extract_component_expression(rhs_expression, c), ";"); + } } + + lhs_e->need_transpose = true; } - else if (is_array(type) && stride == 4 * type.width / 8) + else if ((is_matrix(physical_type) || is_array(physical_type)) && physical_type.vecsize > type.vecsize) { + assert(type.vecsize >= 1 && type.vecsize <= 3); + + // If we have packed types, we cannot use swizzled stores. + // We could technically unroll the store for each element if needed. + // When remapping to a std140 physical type, we always get float4, + // and the packed decoration should always be removed. + assert(!lhs_packed_type); + + string lhs = to_dereferenced_expression(lhs_expression); + string rhs = to_pointer_expression(rhs_expression); + // Unpack the expression so we can store to it with a float or float2. // It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead. 
- if (is_scalar(type)) - lhs = enclose_expression(lhs) + ".x"; - else if (is_vector(type) && type.vecsize == 2) - lhs = enclose_expression(lhs) + ".xy"; + lhs = join("(", cast_addr_space, " ", type_to_glsl(type), "&)", enclose_expression(lhs)); + if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); } - - if (!is_matrix(type)) + else if (!is_matrix(type)) { + string lhs = to_dereferenced_expression(lhs_expression); + string rhs = to_pointer_expression(rhs_expression); if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) statement(lhs, " = ", rhs, ";"); } + register_write(lhs_expression); } } +static bool expression_ends_with(const string &expr_str, const std::string &ending) +{ + if (expr_str.length() >= ending.length()) + return (expr_str.compare(expr_str.length() - ending.length(), ending.length(), ending) == 0); + else + return false; +} + // Converts the format of the current expression from packed to unpacked, // by wrapping the expression in a constructor of the appropriate type. -string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t packed_type_id) +// Also, handle special physical ID remapping scenarios, similar to emit_store_statement(). +string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t physical_type_id, + bool packed, bool row_major) { - const SPIRType *packed_type = nullptr; - uint32_t stride = 0; - if (packed_type_id) + // Trivial case, nothing to do. 
+ if (physical_type_id == 0 && !packed) + return expr_str; + + const SPIRType *physical_type = nullptr; + if (physical_type_id) + physical_type = &get(physical_type_id); + + static const char *swizzle_lut[] = { + ".x", + ".xy", + ".xyz", + }; + + if (physical_type && is_vector(*physical_type) && is_array(*physical_type) && + physical_type->vecsize > type.vecsize && !expression_ends_with(expr_str, swizzle_lut[type.vecsize - 1])) { - packed_type = &get(packed_type_id); - stride = get_decoration(packed_type_id, DecorationArrayStride); + // std140 array cases for vectors. + assert(type.vecsize >= 1 && type.vecsize <= 3); + return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1]; + } + else if (physical_type && is_matrix(*physical_type) && is_vector(type) && physical_type->vecsize > type.vecsize) + { + // Extract column from padded matrix. + assert(type.vecsize >= 1 && type.vecsize <= 3); + return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1]; } - - // float[] and float2[] cases are really just padding, so directly swizzle from the backing float4 instead. - if (packed_type && is_array(*packed_type) && is_scalar(*packed_type) && stride == 4 * packed_type->width / 8) - return enclose_expression(expr_str) + ".x"; - else if (packed_type && is_array(*packed_type) && is_vector(*packed_type) && packed_type->vecsize == 2 && - stride == 4 * packed_type->width / 8) - return enclose_expression(expr_str) + ".xy"; else if (is_matrix(type)) { // Packed matrices are stored as arrays of packed vectors. Unfortunately, // we can't just pass the array straight to the matrix constructor. We have to // pass each vector individually, so that they can be unpacked to normal vectors. - if (!packed_type) - packed_type = &type; - const char *base_type = packed_type->width == 16 ? 
"half" : "float"; - string unpack_expr = join(type_to_glsl(*packed_type), "("); - for (uint32_t i = 0; i < packed_type->columns; i++) + if (!physical_type) + physical_type = &type; + + uint32_t vecsize = type.vecsize; + uint32_t columns = type.columns; + if (row_major) + swap(vecsize, columns); + + uint32_t physical_vecsize = row_major ? physical_type->columns : physical_type->vecsize; + + const char *base_type = type.width == 16 ? "half" : "float"; + string unpack_expr = join(base_type, columns, "x", vecsize, "("); + + const char *load_swiz = ""; + + if (physical_vecsize != vecsize) + load_swiz = swizzle_lut[vecsize - 1]; + + for (uint32_t i = 0; i < columns; i++) { if (i > 0) unpack_expr += ", "; - unpack_expr += join(base_type, packed_type->vecsize, "(", expr_str, "[", i, "])"); + + if (packed) + unpack_expr += join(base_type, physical_vecsize, "(", expr_str, "[", i, "]", ")", load_swiz); + else + unpack_expr += join(expr_str, "[", i, "]", load_swiz); } + unpack_expr += ")"; return unpack_expr; } else + { return join(type_to_glsl(type), "(", expr_str, ")"); + } } // Emits the file header info @@ -2706,6 +5191,11 @@ void CompilerMSL::emit_header() // This particular line can be overridden during compilation, so make it a flag and not a pragma line. 
if (suppress_missing_prototypes) statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\""); + + // Disable warning about missing braces for array template to make arrays a value type + if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0) + statement("#pragma clang diagnostic ignored \"-Wmissing-braces\""); + for (auto &pragma : pragma_lines) statement(pragma); @@ -2743,21 +5233,230 @@ void CompilerMSL::add_typedef_line(const string &line) force_recompile(); } +// Template struct like spvUnsafeArray<> need to be declared *before* any resources are declared +void CompilerMSL::emit_custom_templates() +{ + static const char * const address_spaces[] = { + "thread", "constant", "device", "threadgroup", "threadgroup_imageblock", "ray_data", "object_data" + }; + + for (const auto &spv_func : spv_function_implementations) + { + switch (spv_func) + { + case SPVFuncImplUnsafeArray: + statement("template"); + statement("struct spvUnsafeArray"); + begin_scope(); + statement("T elements[Num ? 
Num : 1];"); + statement(""); + statement("thread T& operator [] (size_t pos) thread"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const thread T& operator [] (size_t pos) const thread"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement(""); + statement("device T& operator [] (size_t pos) device"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const device T& operator [] (size_t pos) const device"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement(""); + statement("constexpr const constant T& operator [] (size_t pos) const constant"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement(""); + statement("threadgroup T& operator [] (size_t pos) threadgroup"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const threadgroup T& operator [] (size_t pos) const threadgroup"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + end_scope_decl(); + statement(""); + break; + + case SPVFuncImplStorageMatrix: + statement("template"); + statement("struct spvStorageMatrix"); + begin_scope(); + statement("vec columns[Cols];"); + statement(""); + for (size_t method_idx = 0; method_idx < sizeof(address_spaces) / sizeof(address_spaces[0]); ++method_idx) + { + // Some address spaces require particular features. 
+ if (method_idx == 4) // threadgroup_imageblock + statement("#ifdef __HAVE_IMAGEBLOCKS__"); + else if (method_idx == 5) // ray_data + statement("#ifdef __HAVE_RAYTRACING__"); + else if (method_idx == 6) // object_data + statement("#ifdef __HAVE_MESH__"); + const string &method_as = address_spaces[method_idx]; + statement("spvStorageMatrix() ", method_as, " = default;"); + if (method_idx != 1) // constant + { + statement(method_as, " spvStorageMatrix& operator=(initializer_list> cols) ", + method_as); + begin_scope(); + statement("size_t i;"); + statement("thread vec* col;"); + statement("for (i = 0, col = cols.begin(); i < Cols; ++i, ++col)"); + statement(" columns[i] = *col;"); + statement("return *this;"); + end_scope(); + } + statement(""); + for (size_t param_idx = 0; param_idx < sizeof(address_spaces) / sizeof(address_spaces[0]); ++param_idx) + { + if (param_idx != method_idx) + { + if (param_idx == 4) // threadgroup_imageblock + statement("#ifdef __HAVE_IMAGEBLOCKS__"); + else if (param_idx == 5) // ray_data + statement("#ifdef __HAVE_RAYTRACING__"); + else if (param_idx == 6) // object_data + statement("#ifdef __HAVE_MESH__"); + } + const string ¶m_as = address_spaces[param_idx]; + statement("spvStorageMatrix(const ", param_as, " matrix& m) ", method_as); + begin_scope(); + statement("for (size_t i = 0; i < Cols; ++i)"); + statement(" columns[i] = m.columns[i];"); + end_scope(); + statement("spvStorageMatrix(const ", param_as, " spvStorageMatrix& m) ", method_as, " = default;"); + if (method_idx != 1) // constant + { + statement(method_as, " spvStorageMatrix& operator=(const ", param_as, + " matrix& m) ", method_as); + begin_scope(); + statement("for (size_t i = 0; i < Cols; ++i)"); + statement(" columns[i] = m.columns[i];"); + statement("return *this;"); + end_scope(); + statement(method_as, " spvStorageMatrix& operator=(const ", param_as, " spvStorageMatrix& m) ", + method_as, " = default;"); + } + if (param_idx != method_idx && param_idx >= 4) + 
statement("#endif"); + statement(""); + } + statement("operator matrix() const ", method_as); + begin_scope(); + statement("matrix m;"); + statement("for (int i = 0; i < Cols; ++i)"); + statement(" m.columns[i] = columns[i];"); + statement("return m;"); + end_scope(); + statement(""); + statement("vec operator[](size_t idx) const ", method_as); + begin_scope(); + statement("return columns[idx];"); + end_scope(); + if (method_idx != 1) // constant + { + statement(method_as, " vec& operator[](size_t idx) ", method_as); + begin_scope(); + statement("return columns[idx];"); + end_scope(); + } + if (method_idx >= 4) + statement("#endif"); + statement(""); + } + end_scope_decl(); + statement(""); + statement("template"); + statement("matrix transpose(spvStorageMatrix m)"); + begin_scope(); + statement("return transpose(matrix(m));"); + end_scope(); + statement(""); + statement("typedef spvStorageMatrix spvStorage_half2x2;"); + statement("typedef spvStorageMatrix spvStorage_half2x3;"); + statement("typedef spvStorageMatrix spvStorage_half2x4;"); + statement("typedef spvStorageMatrix spvStorage_half3x2;"); + statement("typedef spvStorageMatrix spvStorage_half3x3;"); + statement("typedef spvStorageMatrix spvStorage_half3x4;"); + statement("typedef spvStorageMatrix spvStorage_half4x2;"); + statement("typedef spvStorageMatrix spvStorage_half4x3;"); + statement("typedef spvStorageMatrix spvStorage_half4x4;"); + statement("typedef spvStorageMatrix spvStorage_float2x2;"); + statement("typedef spvStorageMatrix spvStorage_float2x3;"); + statement("typedef spvStorageMatrix spvStorage_float2x4;"); + statement("typedef spvStorageMatrix spvStorage_float3x2;"); + statement("typedef spvStorageMatrix spvStorage_float3x3;"); + statement("typedef spvStorageMatrix spvStorage_float3x4;"); + statement("typedef spvStorageMatrix spvStorage_float4x2;"); + statement("typedef spvStorageMatrix spvStorage_float4x3;"); + statement("typedef spvStorageMatrix spvStorage_float4x4;"); + statement(""); + 
break; + + default: + break; + } + } +} + // Emits any needed custom function bodies. +// Metal helper functions must be static force-inline, i.e. static inline __attribute__((always_inline)) +// otherwise they will cause problems when linked together in a single Metallib. void CompilerMSL::emit_custom_functions() { - for (uint32_t i = SPVFuncImplArrayCopyMultidimMax; i >= 2; i--) + for (uint32_t i = kArrayCopyMultidimMax; i >= 2; i--) if (spv_function_implementations.count(static_cast(SPVFuncImplArrayCopyMultidimBase + i))) spv_function_implementations.insert(static_cast(SPVFuncImplArrayCopyMultidimBase + i - 1)); - for (auto &spv_func : spv_function_implementations) + if (spv_function_implementations.count(SPVFuncImplDynamicImageSampler)) + { + // Unfortunately, this one needs a lot of the other functions to compile OK. + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW( + "spvDynamicImageSampler requires default-constructible texture objects, which require MSL 2.0."); + spv_function_implementations.insert(SPVFuncImplForwardArgs); + spv_function_implementations.insert(SPVFuncImplTextureSwizzle); + if (msl_options.swizzle_texture_samples) + spv_function_implementations.insert(SPVFuncImplGatherSwizzle); + for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane; + i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++) + spv_function_implementations.insert(static_cast(i)); + spv_function_implementations.insert(SPVFuncImplExpandITUFullRange); + spv_function_implementations.insert(SPVFuncImplExpandITUNarrowRange); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT709); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT601); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT2020); + } + + for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane; + i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++) + if (spv_function_implementations.count(static_cast(i))) + 
spv_function_implementations.insert(SPVFuncImplForwardArgs); + + if (spv_function_implementations.count(SPVFuncImplTextureSwizzle) || + spv_function_implementations.count(SPVFuncImplGatherSwizzle) || + spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle)) + { + spv_function_implementations.insert(SPVFuncImplForwardArgs); + spv_function_implementations.insert(SPVFuncImplGetSwizzle); + } + + for (const auto &spv_func : spv_function_implementations) { switch (spv_func) { case SPVFuncImplMod: statement("// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()"); statement("template"); - statement("Tx mod(Tx x, Ty y)"); + statement("inline Tx mod(Tx x, Ty y)"); begin_scope(); statement("return x - y * floor(x / y);"); end_scope(); @@ -2767,7 +5466,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplRadians: statement("// Implementation of the GLSL radians() function"); statement("template"); - statement("T radians(T d)"); + statement("inline T radians(T d)"); begin_scope(); statement("return d * T(0.01745329251);"); end_scope(); @@ -2777,7 +5476,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplDegrees: statement("// Implementation of the GLSL degrees() function"); statement("template"); - statement("T degrees(T r)"); + statement("inline T degrees(T r)"); begin_scope(); statement("return r * T(57.2957795131);"); end_scope(); @@ -2787,7 +5486,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplFindILsb: statement("// Implementation of the GLSL findLSB() function"); statement("template"); - statement("T findLSB(T x)"); + statement("inline T spvFindLSB(T x)"); begin_scope(); statement("return select(ctz(x), T(-1), x == T(0));"); end_scope(); @@ -2797,7 +5496,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplFindUMsb: statement("// Implementation of the unsigned GLSL findMSB() function"); statement("template"); - statement("T findUMSB(T x)"); + statement("inline T spvFindUMSB(T 
x)"); begin_scope(); statement("return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));"); end_scope(); @@ -2807,7 +5506,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplFindSMsb: statement("// Implementation of the signed GLSL findMSB() function"); statement("template"); - statement("T findSMSB(T x)"); + statement("inline T spvFindSMSB(T x)"); begin_scope(); statement("T v = select(x, T(-1) - x, x < T(0));"); statement("return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));"); @@ -2818,7 +5517,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplSSign: statement("// Implementation of the GLSL sign() function for integer types"); statement("template::value>::type>"); - statement("T sign(T x)"); + statement("inline T sign(T x)"); begin_scope(); statement("return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0));"); end_scope(); @@ -2826,42 +5525,34 @@ void CompilerMSL::emit_custom_functions() break; case SPVFuncImplArrayCopy: - statement("// Implementation of an array copy function to cover GLSL's ability to copy an array via " - "assignment."); - statement("template"); - statement("void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N])"); - begin_scope(); - statement("for (uint i = 0; i < N; dst[i] = src[i], i++);"); - end_scope(); - statement(""); - - statement("template"); - statement("void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])"); - begin_scope(); - statement("for (uint i = 0; i < N; dst[i] = src[i], i++);"); - end_scope(); - statement(""); - break; - case SPVFuncImplArrayOfArrayCopy2Dim: case SPVFuncImplArrayOfArrayCopy3Dim: case SPVFuncImplArrayOfArrayCopy4Dim: case SPVFuncImplArrayOfArrayCopy5Dim: case SPVFuncImplArrayOfArrayCopy6Dim: { + // Unfortunately we cannot template on the address space, so combinatorial explosion it is. 
static const char *function_name_tags[] = { - "FromStack", - "FromConstant", + "FromConstantToStack", "FromConstantToThreadGroup", "FromStackToStack", + "FromStackToThreadGroup", "FromThreadGroupToStack", "FromThreadGroupToThreadGroup", + "FromDeviceToDevice", "FromConstantToDevice", "FromStackToDevice", + "FromThreadGroupToDevice", "FromDeviceToStack", "FromDeviceToThreadGroup", }; static const char *src_address_space[] = { - "thread const", - "constant", + "constant", "constant", "thread const", "thread const", + "threadgroup const", "threadgroup const", "device const", "constant", + "thread const", "threadgroup const", "device const", "device const", + }; + + static const char *dst_address_space[] = { + "thread", "threadgroup", "thread", "threadgroup", "thread", "threadgroup", + "device", "device", "device", "device", "thread", "threadgroup", }; - for (uint32_t variant = 0; variant < 2; variant++) + for (uint32_t variant = 0; variant < 12; variant++) { - uint32_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase; + uint8_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase; string tmp = "template 0) + { + string tex_width_str = convert_to_string(msl_options.texel_buffer_texture_width); + statement("// Returns 2D texture coords corresponding to 1D texel buffer coords"); + statement(force_inline); + statement("uint2 spvTexelBufferCoord(uint tc)"); + begin_scope(); + statement(join("return uint2(tc % ", tex_width_str, ", tc / ", tex_width_str, ");")); + end_scope(); + statement(""); + } + else + { + statement("// Returns 2D texture coords corresponding to 1D texel buffer coords"); + statement( + "#define spvTexelBufferCoord(tc, tex) uint2((tc) % (tex).get_width(), (tc) / (tex).get_width())"); + statement(""); + } + break; + } + + // Emulate texture2D atomic operations + case SPVFuncImplImage2DAtomicCoords: + { + if (msl_options.supports_msl_version(1, 2)) + { + statement("// The required alignment of a linear texture of R32Uint format."); + 
statement("constant uint spvLinearTextureAlignmentOverride [[function_constant(", + msl_options.r32ui_alignment_constant_id, ")]];"); + statement("constant uint spvLinearTextureAlignment = ", + "is_function_constant_defined(spvLinearTextureAlignmentOverride) ? ", + "spvLinearTextureAlignmentOverride : ", msl_options.r32ui_linear_texture_alignment, ";"); + } + else + { + statement("// The required alignment of a linear texture of R32Uint format."); + statement("constant uint spvLinearTextureAlignment = ", msl_options.r32ui_linear_texture_alignment, + ";"); + } + statement("// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics"); + statement("#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + ", + " spvLinearTextureAlignment / 4 - 1) & ~(", + " spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x)"); statement(""); break; } + // "fadd" intrinsic support + case SPVFuncImplFAdd: + statement("template"); + statement("[[clang::optnone]] T spvFAdd(T l, T r)"); + begin_scope(); + statement("return fma(T(1), l, r);"); + end_scope(); + statement(""); + break; + + // "fsub" intrinsic support + case SPVFuncImplFSub: + statement("template"); + statement("[[clang::optnone]] T spvFSub(T l, T r)"); + begin_scope(); + statement("return fma(T(-1), r, l);"); + end_scope(); + statement(""); + break; + + // "fmul' intrinsic support + case SPVFuncImplFMul: + statement("template"); + statement("[[clang::optnone]] T spvFMul(T l, T r)"); + begin_scope(); + statement("return fma(l, r, T(0));"); + end_scope(); + statement(""); + + statement("template"); + statement("[[clang::optnone]] vec spvFMulVectorMatrix(vec v, matrix m)"); + begin_scope(); + statement("vec res = vec(0);"); + statement("for (uint i = Rows; i > 0; --i)"); + begin_scope(); + statement("vec tmp(0);"); + statement("for (uint j = 0; j < Cols; ++j)"); + begin_scope(); + statement("tmp[j] = m[j][i - 1];"); + end_scope(); + statement("res = fma(tmp, vec(v[i - 1]), res);"); + 
end_scope(); + statement("return res;"); + end_scope(); + statement(""); + + statement("template"); + statement("[[clang::optnone]] vec spvFMulMatrixVector(matrix m, vec v)"); + begin_scope(); + statement("vec res = vec(0);"); + statement("for (uint i = Cols; i > 0; --i)"); + begin_scope(); + statement("res = fma(m[i - 1], vec(v[i - 1]), res);"); + end_scope(); + statement("return res;"); + end_scope(); + statement(""); + + statement("template"); + statement("[[clang::optnone]] matrix spvFMulMatrixMatrix(matrix l, matrix r)"); + begin_scope(); + statement("matrix res;"); + statement("for (uint i = 0; i < RCols; i++)"); + begin_scope(); + statement("vec tmp(0);"); + statement("for (uint j = 0; j < LCols; j++)"); + begin_scope(); + statement("tmp = fma(vec(r[i][j]), l[j], tmp);"); + end_scope(); + statement("res[i] = tmp;"); + end_scope(); + statement("return res;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplQuantizeToF16: + // Ensure fast-math is disabled to match Vulkan results. + // SpvHalfTypeSelector is used to match the half* template type to the float* template type. + // Depending on GPU, MSL does not always flush converted subnormal halfs to zero, + // as required by OpQuantizeToF16, so check for subnormals and flush them to zero. 
+ statement("template struct SpvHalfTypeSelector;"); + statement("template <> struct SpvHalfTypeSelector { public: using H = half; };"); + statement("template struct SpvHalfTypeSelector> { using H = vec; };"); + statement("template::H>"); + statement("[[clang::optnone]] F spvQuantizeToF16(F fval)"); + begin_scope(); + statement("H hval = H(fval);"); + statement("hval = select(copysign(H(0), hval), hval, isnormal(hval) || isinf(hval) || isnan(hval));"); + statement("return F(hval);"); + end_scope(); + statement(""); + break; + + // Emulate texturecube_array with texture2d_array for iOS where this type is not available + case SPVFuncImplCubemapTo2DArrayFace: + statement(force_inline); + statement("float3 spvCubemapTo2DArrayFace(float3 P)"); + begin_scope(); + statement("float3 Coords = abs(P.xyz);"); + statement("float CubeFace = 0;"); + statement("float ProjectionAxis = 0;"); + statement("float u = 0;"); + statement("float v = 0;"); + statement("if (Coords.x >= Coords.y && Coords.x >= Coords.z)"); + begin_scope(); + statement("CubeFace = P.x >= 0 ? 0 : 1;"); + statement("ProjectionAxis = Coords.x;"); + statement("u = P.x >= 0 ? -P.z : P.z;"); + statement("v = -P.y;"); + end_scope(); + statement("else if (Coords.y >= Coords.x && Coords.y >= Coords.z)"); + begin_scope(); + statement("CubeFace = P.y >= 0 ? 2 : 3;"); + statement("ProjectionAxis = Coords.y;"); + statement("u = P.x;"); + statement("v = P.y >= 0 ? P.z : -P.z;"); + end_scope(); + statement("else"); + begin_scope(); + statement("CubeFace = P.z >= 0 ? 4 : 5;"); + statement("ProjectionAxis = Coords.z;"); + statement("u = P.z >= 0 ? 
P.x : -P.x;"); + statement("v = -P.y;"); + end_scope(); + statement("u = 0.5 * (u/ProjectionAxis + 1);"); + statement("v = 0.5 * (v/ProjectionAxis + 1);"); + statement("return float3(u, v, CubeFace);"); + end_scope(); + statement(""); + break; + case SPVFuncImplInverse4x4: statement("// Returns the determinant of a 2x2 matrix."); - statement("inline float spvDet2x2(float a1, float a2, float b1, float b2)"); + statement(force_inline); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); begin_scope(); statement("return a1 * b2 - b1 * a2;"); end_scope(); statement(""); statement("// Returns the determinant of a 3x3 matrix."); - statement("inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " + statement(force_inline); + statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " "float c2, float c3)"); begin_scope(); statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, " @@ -2923,6 +5791,7 @@ void CompilerMSL::emit_custom_functions() statement(""); statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); + statement(force_inline); statement("float4x4 spvInverse4x4(float4x4 m)"); begin_scope(); statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); @@ -2979,7 +5848,8 @@ void CompilerMSL::emit_custom_functions() if (spv_function_implementations.count(SPVFuncImplInverse4x4) == 0) { statement("// Returns the determinant of a 2x2 matrix."); - statement("inline float spvDet2x2(float a1, float a2, float b1, float b2)"); + statement(force_inline); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); begin_scope(); statement("return a1 * b2 - b1 * a2;"); end_scope(); @@ -2988,6 +5858,7 @@ void CompilerMSL::emit_custom_functions() statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); + statement(force_inline); statement("float3x3 spvInverse3x3(float3x3 m)"); begin_scope(); statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); @@ -3018,6 +5889,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplInverse2x2: statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); + statement(force_inline); statement("float2x2 spvInverse2x2(float2x2 m)"); begin_scope(); statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); @@ -3039,65 +5911,24 @@ void CompilerMSL::emit_custom_functions() statement(""); break; - case SPVFuncImplRowMajor2x3: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float2x3 spvConvertFromRowMajor2x3(float2x3 m)"); - begin_scope(); - statement("return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2]));"); - end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor2x4: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float2x4 spvConvertFromRowMajor2x4(float2x4 m)"); - begin_scope(); - statement("return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], " - "m[1][3]));"); - end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor3x2: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float3x2 spvConvertFromRowMajor3x2(float3x2 m)"); - begin_scope(); - statement("return float3x2(float2(m[0][0], m[1][1]), float2(m[0][1], m[2][0]), float2(m[1][0], m[2][1]));"); - end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor3x4: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float3x4 spvConvertFromRowMajor3x4(float3x4 m)"); - begin_scope(); - statement("return float3x4(float4(m[0][0], m[0][3], m[1][2], m[2][1]), float4(m[0][1], m[1][0], m[1][3], " - "m[2][2]), float4(m[0][2], m[1][1], m[2][0], m[2][3]));"); - end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor4x2: - statement("// Implementation of a conversion of matrix content from RowMajor to 
ColumnMajor organization."); - statement("float4x2 spvConvertFromRowMajor4x2(float4x2 m)"); + case SPVFuncImplForwardArgs: + statement("template struct spvRemoveReference { typedef T type; };"); + statement("template struct spvRemoveReference { typedef T type; };"); + statement("template struct spvRemoveReference { typedef T type; };"); + statement("template inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference::type& x)"); begin_scope(); - statement("return float4x2(float2(m[0][0], m[2][0]), float2(m[0][1], m[2][1]), float2(m[1][0], m[3][0]), " - "float2(m[1][1], m[3][1]));"); + statement("return static_cast(x);"); end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor4x3: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float4x3 spvConvertFromRowMajor4x3(float4x3 m)"); + statement("template inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference::type&& x)"); begin_scope(); - statement("return float4x3(float3(m[0][0], m[1][1], m[2][2]), float3(m[0][1], m[1][2], m[3][0]), " - "float3(m[0][2], m[2][0], m[3][1]), float3(m[1][0], m[2][1], m[3][2]));"); + statement("return static_cast(x);"); end_scope(); statement(""); break; - case SPVFuncImplTextureSwizzle: + case SPVFuncImplGetSwizzle: statement("enum class spvSwizzle : uint"); begin_scope(); statement("none = 0,"); @@ -3109,20 +5940,6 @@ void CompilerMSL::emit_custom_functions() statement("alpha"); end_scope_decl(); statement(""); - statement("template struct spvRemoveReference { typedef T type; };"); - statement("template struct spvRemoveReference { typedef T type; };"); - statement("template struct spvRemoveReference { typedef T type; };"); - statement("template inline constexpr thread T&& spvForward(thread typename " - "spvRemoveReference::type& x)"); - begin_scope(); - statement("return static_cast(x);"); - end_scope(); - statement("template inline constexpr thread T&& 
spvForward(thread typename " - "spvRemoveReference::type&& x)"); - begin_scope(); - statement("return static_cast(x);"); - end_scope(); - statement(""); statement("template"); statement("inline T spvGetSwizzle(vec x, T c, spvSwizzle s)"); begin_scope(); @@ -3145,6 +5962,9 @@ void CompilerMSL::emit_custom_functions() end_scope(); end_scope(); statement(""); + break; + + case SPVFuncImplTextureSwizzle: statement("// Wrapper function that swizzles texture samples and fetches."); statement("template"); statement("inline vec spvTextureSwizzle(vec x, uint s)"); @@ -3163,11 +5983,14 @@ void CompilerMSL::emit_custom_functions() statement("return spvTextureSwizzle(vec(x, 0, 0, 1), s).x;"); end_scope(); statement(""); + break; + + case SPVFuncImplGatherSwizzle: statement("// Wrapper function that swizzles texture gathers."); - statement("template"); - statement( - "inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) " - "METAL_CONST_ARG(c)"); + statement("template class Tex, " + "typename... Ts>"); + statement("inline vec spvGatherSwizzle(const thread Tex& t, sampler s, " + "uint sw, component c, Ts... params) METAL_CONST_ARG(c)"); begin_scope(); statement("if (sw)"); begin_scope(); @@ -3204,10 +6027,14 @@ void CompilerMSL::emit_custom_functions() end_scope(); end_scope(); statement(""); + break; + + case SPVFuncImplGatherCompareSwizzle: statement("// Wrapper function that swizzles depth texture gathers."); - statement("template"); - statement( - "inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) "); + statement("template class Tex, " + "typename... Ts>"); + statement("inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler " + "s, uint sw, Ts... 
params) "); begin_scope(); statement("if (sw)"); begin_scope(); @@ -3230,16 +6057,89 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplSubgroupBroadcast: + // Metal doesn't allow broadcasting boolean values directly, but we can work around that by broadcasting + // them as integers. + statement("template"); + statement("inline T spvSubgroupBroadcast(T value, ushort lane)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return quad_broadcast(value, lane);"); + else + statement("return simd_broadcast(value, lane);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupBroadcast(bool value, ushort lane)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_broadcast((ushort)value, lane);"); + else + statement("return !!simd_broadcast((ushort)value, lane);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupBroadcast(vec value, ushort lane)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_broadcast((vec)value, lane);"); + else + statement("return (vec)simd_broadcast((vec)value, lane);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBroadcastFirst: + statement("template"); + statement("inline T spvSubgroupBroadcastFirst(T value)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return quad_broadcast_first(value);"); + else + statement("return simd_broadcast_first(value);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupBroadcastFirst(bool value)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_broadcast_first((ushort)value);"); + else + statement("return !!simd_broadcast_first((ushort)value);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec 
spvSubgroupBroadcastFirst(vec value)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_broadcast_first((vec)value);"); + else + statement("return (vec)simd_broadcast_first((vec)value);"); + end_scope(); + statement(""); + break; + case SPVFuncImplSubgroupBallot: statement("inline uint4 spvSubgroupBallot(bool value)"); begin_scope(); - statement("simd_vote vote = simd_ballot(value);"); - statement("// simd_ballot() returns a 64-bit integer-like object, but"); - statement("// SPIR-V callers expect a uint4. We must convert."); - statement("// FIXME: This won't include higher bits if Apple ever supports"); - statement("// 128 lanes in an SIMD-group."); - statement("return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> " - "32) & 0xFFFFFFFF), 0, 0);"); + if (msl_options.use_quadgroup_operation()) + { + statement("return uint4((quad_vote::vote_t)quad_ballot(value), 0, 0, 0);"); + } + else if (msl_options.is_ios()) + { + // The current simd_vote on iOS uses a 32-bit integer-like object. + statement("return uint4((simd_vote::vote_t)simd_ballot(value), 0, 0, 0);"); + } + else + { + statement("simd_vote vote = simd_ballot(value);"); + statement("// simd_ballot() returns a 64-bit integer-like object, but"); + statement("// SPIR-V callers expect a uint4. 
We must convert."); + statement("// FIXME: This won't include higher bits if Apple ever supports"); + statement("// 128 lanes in an SIMD-group."); + statement("return uint4(as_type((simd_vote::vote_t)vote), 0, 0);"); + } end_scope(); statement(""); break; @@ -3253,8 +6153,18 @@ void CompilerMSL::emit_custom_functions() break; case SPVFuncImplSubgroupBallotFindLSB: - statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot)"); + statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)"); begin_scope(); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + } + statement("ballot &= mask;"); statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + " "ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);"); end_scope(); @@ -3262,8 +6172,18 @@ void CompilerMSL::emit_custom_functions() break; case SPVFuncImplSubgroupBallotFindMSB: - statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot)"); + statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)"); begin_scope(); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + } + statement("ballot &= mask;"); statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - " "(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), " "ballot.z == 0), ballot.w == 0);"); @@ -3272,24 +6192,52 @@ 
void CompilerMSL::emit_custom_functions() break; case SPVFuncImplSubgroupBallotBitCount: - statement("inline uint spvSubgroupBallotBitCount(uint4 ballot)"); + statement("inline uint spvPopCount4(uint4 ballot)"); begin_scope(); statement("return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);"); end_scope(); statement(""); + statement("inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)"); + begin_scope(); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + } + statement("return spvPopCount4(ballot & mask);"); + end_scope(); + statement(""); statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)"); begin_scope(); - statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), " - "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), " - "uint2(0));"); - statement("return spvSubgroupBallotBitCount(ballot & mask);"); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), " + "uint2(0));"); + } + statement("return spvPopCount4(ballot & mask);"); end_scope(); statement(""); statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)"); begin_scope(); - statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), " - "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 
0)), uint2(0));"); - statement("return spvSubgroupBallotBitCount(ballot & mask);"); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));"); + } + statement("return spvPopCount4(ballot & mask);"); end_scope(); statement(""); break; @@ -3302,70 +6250,1013 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("inline bool spvSubgroupAllEqual(T value)"); begin_scope(); - statement("return simd_all(value == simd_broadcast_first(value));"); + if (msl_options.use_quadgroup_operation()) + statement("return quad_all(all(value == quad_broadcast_first(value)));"); + else + statement("return simd_all(all(value == simd_broadcast_first(value)));"); end_scope(); statement(""); statement("template<>"); statement("inline bool spvSubgroupAllEqual(bool value)"); begin_scope(); - statement("return simd_all(value) || !simd_any(value);"); + if (msl_options.use_quadgroup_operation()) + statement("return quad_all(value) || !quad_any(value);"); + else + statement("return simd_all(value) || !simd_any(value);"); + end_scope(); + statement(""); + statement("template"); + statement("inline bool spvSubgroupAllEqual(vec value)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return quad_all(all(value == (vec)quad_broadcast_first((vec)value)));"); + else + statement("return simd_all(all(value == (vec)simd_broadcast_first((vec)value)));"); end_scope(); statement(""); break; - case SPVFuncImplReflectScalar: - // Metal does not support scalar versions of these functions. 
+ case SPVFuncImplSubgroupShuffle: statement("template"); - statement("inline T spvReflect(T i, T n)"); + statement("inline T spvSubgroupShuffle(T value, ushort lane)"); begin_scope(); - statement("return i - T(2) * i * n * n;"); + if (msl_options.use_quadgroup_operation()) + statement("return quad_shuffle(value, lane);"); + else + statement("return simd_shuffle(value, lane);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffle(bool value, ushort lane)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_shuffle((ushort)value, lane);"); + else + statement("return !!simd_shuffle((ushort)value, lane);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffle(vec value, ushort lane)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_shuffle((vec)value, lane);"); + else + statement("return (vec)simd_shuffle((vec)value, lane);"); end_scope(); statement(""); break; - case SPVFuncImplRefractScalar: - // Metal does not support scalar versions of these functions. 
+ case SPVFuncImplSubgroupShuffleXor: statement("template"); - statement("inline T spvRefract(T i, T n, T eta)"); - begin_scope(); - statement("T NoI = n * i;"); - statement("T NoI2 = NoI * NoI;"); - statement("T k = T(1) - eta * eta * (T(1) - NoI2);"); - statement("if (k < T(0))"); + statement("inline T spvSubgroupShuffleXor(T value, ushort mask)"); begin_scope(); - statement("return T(0);"); + if (msl_options.use_quadgroup_operation()) + statement("return quad_shuffle_xor(value, mask);"); + else + statement("return simd_shuffle_xor(value, mask);"); end_scope(); - statement("else"); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffleXor(bool value, ushort mask)"); begin_scope(); - statement("return eta * i - (eta * NoI + sqrt(k)) * n;"); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_shuffle_xor((ushort)value, mask);"); + else + statement("return !!simd_shuffle_xor((ushort)value, mask);"); end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffleXor(vec value, ushort mask)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_shuffle_xor((vec)value, mask);"); + else + statement("return (vec)simd_shuffle_xor((vec)value, mask);"); end_scope(); statement(""); break; - default: + case SPVFuncImplSubgroupShuffleUp: + statement("template"); + statement("inline T spvSubgroupShuffleUp(T value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return quad_shuffle_up(value, delta);"); + else + statement("return simd_shuffle_up(value, delta);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffleUp(bool value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_shuffle_up((ushort)value, delta);"); + else + statement("return !!simd_shuffle_up((ushort)value, delta);"); + end_scope(); + 
statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffleUp(vec value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_shuffle_up((vec)value, delta);"); + else + statement("return (vec)simd_shuffle_up((vec)value, delta);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupShuffleDown: + statement("template"); + statement("inline T spvSubgroupShuffleDown(T value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return quad_shuffle_down(value, delta);"); + else + statement("return simd_shuffle_down(value, delta);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffleDown(bool value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_shuffle_down((ushort)value, delta);"); + else + statement("return !!simd_shuffle_down((ushort)value, delta);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffleDown(vec value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_shuffle_down((vec)value, delta);"); + else + statement("return (vec)simd_shuffle_down((vec)value, delta);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplQuadBroadcast: + statement("template"); + statement("inline T spvQuadBroadcast(T value, uint lane)"); + begin_scope(); + statement("return quad_broadcast(value, lane);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvQuadBroadcast(bool value, uint lane)"); + begin_scope(); + statement("return !!quad_broadcast((ushort)value, lane);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvQuadBroadcast(vec value, uint lane)"); + begin_scope(); + statement("return (vec)quad_broadcast((vec)value, lane);"); + end_scope(); + 
statement(""); + break; + + case SPVFuncImplQuadSwap: + // We can implement this easily based on the following table giving + // the target lane ID from the direction and current lane ID: + // Direction + // | 0 | 1 | 2 | + // ---+---+---+---+ + // L 0 | 1 2 3 + // a 1 | 0 3 2 + // n 2 | 3 0 1 + // e 3 | 2 1 0 + // Notice that target = source ^ (direction + 1). + statement("template"); + statement("inline T spvQuadSwap(T value, uint dir)"); + begin_scope(); + statement("return quad_shuffle_xor(value, dir + 1);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvQuadSwap(bool value, uint dir)"); + begin_scope(); + statement("return !!quad_shuffle_xor((ushort)value, dir + 1);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvQuadSwap(vec value, uint dir)"); + begin_scope(); + statement("return (vec)quad_shuffle_xor((vec)value, dir + 1);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplReflectScalar: + // Metal does not support scalar versions of these functions. + // Ensure fast-math is disabled to match Vulkan results. + statement("template"); + statement("[[clang::optnone]] T spvReflect(T i, T n)"); + begin_scope(); + statement("return i - T(2) * i * n * n;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRefractScalar: + // Metal does not support scalar versions of these functions. + statement("template"); + statement("inline T spvRefract(T i, T n, T eta)"); + begin_scope(); + statement("T NoI = n * i;"); + statement("T NoI2 = NoI * NoI;"); + statement("T k = T(1) - eta * eta * (T(1) - NoI2);"); + statement("if (k < T(0))"); + begin_scope(); + statement("return T(0);"); + end_scope(); + statement("else"); + begin_scope(); + statement("return eta * i - (eta * NoI + sqrt(k)) * n;"); + end_scope(); + end_scope(); + statement(""); + break; + + case SPVFuncImplFaceForwardScalar: + // Metal does not support scalar versions of these functions. 
+ statement("template"); + statement("inline T spvFaceForward(T n, T i, T nref)"); + begin_scope(); + statement("return i * nref < T(0) ? n : -n;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructNearest2Plane: + statement("template"); + statement("inline vec spvChromaReconstructNearest(texture2d plane0, texture2d plane1, sampler " + "samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.br = plane1.sample(samp, coord, spvForward(options)...).rg;"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructNearest3Plane: + statement("template"); + statement("inline vec spvChromaReconstructNearest(texture2d plane0, texture2d plane1, " + "texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.b = plane1.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.r = plane2.sample(samp, coord, spvForward(options)...).r;"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422CositedEven2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422CositedEven(texture2d plane0, texture2d " + "plane1, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("if (fract(coord.x * plane1.get_width()) != 0.0)"); + begin_scope(); + statement("ycbcr.br = vec(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).rg);"); + end_scope(); + statement("else"); + begin_scope(); + statement("ycbcr.br = plane1.sample(samp, coord, spvForward(options)...).rg;"); + end_scope(); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422CositedEven3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422CositedEven(texture2d plane0, texture2d " + "plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("if (fract(coord.x * plane1.get_width()) != 0.0)"); + begin_scope(); + statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).r);"); + statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).r);"); + end_scope(); + statement("else"); + begin_scope(); + statement("ycbcr.b = plane1.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.r = plane2.sample(samp, coord, spvForward(options)...).r;"); + end_scope(); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422Midpoint2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422Midpoint(texture2d plane0, texture2d " + "plane1, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);"); + statement("ycbcr.br = vec(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., offs), 0.25).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422Midpoint3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422Midpoint(texture2d plane0, texture2d " + "plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);"); + statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., offs), 0.25).r);"); + statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., offs), 0.25).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane: + 
statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYCositedEven(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0)) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYCositedEven(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, " + "0.5)) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, " + "0.5)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case 
SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYMidpoint(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0.5)) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYMidpoint(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0.5)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplExpandITUFullRange: + statement("template"); + statement("inline vec spvExpandITUFullRange(vec ycbcr, int n)"); + begin_scope(); + statement("ycbcr.br -= exp2(T(n-1))/(exp2(T(n))-1);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplExpandITUNarrowRange: + statement("template"); + statement("inline vec spvExpandITUNarrowRange(vec ycbcr, int n)"); + begin_scope(); + statement("ycbcr.g = (ycbcr.g * (exp2(T(n)) - 1) - ldexp(T(16), n - 8))/ldexp(T(219), n - 8);"); + statement("ycbcr.br = (ycbcr.br * (exp2(T(n)) - 1) - ldexp(T(128), n - 8))/ldexp(T(224), n - 8);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT709: + statement("// cf. 
Khronos Data Format Specification, section 15.1.1"); + statement("constant float3x3 spvBT709Factors = {{1, 1, 1}, {0, -0.13397432/0.7152, 1.8556}, {1.5748, " + "-0.33480248/0.7152, 0}};"); + statement(""); + statement("template"); + statement("inline vec spvConvertYCbCrBT709(vec ycbcr)"); + begin_scope(); + statement("vec rgba;"); + statement("rgba.rgb = vec(spvBT709Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT601: + statement("// cf. Khronos Data Format Specification, section 15.1.2"); + statement("constant float3x3 spvBT601Factors = {{1, 1, 1}, {0, -0.202008/0.587, 1.772}, {1.402, " + "-0.419198/0.587, 0}};"); + statement(""); + statement("template"); + statement("inline vec spvConvertYCbCrBT601(vec ycbcr)"); + begin_scope(); + statement("vec rgba;"); + statement("rgba.rgb = vec(spvBT601Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT2020: + statement("// cf. 
Khronos Data Format Specification, section 15.1.3"); + statement("constant float3x3 spvBT2020Factors = {{1, 1, 1}, {0, -0.11156702/0.6780, 1.8814}, {1.4746, " + "-0.38737742/0.6780, 0}};"); + statement(""); + statement("template"); + statement("inline vec spvConvertYCbCrBT2020(vec ycbcr)"); + begin_scope(); + statement("vec rgba;"); + statement("rgba.rgb = vec(spvBT2020Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplDynamicImageSampler: + statement("enum class spvFormatResolution"); + begin_scope(); + statement("_444 = 0,"); + statement("_422,"); + statement("_420"); + end_scope_decl(); + statement(""); + statement("enum class spvChromaFilter"); + begin_scope(); + statement("nearest = 0,"); + statement("linear"); + end_scope_decl(); + statement(""); + statement("enum class spvXChromaLocation"); + begin_scope(); + statement("cosited_even = 0,"); + statement("midpoint"); + end_scope_decl(); + statement(""); + statement("enum class spvYChromaLocation"); + begin_scope(); + statement("cosited_even = 0,"); + statement("midpoint"); + end_scope_decl(); + statement(""); + statement("enum class spvYCbCrModelConversion"); + begin_scope(); + statement("rgb_identity = 0,"); + statement("ycbcr_identity,"); + statement("ycbcr_bt_709,"); + statement("ycbcr_bt_601,"); + statement("ycbcr_bt_2020"); + end_scope_decl(); + statement(""); + statement("enum class spvYCbCrRange"); + begin_scope(); + statement("itu_full = 0,"); + statement("itu_narrow"); + end_scope_decl(); + statement(""); + statement("struct spvComponentBits"); + begin_scope(); + statement("constexpr explicit spvComponentBits(int v) thread : value(v) {}"); + statement("uchar value : 6;"); + end_scope_decl(); + statement("// A class corresponding to metal::sampler which holds sampler"); + statement("// Y'CbCr conversion info."); + statement("struct spvYCbCrSampler"); + begin_scope(); + statement("constexpr 
spvYCbCrSampler() thread : val(build()) {}"); + statement("template"); + statement("constexpr spvYCbCrSampler(Ts... t) thread : val(build(t...)) {}"); + statement("constexpr spvYCbCrSampler(const thread spvYCbCrSampler& s) thread = default;"); + statement(""); + statement("spvFormatResolution get_resolution() const thread"); + begin_scope(); + statement("return spvFormatResolution((val & resolution_mask) >> resolution_base);"); + end_scope(); + statement("spvChromaFilter get_chroma_filter() const thread"); + begin_scope(); + statement("return spvChromaFilter((val & chroma_filter_mask) >> chroma_filter_base);"); + end_scope(); + statement("spvXChromaLocation get_x_chroma_offset() const thread"); + begin_scope(); + statement("return spvXChromaLocation((val & x_chroma_off_mask) >> x_chroma_off_base);"); + end_scope(); + statement("spvYChromaLocation get_y_chroma_offset() const thread"); + begin_scope(); + statement("return spvYChromaLocation((val & y_chroma_off_mask) >> y_chroma_off_base);"); + end_scope(); + statement("spvYCbCrModelConversion get_ycbcr_model() const thread"); + begin_scope(); + statement("return spvYCbCrModelConversion((val & ycbcr_model_mask) >> ycbcr_model_base);"); + end_scope(); + statement("spvYCbCrRange get_ycbcr_range() const thread"); + begin_scope(); + statement("return spvYCbCrRange((val & ycbcr_range_mask) >> ycbcr_range_base);"); + end_scope(); + statement("int get_bpc() const thread { return (val & bpc_mask) >> bpc_base; }"); + statement(""); + statement("private:"); + statement("ushort val;"); + statement(""); + statement("constexpr static constant ushort resolution_bits = 2;"); + statement("constexpr static constant ushort chroma_filter_bits = 2;"); + statement("constexpr static constant ushort x_chroma_off_bit = 1;"); + statement("constexpr static constant ushort y_chroma_off_bit = 1;"); + statement("constexpr static constant ushort ycbcr_model_bits = 3;"); + statement("constexpr static constant ushort ycbcr_range_bit = 1;"); + 
statement("constexpr static constant ushort bpc_bits = 6;"); + statement(""); + statement("constexpr static constant ushort resolution_base = 0;"); + statement("constexpr static constant ushort chroma_filter_base = 2;"); + statement("constexpr static constant ushort x_chroma_off_base = 4;"); + statement("constexpr static constant ushort y_chroma_off_base = 5;"); + statement("constexpr static constant ushort ycbcr_model_base = 6;"); + statement("constexpr static constant ushort ycbcr_range_base = 9;"); + statement("constexpr static constant ushort bpc_base = 10;"); + statement(""); + statement( + "constexpr static constant ushort resolution_mask = ((1 << resolution_bits) - 1) << resolution_base;"); + statement("constexpr static constant ushort chroma_filter_mask = ((1 << chroma_filter_bits) - 1) << " + "chroma_filter_base;"); + statement("constexpr static constant ushort x_chroma_off_mask = ((1 << x_chroma_off_bit) - 1) << " + "x_chroma_off_base;"); + statement("constexpr static constant ushort y_chroma_off_mask = ((1 << y_chroma_off_bit) - 1) << " + "y_chroma_off_base;"); + statement("constexpr static constant ushort ycbcr_model_mask = ((1 << ycbcr_model_bits) - 1) << " + "ycbcr_model_base;"); + statement("constexpr static constant ushort ycbcr_range_mask = ((1 << ycbcr_range_bit) - 1) << " + "ycbcr_range_base;"); + statement("constexpr static constant ushort bpc_mask = ((1 << bpc_bits) - 1) << bpc_base;"); + statement(""); + statement("static constexpr ushort build()"); + begin_scope(); + statement("return 0;"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvFormatResolution res, Ts... t)"); + begin_scope(); + statement("return (ushort(res) << resolution_base) | (build(t...) & ~resolution_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvChromaFilter filt, Ts... 
t)"); + begin_scope(); + statement("return (ushort(filt) << chroma_filter_base) | (build(t...) & ~chroma_filter_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvXChromaLocation loc, Ts... t)"); + begin_scope(); + statement("return (ushort(loc) << x_chroma_off_base) | (build(t...) & ~x_chroma_off_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvYChromaLocation loc, Ts... t)"); + begin_scope(); + statement("return (ushort(loc) << y_chroma_off_base) | (build(t...) & ~y_chroma_off_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvYCbCrModelConversion model, Ts... t)"); + begin_scope(); + statement("return (ushort(model) << ycbcr_model_base) | (build(t...) & ~ycbcr_model_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvYCbCrRange range, Ts... t)"); + begin_scope(); + statement("return (ushort(range) << ycbcr_range_base) | (build(t...) & ~ycbcr_range_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvComponentBits bpc, Ts... t)"); + begin_scope(); + statement("return (ushort(bpc.value) << bpc_base) | (build(t...) 
& ~bpc_mask);"); + end_scope(); + end_scope_decl(); + statement(""); + statement("// A class which can hold up to three textures and a sampler, including"); + statement("// Y'CbCr conversion info, used to pass combined image-samplers"); + statement("// dynamically to functions."); + statement("template"); + statement("struct spvDynamicImageSampler"); + begin_scope(); + statement("texture2d plane0;"); + statement("texture2d plane1;"); + statement("texture2d plane2;"); + statement("sampler samp;"); + statement("spvYCbCrSampler ycbcr_samp;"); + statement("uint swizzle = 0;"); + statement(""); + if (msl_options.swizzle_texture_samples) + { + statement("constexpr spvDynamicImageSampler(texture2d tex, sampler samp, uint sw) thread :"); + statement(" plane0(tex), samp(samp), swizzle(sw) {}"); + } + else + { + statement("constexpr spvDynamicImageSampler(texture2d tex, sampler samp) thread :"); + statement(" plane0(tex), samp(samp) {}"); + } + statement("constexpr spvDynamicImageSampler(texture2d tex, sampler samp, spvYCbCrSampler ycbcr_samp, " + "uint sw) thread :"); + statement(" plane0(tex), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}"); + statement("constexpr spvDynamicImageSampler(texture2d plane0, texture2d plane1,"); + statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :"); + statement(" plane0(plane0), plane1(plane1), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}"); + statement( + "constexpr spvDynamicImageSampler(texture2d plane0, texture2d plane1, texture2d plane2,"); + statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :"); + statement(" plane0(plane0), plane1(plane1), plane2(plane2), samp(samp), ycbcr_samp(ycbcr_samp), " + "swizzle(sw) {}"); + statement(""); + // XXX This is really hard to follow... I've left comments to make it a bit easier. + statement("template"); + statement("vec do_sample(float2 coord, LodOptions... 
options) const thread"); + begin_scope(); + statement("if (!is_null_texture(plane1))"); + begin_scope(); + statement("if (ycbcr_samp.get_resolution() == spvFormatResolution::_444 ||"); + statement(" ycbcr_samp.get_chroma_filter() == spvChromaFilter::nearest)"); + begin_scope(); + statement("if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructNearest(plane0, plane1, plane2, samp, coord,"); + statement(" spvForward(options)...);"); + statement( + "return spvChromaReconstructNearest(plane0, plane1, samp, coord, spvForward(options)...);"); + end_scope(); // if (resolution == 422 || chroma_filter == nearest) + statement("switch (ycbcr_samp.get_resolution())"); + begin_scope(); + statement("case spvFormatResolution::_444: break;"); + statement("case spvFormatResolution::_422:"); + begin_scope(); + statement("switch (ycbcr_samp.get_x_chroma_offset())"); + begin_scope(); + statement("case spvXChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear422CositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear422CositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + statement("case spvXChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear422Midpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear422Midpoint("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + end_scope(); // switch (x_chroma_offset) + end_scope(); // case 422: + statement("case spvFormatResolution::_420:"); + begin_scope(); + statement("switch (ycbcr_samp.get_x_chroma_offset())"); + begin_scope(); + statement("case spvXChromaLocation::cosited_even:"); + 
begin_scope(); + statement("switch (ycbcr_samp.get_y_chroma_offset())"); + begin_scope(); + statement("case spvYChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + statement("case spvYChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + end_scope(); // switch (y_chroma_offset) + end_scope(); // case x::cosited_even: + statement("case spvXChromaLocation::midpoint:"); + begin_scope(); + statement("switch (ycbcr_samp.get_y_chroma_offset())"); + begin_scope(); + statement("case spvYChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XMidpointYCositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XMidpointYCositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + statement("case spvYChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XMidpointYMidpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XMidpointYMidpoint("); + statement(" plane0, 
plane1, samp, coord,"); + statement(" spvForward(options)...);"); + end_scope(); // switch (y_chroma_offset) + end_scope(); // case x::midpoint + end_scope(); // switch (x_chroma_offset) + end_scope(); // case 420: + end_scope(); // switch (resolution) + end_scope(); // if (multiplanar) + statement("return plane0.sample(samp, coord, spvForward(options)...);"); + end_scope(); // do_sample() + statement("template "); + statement("vec sample(float2 coord, LodOptions... options) const thread"); + begin_scope(); + statement( + "vec s = spvTextureSwizzle(do_sample(coord, spvForward(options)...), swizzle);"); + statement("if (ycbcr_samp.get_ycbcr_model() == spvYCbCrModelConversion::rgb_identity)"); + statement(" return s;"); + statement(""); + statement("switch (ycbcr_samp.get_ycbcr_range())"); + begin_scope(); + statement("case spvYCbCrRange::itu_full:"); + statement(" s = spvExpandITUFullRange(s, ycbcr_samp.get_bpc());"); + statement(" break;"); + statement("case spvYCbCrRange::itu_narrow:"); + statement(" s = spvExpandITUNarrowRange(s, ycbcr_samp.get_bpc());"); + statement(" break;"); + end_scope(); + statement(""); + statement("switch (ycbcr_samp.get_ycbcr_model())"); + begin_scope(); + statement("case spvYCbCrModelConversion::rgb_identity:"); // Silence Clang warning + statement("case spvYCbCrModelConversion::ycbcr_identity:"); + statement(" return s;"); + statement("case spvYCbCrModelConversion::ycbcr_bt_709:"); + statement(" return spvConvertYCbCrBT709(s);"); + statement("case spvYCbCrModelConversion::ycbcr_bt_601:"); + statement(" return spvConvertYCbCrBT601(s);"); + statement("case spvYCbCrModelConversion::ycbcr_bt_2020:"); + statement(" return spvConvertYCbCrBT2020(s);"); + end_scope(); + end_scope(); + statement(""); + // Sampler Y'CbCr conversion forbids offsets. 
+ statement("vec sample(float2 coord, int2 offset) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvTextureSwizzle(plane0.sample(samp, coord, offset), swizzle);"); + else + statement("return plane0.sample(samp, coord, offset);"); + end_scope(); + statement("template"); + statement("vec sample(float2 coord, lod_options options, int2 offset) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvTextureSwizzle(plane0.sample(samp, coord, options, offset), swizzle);"); + else + statement("return plane0.sample(samp, coord, options, offset);"); + end_scope(); + statement("#if __HAVE_MIN_LOD_CLAMP__"); + statement("vec sample(float2 coord, bias b, min_lod_clamp min_lod, int2 offset) const thread"); + begin_scope(); + statement("return plane0.sample(samp, coord, b, min_lod, offset);"); + end_scope(); + statement( + "vec sample(float2 coord, gradient2d grad, min_lod_clamp min_lod, int2 offset) const thread"); + begin_scope(); + statement("return plane0.sample(samp, coord, grad, min_lod, offset);"); + end_scope(); + statement("#endif"); + statement(""); + // Y'CbCr conversion forbids all operations but sampling. + statement("vec read(uint2 coord, uint lod = 0) const thread"); + begin_scope(); + statement("return plane0.read(coord, lod);"); + end_scope(); + statement(""); + statement("vec gather(float2 coord, int2 offset = int2(0), component c = component::x) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvGatherSwizzle(plane0, samp, swizzle, c, coord, offset);"); + else + statement("return plane0.gather(samp, coord, offset, c);"); + end_scope(); + end_scope_decl(); + statement(""); + + default: break; } } } -// Undefined global memory is not allowed in MSL. -// Declare constant and init to zeros. Use {}, as global constructors can break Metal. 
-void CompilerMSL::declare_undefined_values() +static string inject_top_level_storage_qualifier(const string &expr, const string &qualifier) +{ + // Easier to do this through text munging since the qualifier does not exist in the type system at all, + // and plumbing in all that information is not very helpful. + size_t last_reference = expr.find_last_of('&'); + size_t last_pointer = expr.find_last_of('*'); + size_t last_significant = string::npos; + + if (last_reference == string::npos) + last_significant = last_pointer; + else if (last_pointer == string::npos) + last_significant = last_reference; + else + last_significant = std::max(last_reference, last_pointer); + + if (last_significant == string::npos) + return join(qualifier, " ", expr); + else + { + return join(expr.substr(0, last_significant + 1), " ", + qualifier, expr.substr(last_significant + 1, string::npos)); + } +} + +void CompilerMSL::declare_constant_arrays() { + bool fully_inlined = ir.ids_for_type[TypeFunction].size() == 1; + + // MSL cannot declare arrays inline (except when declaring a variable), so we must move them out to + // global constants directly, so we are able to use constants as variable expressions. bool emitted = false; - ir.for_each_typed_id([&](uint32_t, SPIRUndef &undef) { - auto &type = this->get(undef.basetype); - statement("constant ", variable_decl(type, to_name(undef.self), undef.self), " = {};"); - emitted = true; + + ir.for_each_typed_id([&](uint32_t, SPIRConstant &c) { + if (c.specialization) + return; + + auto &type = this->get(c.constant_type); + // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries. + // FIXME: However, hoisting constants to main() means we need to pass down constant arrays to leaf functions if they are used there. + // If there are multiple functions in the module, drop this case to avoid breaking use cases which do not need to + // link into Metal libraries. This is hacky. 
+ if (!type.array.empty() && (!fully_inlined || is_scalar(type) || is_vector(type))) + { + add_resource_name(c.self); + auto name = to_name(c.self); + statement(inject_top_level_storage_qualifier(variable_decl(type, name), "constant"), + " = ", constant_expression(c), ";"); + emitted = true; + } }); if (emitted) statement(""); } -void CompilerMSL::declare_constant_arrays() +// Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries +void CompilerMSL::declare_complex_constant_arrays() { + // If we do not have a fully inlined module, we did not opt in to + // declaring constant arrays of complex types. See CompilerMSL::declare_constant_arrays(). + bool fully_inlined = ir.ids_for_type[TypeFunction].size() == 1; + if (!fully_inlined) + return; + // MSL cannot declare arrays inline (except when declaring a variable), so we must move them out to // global constants directly, so we are able to use constants as variable expressions. bool emitted = false; @@ -3375,10 +7266,11 @@ void CompilerMSL::declare_constant_arrays() return; auto &type = this->get(c.constant_type); - if (!type.array.empty()) + if (!type.array.empty() && !(is_scalar(type) || is_vector(type))) { + add_resource_name(c.self); auto name = to_name(c.self); - statement("constant ", variable_decl(type, name), " = ", constant_expression(c), ";"); + statement("", variable_decl(type, name), " = ", constant_expression(c), ";"); emitted = true; } }); @@ -3390,7 +7282,6 @@ void CompilerMSL::declare_constant_arrays() void CompilerMSL::emit_resources() { declare_constant_arrays(); - declare_undefined_values(); // Emit the special [[stage_in]] and [[stage_out]] interface blocks which we created. 
emit_interface_block(stage_out_var_id); @@ -3403,12 +7294,57 @@ void CompilerMSL::emit_resources() void CompilerMSL::emit_specialization_constants_and_structs() { SpecializationConstant wg_x, wg_y, wg_z; - uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); bool emitted = false; unordered_set declared_structs; + unordered_set aligned_structs; + + // First, we need to deal with scalar block layout. + // It is possible that a struct may have to be placed at an alignment which does not match the innate alignment of the struct itself. + // In that case, if such a case exists for a struct, we must force that all elements of the struct become packed_ types. + // This makes the struct alignment as small as physically possible. + // When we actually align the struct later, we can insert padding as necessary to make the packed members behave like normally aligned types. + ir.for_each_typed_id([&](uint32_t type_id, const SPIRType &type) { + if (type.basetype == SPIRType::Struct && + has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked)) + mark_scalar_layout_structs(type); + }); + + bool builtin_block_type_is_required = false; + // Very special case. If gl_PerVertex is initialized as an array (tessellation) + // we have to potentially emit the gl_PerVertex struct type so that we can emit a constant LUT. + ir.for_each_typed_id([&](uint32_t, SPIRConstant &c) { + auto &type = this->get(c.constant_type); + if (is_array(type) && has_decoration(type.self, DecorationBlock) && is_builtin_type(type)) + builtin_block_type_is_required = true; + }); + + // Very particular use of the soft loop lock. + // align_struct may need to create custom types on the fly, but we don't care about + // these types for purpose of iterating over them in ir.ids_for_type and friends. 
+ auto loop_lock = ir.create_loop_soft_lock(); + + // Physical storage buffer pointers can have cyclical references, + // so emit forward declarations of them before other structs. + // Ignore type_id because we want the underlying struct type from the pointer. + ir.for_each_typed_id([&](uint32_t /* type_id */, const SPIRType &type) { + if (type.basetype == SPIRType::Struct && + type.pointer && type.storage == StorageClassPhysicalStorageBuffer && + declared_structs.count(type.self) == 0) + { + statement("struct ", to_name(type.self), ";"); + declared_structs.insert(type.self); + emitted = true; + } + }); + if (emitted) + statement(""); + + emitted = false; + declared_structs.clear(); - for (auto &id_ : ir.ids_for_constant_or_type) + for (auto &id_ : ir.ids_for_constant_undef_or_type) { auto &id = ir.ids[id_]; @@ -3429,6 +7365,7 @@ void CompilerMSL::emit_specialization_constants_and_structs() { auto &type = get(c.constant_type); string sc_type_name = type_to_glsl(type); + add_resource_name(c.self); string sc_name = to_name(c.self); string sc_tmp_name = sc_name + "_tmp"; @@ -3447,259 +7384,733 @@ void CompilerMSL::emit_specialization_constants_and_structs() statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name, ") ? ", sc_tmp_name, " : ", constant_expression(c), ";"); } - else if (has_decoration(c.self, DecorationSpecId)) + else if (has_decoration(c.self, DecorationSpecId)) + { + // Fallback to macro overrides. + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + + statement("#ifndef ", c.specialization_constant_macro_name); + statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); + statement("#endif"); + statement("constant ", sc_type_name, " ", sc_name, " = ", c.specialization_constant_macro_name, + ";"); + } + else + { + // Composite specialization constants must be built from other specialization constants. 
+ statement("constant ", sc_type_name, " ", sc_name, " = ", constant_expression(c), ";"); + } + emitted = true; + } + } + else if (id.get_type() == TypeConstantOp) + { + auto &c = id.get(); + auto &type = get(c.basetype); + add_resource_name(c.self); + auto name = to_name(c.self); + statement("constant ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); + emitted = true; + } + else if (id.get_type() == TypeType) + { + // Output non-builtin interface structs. These include local function structs + // and structs nested within uniform and read-write buffers. + auto &type = id.get(); + TypeID type_id = type.self; + + bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && !type.pointer; + bool is_block = + has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + + bool is_builtin_block = is_block && is_builtin_type(type); + bool is_declarable_struct = is_struct && (!is_builtin_block || builtin_block_type_is_required); + + // We'll declare this later. + if (stage_out_var_id && get_stage_out_struct_type().self == type_id) + is_declarable_struct = false; + if (patch_stage_out_var_id && get_patch_stage_out_struct_type().self == type_id) + is_declarable_struct = false; + if (stage_in_var_id && get_stage_in_struct_type().self == type_id) + is_declarable_struct = false; + if (patch_stage_in_var_id && get_patch_stage_in_struct_type().self == type_id) + is_declarable_struct = false; + + // Special case. Declare builtin struct anyways if we need to emit a threadgroup version of it. + if (stage_out_masked_builtin_type_id == type_id) + is_declarable_struct = true; + + // Align and emit declarable structs...but avoid declaring each more than once. 
+ if (is_declarable_struct && declared_structs.count(type_id) == 0) + { + if (emitted) + statement(""); + emitted = false; + + declared_structs.insert(type_id); + + if (has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked)) + align_struct(type, aligned_structs); + + // Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc. + emit_struct(get(type_id)); + } + } + else if (id.get_type() == TypeUndef) + { + auto &undef = id.get(); + auto &type = get(undef.basetype); + // OpUndef can be void for some reason ... + if (type.basetype == SPIRType::Void) + return; + + // Undefined global memory is not allowed in MSL. + // Declare constant and init to zeros. Use {}, as global constructors can break Metal. + statement( + inject_top_level_storage_qualifier(variable_decl(type, to_name(undef.self), undef.self), "constant"), + " = {};"); + emitted = true; + } + } + + if (emitted) + statement(""); +} + +void CompilerMSL::emit_binary_ptr_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, join(to_ptr_expression(op0), " ", op, " ", to_ptr_expression(op1)), forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +string CompilerMSL::to_ptr_expression(uint32_t id, bool register_expression_read) +{ + auto *e = maybe_get(id); + auto expr = enclose_expression(e && e->need_transpose ? 
e->expression : to_expression(id, register_expression_read)); + if (!should_dereference(id)) + expr = address_of_expression(expr); + return expr; +} + +void CompilerMSL::emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, + join("(isunordered(", to_enclosed_unpacked_expression(op0), ", ", to_enclosed_unpacked_expression(op1), + ") || ", to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1), + ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id, uint32_t ptr) +{ + auto &ptr_type = expression_type(ptr); + auto &result_type = get(result_type_id); + if (ptr_type.storage != StorageClassInput && ptr_type.storage != StorageClassOutput) + return false; + if (ptr_type.storage == StorageClassOutput && is_tese_shader()) + return false; + + if (has_decoration(ptr, DecorationPatch)) + return false; + bool ptr_is_io_variable = ir.ids[ptr].get_type() == TypeVariable; + + bool flattened_io = variable_storage_requires_stage_io(ptr_type.storage); + + bool flat_data_type = flattened_io && + (is_matrix(result_type) || is_array(result_type) || result_type.basetype == SPIRType::Struct); + + // Edge case, even with multi-patch workgroups, we still need to unroll load + // if we're loading control points directly. + if (ptr_is_io_variable && is_array(result_type)) + flat_data_type = true; + + if (!flat_data_type) + return false; + + // Now, we must unflatten a composite type and take care of interleaving array access with gl_in/gl_out. + // Lots of painful code duplication since we *really* should not unroll these kinds of loads in entry point fixup + // unless we're forced to do this when the code is emitting inoptimal OpLoads. 
+ string expr; + + uint32_t interface_index = get_extended_decoration(ptr, SPIRVCrossDecorationInterfaceMemberIndex); + auto *var = maybe_get_backing_variable(ptr); + auto &expr_type = get_pointee_type(ptr_type.self); + + const auto &iface_type = expression_type(stage_in_ptr_var_id); + + if (!flattened_io) + { + // Simplest case for multi-patch workgroups, just unroll array as-is. + if (interface_index == uint32_t(-1)) + return false; + + expr += type_to_glsl(result_type) + "({ "; + uint32_t num_control_points = to_array_size_literal(result_type, uint32_t(result_type.array.size()) - 1); + + for (uint32_t i = 0; i < num_control_points; i++) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (i + 1 < num_control_points) + expr += ", "; + } + expr += " })"; + } + else if (result_type.array.size() > 2) + { + SPIRV_CROSS_THROW("Cannot load tessellation IO variables with more than 2 dimensions."); + } + else if (result_type.array.size() == 2) + { + if (!ptr_is_io_variable) + SPIRV_CROSS_THROW("Loading an array-of-array must be loaded directly from an IO variable."); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. 
Cannot continue."); + if (result_type.basetype == SPIRType::Struct || is_matrix(result_type)) + SPIRV_CROSS_THROW("Cannot load array-of-array of composite type in tessellation IO."); + + expr += type_to_glsl(result_type) + "({ "; + uint32_t num_control_points = to_array_size_literal(result_type, 1); + uint32_t base_interface_index = interface_index; + + auto &sub_type = get(result_type.parent_type); + + for (uint32_t i = 0; i < num_control_points; i++) + { + expr += type_to_glsl(sub_type) + "({ "; + interface_index = base_interface_index; + uint32_t array_size = to_array_size_literal(result_type, 0); + for (uint32_t j = 0; j < array_size; j++, interface_index++) + { + const uint32_t indices[2] = { i, interface_index }; + + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct && + expr_type.vecsize > sub_type.vecsize) + expr += vector_swizzle(sub_type.vecsize, 0); + + if (j + 1 < array_size) + expr += ", "; + } + expr += " })"; + if (i + 1 < num_control_points) + expr += ", "; + } + expr += " })"; + } + else if (result_type.basetype == SPIRType::Struct) + { + bool is_array_of_struct = is_array(result_type); + if (is_array_of_struct && !ptr_is_io_variable) + SPIRV_CROSS_THROW("Loading array of struct from IO variable must come directly from IO variable."); + + uint32_t num_control_points = 1; + if (is_array_of_struct) + { + num_control_points = to_array_size_literal(result_type, 0); + expr += type_to_glsl(result_type) + "({ "; + } + + auto &struct_type = is_array_of_struct ? 
get(result_type.parent_type) : result_type; + assert(struct_type.array.empty()); + + for (uint32_t i = 0; i < num_control_points; i++) + { + expr += type_to_glsl(struct_type) + "{ "; + for (uint32_t j = 0; j < uint32_t(struct_type.member_types.size()); j++) + { + // The base interface index is stored per variable for structs. + if (var) + { + interface_index = + get_extended_member_decoration(var->self, j, SPIRVCrossDecorationInterfaceMemberIndex); + } + + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); + + const auto &mbr_type = get(struct_type.member_types[j]); + const auto &expr_mbr_type = get(expr_type.member_types[j]); + if (is_matrix(mbr_type) && ptr_type.storage == StorageClassInput) { - // Fallback to macro overrides. - c.specialization_constant_macro_name = - constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + expr += type_to_glsl(mbr_type) + "("; + for (uint32_t k = 0; k < mbr_type.columns; k++, interface_index++) + { + if (is_array_of_struct) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal( + stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + } + else + expr += to_expression(ptr) + "." 
+ to_member_name(iface_type, interface_index); + if (expr_mbr_type.vecsize > mbr_type.vecsize) + expr += vector_swizzle(mbr_type.vecsize, 0); - statement("#ifndef ", c.specialization_constant_macro_name); - statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); - statement("#endif"); - statement("constant ", sc_type_name, " ", sc_name, " = ", c.specialization_constant_macro_name, - ";"); + if (k + 1 < mbr_type.columns) + expr += ", "; + } + expr += ")"; + } + else if (is_array(mbr_type)) + { + expr += type_to_glsl(mbr_type) + "({ "; + uint32_t array_size = to_array_size_literal(mbr_type, 0); + for (uint32_t k = 0; k < array_size; k++, interface_index++) + { + if (is_array_of_struct) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal( + stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + } + else + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + if (expr_mbr_type.vecsize > mbr_type.vecsize) + expr += vector_swizzle(mbr_type.vecsize, 0); + + if (k + 1 < array_size) + expr += ", "; + } + expr += " })"; } else { - // Composite specialization constants must be built from other specialization constants. - statement("constant ", sc_type_name, " ", sc_name, " = ", constant_expression(c), ";"); + if (is_array_of_struct) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, + &meta); + } + else + expr += to_expression(ptr) + "." 
+ to_member_name(iface_type, interface_index); + if (expr_mbr_type.vecsize > mbr_type.vecsize) + expr += vector_swizzle(mbr_type.vecsize, 0); } - emitted = true; + + if (j + 1 < struct_type.member_types.size()) + expr += ", "; } + expr += " }"; + if (i + 1 < num_control_points) + expr += ", "; } - else if (id.get_type() == TypeConstantOp) - { - auto &c = id.get(); - auto &type = get(c.basetype); - auto name = to_name(c.self); - statement("constant ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); - emitted = true; - } - else if (id.get_type() == TypeType) - { - // Output non-builtin interface structs. These include local function structs - // and structs nested within uniform and read-write buffers. - auto &type = id.get(); - uint32_t type_id = type.self; - - bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty(); - bool is_block = - has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + if (is_array_of_struct) + expr += " })"; + } + else if (is_matrix(result_type)) + { + bool is_array_of_matrix = is_array(result_type); + if (is_array_of_matrix && !ptr_is_io_variable) + SPIRV_CROSS_THROW("Loading array of matrix from IO variable must come directly from IO variable."); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); - bool is_builtin_block = is_block && is_builtin_type(type); - bool is_declarable_struct = is_struct && !is_builtin_block; + if (is_array_of_matrix) + { + // Loading a matrix from each control point. + uint32_t base_interface_index = interface_index; + uint32_t num_control_points = to_array_size_literal(result_type, 0); + expr += type_to_glsl(result_type) + "({ "; - // We'll declare this later. 
- if (stage_out_var_id && get_stage_out_struct_type().self == type_id) - is_declarable_struct = false; - if (patch_stage_out_var_id && get_patch_stage_out_struct_type().self == type_id) - is_declarable_struct = false; - if (stage_in_var_id && get_stage_in_struct_type().self == type_id) - is_declarable_struct = false; - if (patch_stage_in_var_id && get_patch_stage_in_struct_type().self == type_id) - is_declarable_struct = false; + auto &matrix_type = get_variable_element_type(get(ptr)); - // Align and emit declarable structs...but avoid declaring each more than once. - if (is_declarable_struct && declared_structs.count(type_id) == 0) + for (uint32_t i = 0; i < num_control_points; i++) { - if (emitted) - statement(""); - emitted = false; - - declared_structs.insert(type_id); - - if (has_extended_decoration(type_id, SPIRVCrossDecorationPacked)) - align_struct(type); + interface_index = base_interface_index; + expr += type_to_glsl(matrix_type) + "("; + for (uint32_t j = 0; j < result_type.columns; j++, interface_index++) + { + const uint32_t indices[2] = { i, interface_index }; + + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (expr_type.vecsize > result_type.vecsize) + expr += vector_swizzle(result_type.vecsize, 0); + if (j + 1 < result_type.columns) + expr += ", "; + } + expr += ")"; + if (i + 1 < num_control_points) + expr += ", "; + } - // Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc. - emit_struct(get(type_id)); + expr += " })"; + } + else + { + expr += type_to_glsl(result_type) + "("; + for (uint32_t i = 0; i < result_type.columns; i++, interface_index++) + { + expr += to_expression(ptr) + "." 
+ to_member_name(iface_type, interface_index); + if (expr_type.vecsize > result_type.vecsize) + expr += vector_swizzle(result_type.vecsize, 0); + if (i + 1 < result_type.columns) + expr += ", "; } + expr += ")"; } } + else if (ptr_is_io_variable) + { + assert(is_array(result_type)); + assert(result_type.array.size() == 1); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); - if (emitted) - statement(""); -} + // We're loading an array directly from a global variable. + // This means we're loading one member from each control point. + expr += type_to_glsl(result_type) + "({ "; + uint32_t num_control_points = to_array_size_literal(result_type, 0); -void CompilerMSL::emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op) -{ - bool forward = should_forward(op0) && should_forward(op1); - emit_op(result_type, result_id, - join("(isunordered(", to_enclosed_unpacked_expression(op0), ", ", to_enclosed_unpacked_expression(op1), - ") || ", to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1), - ")"), - forward); + for (uint32_t i = 0; i < num_control_points; i++) + { + const uint32_t indices[2] = { i, interface_index }; - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (expr_type.vecsize > result_type.vecsize) + expr += vector_swizzle(result_type.vecsize, 0); + + if (i + 1 < num_control_points) + expr += ", "; + } + expr += " })"; + } + else + { + // We're loading an array from a concrete control point. + assert(is_array(result_type)); + assert(result_type.array.size() == 1); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. 
Cannot continue."); + + expr += type_to_glsl(result_type) + "({ "; + uint32_t array_size = to_array_size_literal(result_type, 0); + for (uint32_t i = 0; i < array_size; i++, interface_index++) + { + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + if (expr_type.vecsize > result_type.vecsize) + expr += vector_swizzle(result_type.vecsize, 0); + if (i + 1 < array_size) + expr += ", "; + } + expr += " })"; + } + + emit_op(result_type_id, id, expr, false); + register_read(id, ptr, false); + return true; } bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t length) { // If this is a per-vertex output, remap it to the I/O array buffer. - auto *var = maybe_get(ops[2]); - BuiltIn bi_type = BuiltIn(get_decoration(ops[2], DecorationBuiltIn)); - if (var && - (var->storage == StorageClassInput || - (get_execution_model() == ExecutionModelTessellationControl && var->storage == StorageClassOutput)) && - !(has_decoration(ops[2], DecorationPatch) || is_patch_block(get_variable_data_type(*var))) && - (!is_builtin_variable(*var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || - bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || - get_variable_data_type(*var).basetype == SPIRType::Struct)) + + // Any object which did not go through IO flattening shenanigans will go there instead. + // We will unflatten on-demand instead as needed, but not all possible cases can be supported, especially with arrays. 
+ + auto *var = maybe_get_backing_variable(ops[2]); + bool patch = false; + bool flat_data = false; + bool ptr_is_chain = false; + bool flatten_composites = false; + + bool is_block = false; + bool is_arrayed = false; + + if (var) { + auto &type = get_variable_data_type(*var); + is_block = has_decoration(type.self, DecorationBlock); + is_arrayed = !type.array.empty(); + + flatten_composites = variable_storage_requires_stage_io(var->storage); + patch = has_decoration(ops[2], DecorationPatch) || is_patch_block(type); + + // Should match strip_array in add_interface_block. + flat_data = var->storage == StorageClassInput || (var->storage == StorageClassOutput && is_tesc_shader()); + + // Patch inputs are treated as normal block IO variables, so they don't deal with this path at all. + if (patch && (!is_block || is_arrayed || var->storage == StorageClassInput)) + flat_data = false; + + // We might have a chained access chain, where + // we first take the access chain to the control point, and then we chain into a member or something similar. + // In this case, we need to skip gl_in/gl_out remapping. + // Also, skip ptr chain for patches. + ptr_is_chain = var->self != ID(ops[2]); + } + + bool builtin_variable = false; + bool variable_is_flat = false; + + if (var && flat_data) + { + builtin_variable = is_builtin_variable(*var); + + BuiltIn bi_type = BuiltInMax; + if (builtin_variable && !is_block) + bi_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); + + variable_is_flat = !builtin_variable || is_block || + bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance; + } + + if (variable_is_flat) + { + // If output is masked, it is emitted as a "normal" variable, just go through normal code paths. + // Only check this for the first level of access chain. + // Dealing with this for partial access chains should be possible, but awkward. 
+ if (var->storage == StorageClassOutput && !ptr_is_chain) + { + bool masked = false; + if (is_block) + { + uint32_t relevant_member_index = patch ? 3 : 4; + // FIXME: This won't work properly if the application first access chains into gl_out element, + // then access chains into the member. Super weird, but theoretically possible ... + if (length > relevant_member_index) + { + uint32_t mbr_idx = get(ops[relevant_member_index]).scalar(); + masked = is_stage_output_block_member_masked(*var, mbr_idx, true); + } + } + else if (var) + masked = is_stage_output_variable_masked(*var); + + if (masked) + return false; + } + AccessChainMeta meta; SmallVector indices; - uint32_t next_id = ir.increase_bound_by(2); + uint32_t next_id = ir.increase_bound_by(1); indices.reserve(length - 3 + 1); - uint32_t type_id = next_id++; - SPIRType new_uint_type; - new_uint_type.basetype = SPIRType::UInt; - new_uint_type.width = 32; - set(type_id, new_uint_type); - indices.push_back(ops[3]); + uint32_t first_non_array_index = (ptr_is_chain ? 3 : 4) - (patch ? 1 : 0); + + VariableID stage_var_id; + if (patch) + stage_var_id = var->storage == StorageClassInput ? patch_stage_in_var_id : patch_stage_out_var_id; + else + stage_var_id = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id; + + VariableID ptr = ptr_is_chain ? VariableID(ops[2]) : stage_var_id; + if (!ptr_is_chain && !patch) + { + // Index into gl_in/gl_out with first array index. + indices.push_back(ops[first_non_array_index - 1]); + } + + auto &result_ptr_type = get(ops[0]); uint32_t const_mbr_id = next_id++; uint32_t index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex); - uint32_t ptr = var->storage == StorageClassInput ? 
stage_in_ptr_var_id : stage_out_ptr_var_id; - if (var->storage == StorageClassInput || has_decoration(get_variable_element_type(*var).self, DecorationBlock)) + + // If we have a pointer chain expression, and we are no longer pointing to a composite + // object, we are in the clear. There is no longer a need to flatten anything. + bool further_access_chain_is_trivial = false; + if (ptr_is_chain && flatten_composites) { - uint32_t i = 4; + auto &ptr_type = expression_type(ptr); + if (!is_array(ptr_type) && !is_matrix(ptr_type) && ptr_type.basetype != SPIRType::Struct) + further_access_chain_is_trivial = true; + } + + if (!further_access_chain_is_trivial && (flatten_composites || is_block)) + { + uint32_t i = first_non_array_index; auto *type = &get_variable_element_type(*var); - if (index == uint32_t(-1) && length >= 5) + if (index == uint32_t(-1) && length >= (first_non_array_index + 1)) { // Maybe this is a struct type in the input class, in which case // we put it as a decoration on the corresponding member. - index = get_extended_member_decoration(ops[2], get_constant(ops[4]).scalar(), + uint32_t mbr_idx = get_constant(ops[first_non_array_index]).scalar(); + index = get_extended_member_decoration(var->self, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex); assert(index != uint32_t(-1)); i++; - type = &get(type->member_types[get_constant(ops[4]).scalar()]); + type = &get(type->member_types[mbr_idx]); } - // In this case, we flattened structures and arrays, so now we have to + + // In this case, we're poking into flattened structures and arrays, so now we have to // combine the following indices. If we encounter a non-constant index, // we're hosed. 
- for (; i < length; ++i) + for (; flatten_composites && i < length; ++i) { if (!is_array(*type) && !is_matrix(*type) && type->basetype != SPIRType::Struct) break; - auto &c = get_constant(ops[i]); - index += c.scalar(); + auto *c = maybe_get(ops[i]); + if (!c || c->specialization) + SPIRV_CROSS_THROW("Trying to dynamically index into an array interface variable in tessellation. " + "This is currently unsupported."); + + // We're in flattened space, so just increment the member index into IO block. + // We can only do this once in the current implementation, so either: + // Struct, Matrix or 1-dimensional array for a control point. + if (type->basetype == SPIRType::Struct && var->storage == StorageClassOutput) + { + // Need to consider holes, since individual block members might be masked away. + uint32_t mbr_idx = c->scalar(); + for (uint32_t j = 0; j < mbr_idx; j++) + if (!is_stage_output_block_member_masked(*var, j, true)) + index++; + } + else + index += c->scalar(); + if (type->parent_type) type = &get(type->parent_type); else if (type->basetype == SPIRType::Struct) - type = &get(type->member_types[c.scalar()]); + type = &get(type->member_types[c->scalar()]); } - // If the access chain terminates at a composite type, the composite - // itself might be copied. In that case, we must unflatten it. - if (is_matrix(*type) || is_array(*type) || type->basetype == SPIRType::Struct) - { - std::string temp_name = join(to_name(var->self), "_", ops[1]); - statement(variable_decl(*type, temp_name, var->self), ";"); - // Set up the initializer for this temporary variable. 
- indices.push_back(const_mbr_id); - if (type->basetype == SPIRType::Struct) - { - for (uint32_t j = 0; j < type->member_types.size(); j++) - { - index = get_extended_member_decoration(ops[2], j, SPIRVCrossDecorationInterfaceMemberIndex); - const auto &mbr_type = get(type->member_types[j]); - if (is_matrix(mbr_type)) - { - for (uint32_t k = 0; k < mbr_type.columns; k++, index++) - { - set(const_mbr_id, type_id, index, false); - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, - true); - statement(temp_name, ".", to_member_name(*type, j), "[", k, "] = ", e, ";"); - } - } - else if (is_array(mbr_type)) - { - for (uint32_t k = 0; k < mbr_type.array[0]; k++, index++) - { - set(const_mbr_id, type_id, index, false); - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, - true); - statement(temp_name, ".", to_member_name(*type, j), "[", k, "] = ", e, ";"); - } - } - else - { - set(const_mbr_id, type_id, index, false); - auto e = - access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, true); - statement(temp_name, ".", to_member_name(*type, j), " = ", e, ";"); - } - } - } - else if (is_matrix(*type)) - { - for (uint32_t j = 0; j < type->columns; j++, index++) - { - set(const_mbr_id, type_id, index, false); - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true); - statement(temp_name, "[", j, "] = ", e, ";"); - } - } - else // Must be an array - { - assert(is_array(*type)); - for (uint32_t j = 0; j < type->array[0]; j++, index++) - { - set(const_mbr_id, type_id, index, false); - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true); - statement(temp_name, "[", j, "] = ", e, ";"); - } - } - // This needs to be a variable instead of an expression so we don't - // try to dereference this as a variable pointer. 
- set(ops[1], ops[0], var->storage); - ir.meta[ops[1]] = ir.meta[ops[2]]; - set_name(ops[1], temp_name); - if (has_decoration(var->self, DecorationInvariant)) - set_decoration(ops[1], DecorationInvariant); - for (uint32_t j = 2; j < length; j++) - inherit_expression_dependencies(ops[1], ops[j]); - return true; - } - else + // We're not going to emit the actual member name, we let any further OpLoad take care of that. + // Tag the access chain with the member index we're referencing. + bool defer_access_chain = flatten_composites && (is_matrix(result_ptr_type) || is_array(result_ptr_type) || + result_ptr_type.basetype == SPIRType::Struct); + + if (!defer_access_chain) { - set(const_mbr_id, type_id, index, false); + // Access the appropriate member of gl_in/gl_out. + set(const_mbr_id, get_uint_type_id(), index, false); indices.push_back(const_mbr_id); + // Member index is now irrelevant. + index = uint32_t(-1); + + // Append any straggling access chain indices. if (i < length) indices.insert(indices.end(), ops + i, ops + length); } + else + { + // We must have consumed the entire access chain if we're deferring it. + assert(i == length); + } + + if (index != uint32_t(-1)) + set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, index); + else + unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex); } else { - assert(index != uint32_t(-1)); - set(const_mbr_id, type_id, index, false); - indices.push_back(const_mbr_id); + if (index != uint32_t(-1)) + { + set(const_mbr_id, get_uint_type_id(), index, false); + indices.push_back(const_mbr_id); + } + + // Member index is now irrelevant. + index = uint32_t(-1); + unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex); - indices.insert(indices.end(), ops + 4, ops + length); + indices.insert(indices.end(), ops + first_non_array_index, ops + length); } // We use the pointer to the base of the input/output array here, // so this is always a pointer chain. 
- auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), get(ops[0]), &meta, true); - auto &expr = set(ops[1], move(e), ops[0], should_forward(ops[2])); + string e; + + if (!ptr_is_chain) + { + // This is the start of an access chain, use ptr_chain to index into control point array. + e = access_chain(ptr, indices.data(), uint32_t(indices.size()), result_ptr_type, &meta, !patch); + } + else + { + // If we're accessing a struct, we need to use member indices which are based on the IO block, + // not actual struct type, so we have to use a split access chain here where + // first path resolves the control point index, i.e. gl_in[index], and second half deals with + // looking up flattened member name. + + // However, it is possible that we partially accessed a struct, + // by taking pointer to member inside the control-point array. + // For this case, we fall back to a natural access chain since we have already dealt with remapping struct members. + // One way to check this here is if we have 2 implied read expressions. + // First one is the gl_in/gl_out struct itself, then an index into that array. + // If we have traversed further, we use a normal access chain formulation. + auto *ptr_expr = maybe_get(ptr); + bool split_access_chain_formulation = flatten_composites && ptr_expr && + ptr_expr->implied_read_expressions.size() == 2 && + !further_access_chain_is_trivial; + + if (split_access_chain_formulation) + { + e = join(to_expression(ptr), + access_chain_internal(stage_var_id, indices.data(), uint32_t(indices.size()), + ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta)); + } + else + { + e = access_chain_internal(ptr, indices.data(), uint32_t(indices.size()), 0, &meta); + } + } + + // Get the actual type of the object that was accessed. If it's a vector type and we changed it, + // then we'll need to add a swizzle. 
+ // For this, we can't necessarily rely on the type of the base expression, because it might be + // another access chain, and it will therefore already have the "correct" type. + auto *expr_type = &get_variable_data_type(*var); + if (has_extended_decoration(ops[2], SPIRVCrossDecorationTessIOOriginalInputTypeID)) + expr_type = &get(get_extended_decoration(ops[2], SPIRVCrossDecorationTessIOOriginalInputTypeID)); + for (uint32_t i = 3; i < length; i++) + { + if (!is_array(*expr_type) && expr_type->basetype == SPIRType::Struct) + expr_type = &get(expr_type->member_types[get(ops[i]).scalar()]); + else + expr_type = &get(expr_type->parent_type); + } + if (!is_array(*expr_type) && !is_matrix(*expr_type) && expr_type->basetype != SPIRType::Struct && + expr_type->vecsize > result_ptr_type.vecsize) + e += vector_swizzle(result_ptr_type.vecsize, 0); + + auto &expr = set(ops[1], std::move(e), ops[0], should_forward(ops[2])); expr.loaded_from = var->self; expr.need_transpose = meta.need_transpose; expr.access_chain = true; // Mark the result as being packed if necessary. if (meta.storage_is_packed) - set_extended_decoration(ops[1], SPIRVCrossDecorationPacked); - if (meta.storage_packed_type != 0) - set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type); + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); if (meta.storage_is_invariant) set_decoration(ops[1], DecorationInvariant); + // Save the type we found in case the result is used in another access chain. + set_extended_decoration(ops[1], SPIRVCrossDecorationTessIOOriginalInputTypeID, expr_type->self); + + // If we have some expression dependencies in our access chain, this access chain is technically a forwarded + // temporary which could be subject to invalidation. 
+ // Need to assume we're forwarded while calling inherit_expression_depdendencies. + forwarded_temporaries.insert(ops[1]); + // The access chain itself is never forced to a temporary, but its dependencies might. + suppressed_usage_tracking.insert(ops[1]); for (uint32_t i = 2; i < length; i++) { @@ -3707,6 +8118,11 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l add_implied_read_expression(expr, ops[i]); } + // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries, + // we're not forwarded after all. + if (expr.expression_dependencies.empty()) + forwarded_temporaries.erase(ops[1]); + return true; } @@ -3716,9 +8132,9 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l // expression so we don't try to dereference it as a variable pointer. // Don't do this if the index is a constant 1, though. We need to drop stores // to that one. - auto *m = ir.find_meta(var ? var->self : 0); - if (get_execution_model() == ExecutionModelTessellationControl && var && m && - m->decoration.builtin_type == BuiltInTessLevelInner && get_entry_point().flags.get(ExecutionModeTriangles)) + auto *m = ir.find_meta(var ? 
var->self : ID(0)); + if (is_tesc_shader() && var && m && m->decoration.builtin_type == BuiltInTessLevelInner && + is_tessellating_triangles()) { auto *c = maybe_get(ops[3]); if (c && c->scalar() == 1) @@ -3735,7 +8151,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l bool CompilerMSL::is_out_of_bounds_tessellation_level(uint32_t id_lhs) { - if (!get_entry_point().flags.get(ExecutionModeTriangles)) + if (!is_tessellating_triangles()) return false; // In SPIR-V, TessLevelInner always has two elements and TessLevelOuter always has @@ -3745,7 +8161,7 @@ bool CompilerMSL::is_out_of_bounds_tessellation_level(uint32_t id_lhs) // In Metal, however, only the first element of TessLevelInner and the first three // of TessLevelOuter are accessible. This stems from how in Metal, the tessellation // levels must be stored to a dedicated buffer in a particular format that depends - // on the patch type. Therefore, in Triangles mode, any access to the second + // on the patch type. Therefore, in Triangles mode, any store to the second // inner level or the fourth outer level must be dropped. const auto *e = maybe_get(id_lhs); if (!e || !e->access_chain) @@ -3760,12 +8176,117 @@ bool CompilerMSL::is_out_of_bounds_tessellation_level(uint32_t id_lhs) (builtin == BuiltInTessLevelOuter && c->scalar() == 3); } +void CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, + spv::StorageClass storage, bool &is_packed) +{ + // If there is any risk of writes happening with the access chain in question, + // and there is a risk of concurrent write access to other components, + // we must cast the access chain to a plain pointer to ensure we only access the exact scalars we expect. + // The MSL compiler refuses to allow component-level access for any non-packed vector types. 
+ if (!is_packed && (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup)) + { + const char *addr_space = storage == StorageClassWorkgroup ? "threadgroup" : "device"; + expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")"); + + // Further indexing should happen with packed rules (array index, not swizzle). + is_packed = true; + } +} + +bool CompilerMSL::access_chain_needs_stage_io_builtin_translation(uint32_t base) +{ + auto *var = maybe_get_backing_variable(base); + if (!var || !is_tessellation_shader()) + return true; + + // We only need to rewrite builtin access chains when accessing flattened builtins like gl_ClipDistance_N. + // Avoid overriding it back to just gl_ClipDistance. + // This can only happen in scenarios where we cannot flatten/unflatten access chains, so, the only case + // where this triggers is evaluation shader inputs. + bool redirect_builtin = is_tese_shader() ? var->storage == StorageClassOutput : false; + return redirect_builtin; +} + +// Sets the interface member index for an access chain to a pull-model interpolant. +void CompilerMSL::fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length) +{ + auto *var = maybe_get_backing_variable(ops[2]); + if (!var || !pull_model_inputs.count(var->self)) + return; + // Get the base index. + uint32_t interface_index; + auto &var_type = get_variable_data_type(*var); + auto &result_type = get(ops[0]); + auto *type = &var_type; + if (has_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex)) + { + interface_index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex); + } + else + { + // Assume an access chain into a struct variable. + assert(var_type.basetype == SPIRType::Struct); + auto &c = get(ops[3 + var_type.array.size()]); + interface_index = + get_extended_member_decoration(var->self, c.scalar(), SPIRVCrossDecorationInterfaceMemberIndex); + } + // Accumulate indices. 
We'll have to skip over the one for the struct, if present, because we already accounted + // for that getting the base index. + for (uint32_t i = 3; i < length; ++i) + { + if (is_vector(*type) && !is_array(*type) && is_scalar(result_type)) + { + // We don't want to combine the next index. Actually, we need to save it + // so we know to apply a swizzle to the result of the interpolation. + set_extended_decoration(ops[1], SPIRVCrossDecorationInterpolantComponentExpr, ops[i]); + break; + } + + auto *c = maybe_get(ops[i]); + if (!c || c->specialization) + SPIRV_CROSS_THROW("Trying to dynamically index into an array interface variable using pull-model " + "interpolation. This is currently unsupported."); + + if (type->parent_type) + type = &get(type->parent_type); + else if (type->basetype == SPIRType::Struct) + type = &get(type->member_types[c->scalar()]); + + if (!has_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex) && + i - 3 == var_type.array.size()) + continue; + + interface_index += c->scalar(); + } + // Save this to the access chain itself so we can recover it later when calling an interpolation function. + set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, interface_index); +} + + +// If the physical type of a physical buffer pointer has been changed +// to a ulong or ulongn vector, add a cast back to the pointer type. 
+void CompilerMSL::check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) +{ + auto *p_physical_type = maybe_get(physical_type); + if (p_physical_type && + p_physical_type->storage == StorageClassPhysicalStorageBuffer && + p_physical_type->basetype == to_unsigned_basetype(64)) + { + if (p_physical_type->vecsize > 1) + expr += ".x"; + + expr = join("((", type_to_glsl(*type), ")", expr, ")"); + } +} + // Override for MSL-specific syntax instructions void CompilerMSL::emit_instruction(const Instruction &instruction) { #define MSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) +#define MSL_PTR_BOP(op) emit_binary_ptr_op(ops[0], ops[1], ops[2], ops[3], #op) + // MSL does care about implicit integer promotion, but those cases are all handled in common code. #define MSL_BOP_CAST(op, type) \ - emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode), false) #define MSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) #define MSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) #define MSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) @@ -3778,6 +8299,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) auto ops = stream(instruction); auto opcode = static_cast(instruction.op); + opcode = get_remapped_spirv_op(opcode); + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
uint32_t integer_width = get_integer_width_for_instruction(instruction); auto int_type = to_signed_basetype(integer_width); @@ -3785,6 +8308,24 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) switch (opcode) { + case OpLoad: + { + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + if (is_tessellation_shader()) + { + if (!emit_tessellation_io_load(ops[0], id, ptr)) + CompilerGLSL::emit_instruction(instruction); + } + else + { + // Sample mask input for Metal is not an array + if (BuiltIn(get_decoration(ptr, DecorationBuiltIn)) == BuiltInSampleMask) + set_decoration(id, DecorationBuiltIn, BuiltInSampleMask); + CompilerGLSL::emit_instruction(instruction); + } + break; + } // Comparisons case OpIEqual: @@ -3802,6 +8343,10 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) case OpLogicalNotEqual: case OpFOrdNotEqual: + // TODO: Should probably negate the == result here. + // Typically OrdNotEqual comes from GLSL which itself does not really specify what + // happens with NaN. + // Consider fixing this if we run into real issues. MSL_BOP(!=); break; @@ -3858,7 +8403,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; case OpFUnordNotEqual: - MSL_UNORD_BOP(!=); + // not equal in MSL generates une opcodes to begin with. + // Since unordered not equal is how it works in C, just inherit that behavior. 
+ MSL_BOP(!=); break; case OpFUnordGreaterThan: @@ -3877,6 +8424,19 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) MSL_UNORD_BOP(<=); break; + // Pointer math + case OpPtrEqual: + MSL_PTR_BOP(==); + break; + + case OpPtrNotEqual: + MSL_PTR_BOP(!=); + break; + + case OpPtrDiff: + MSL_PTR_BOP(-); + break; + // Derivatives case OpDPdx: case OpDPdxFine: @@ -3901,26 +8461,62 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) // Bitfield case OpBitFieldInsert: - MSL_QFOP(insert_bits); + { + emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "insert_bits", SPIRType::UInt); break; + } case OpBitFieldSExtract: + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", int_type, int_type, + SPIRType::UInt, SPIRType::UInt); + break; + } + case OpBitFieldUExtract: - MSL_TFOP(extract_bits); + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", uint_type, uint_type, + SPIRType::UInt, SPIRType::UInt); break; + } case OpBitReverse: + // BitReverse does not have issues with sign since result type must match input type. 
MSL_UFOP(reverse_bits); break; case OpBitCount: - MSL_UFOP(popcount); + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "popcount", basetype, basetype); break; + } case OpFRem: MSL_BFOP(fmod); break; + case OpFMul: + if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction)) + MSL_BFOP(spvFMul); + else + MSL_BOP(*); + break; + + case OpFAdd: + if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction)) + MSL_BFOP(spvFAdd); + else + MSL_BOP(+); + break; + + case OpFSub: + if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction)) + MSL_BFOP(spvFSub); + else + MSL_BOP(-); + break; + // Atomics case OpAtomicExchange: { @@ -3929,7 +8525,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t ptr = ops[2]; uint32_t mem_sem = ops[4]; uint32_t val = ops[5]; - emit_atomic_func_op(result_type, id, "atomic_exchange_explicit", mem_sem, mem_sem, false, ptr, val); + emit_atomic_func_op(result_type, id, "atomic_exchange_explicit", opcode, mem_sem, mem_sem, false, ptr, val); break; } @@ -3942,7 +8538,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t mem_sem_fail = ops[5]; uint32_t val = ops[6]; uint32_t comp = ops[7]; - emit_atomic_func_op(result_type, id, "atomic_compare_exchange_weak_explicit", mem_sem_pass, mem_sem_fail, true, + emit_atomic_func_op(result_type, id, "atomic_compare_exchange_weak_explicit", opcode, + mem_sem_pass, mem_sem_fail, true, ptr, comp, true, false, val); break; } @@ -3956,7 +8553,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t id = ops[1]; uint32_t ptr = ops[2]; uint32_t mem_sem = ops[4]; - emit_atomic_func_op(result_type, id, "atomic_load_explicit", mem_sem, mem_sem, false, ptr, 0); + emit_atomic_func_op(result_type, id, "atomic_load_explicit", opcode, mem_sem, mem_sem, false, ptr, 0); break; } @@ -3967,7 +8564,7 @@ void 
CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t ptr = ops[0]; uint32_t mem_sem = ops[2]; uint32_t val = ops[3]; - emit_atomic_func_op(result_type, id, "atomic_store_explicit", mem_sem, mem_sem, false, ptr, val); + emit_atomic_func_op(result_type, id, "atomic_store_explicit", opcode, mem_sem, mem_sem, false, ptr, val); break; } @@ -3979,7 +8576,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t ptr = ops[2]; \ uint32_t mem_sem = ops[4]; \ uint32_t val = valsrc; \ - emit_atomic_func_op(result_type, id, "atomic_fetch_" #op "_explicit", mem_sem, mem_sem, false, ptr, val, \ + emit_atomic_func_op(result_type, id, "atomic_fetch_" #op "_explicit", opcode, \ + mem_sem, mem_sem, false, ptr, val, \ false, valconst); \ } while (false) @@ -4042,7 +8640,42 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) } } - emit_texture_op(instruction); + emit_texture_op(instruction, false); + break; + } + + // Emulate texture2D atomic operations + case OpImageTexelPointer: + { + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + if (var && atomic_image_vars.count(var->self)) + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + std::string coord = to_expression(ops[3]); + auto &type = expression_type(ops[2]); + if (type.image.dim == Dim2D) + { + coord = join("spvImage2DAtomicCoord(", coord, ", ", to_expression(ops[2]), ")"); + } + + auto &e = set(id, join(to_expression(ops[2]), "_atomic[", coord, "]"), result_type, true); + e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); + } + else + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto &e = + set(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + e.loaded_from = var ? 
var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); + } break; } @@ -4093,11 +8726,22 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) auto store_type = texel_type; store_type.vecsize = 4; - statement(join(to_expression(img_id), ".write(", - remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)), ", ", - to_function_args(img_id, img_type, true, false, false, coord_id, 0, 0, 0, 0, lod, 0, 0, 0, 0, 0, - 0, &forward), - ");")); + TextureFunctionArguments args = {}; + args.base.img = img_id; + args.base.imgtype = &img_type; + args.base.is_fetch = true; + args.coord = coord_id; + args.lod = lod; + + string expr; + if (needs_frag_discard_checks()) + expr = join("(", builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), " ? ((void)0) : "); + expr += join(to_expression(img_id), ".write(", + remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)), ", ", + CompilerMSL::to_function_args(args, &forward), ")"); + if (needs_frag_discard_checks()) + expr += ")"; + statement(expr, ";"); if (p_var && variable_storage_is_aliased(*p_var)) flush_all_aliased_variables(); @@ -4141,7 +8785,11 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) expr += ", " + img_exp + ".get_depth(" + lod + ")"; if (img_is_array) + { expr += ", " + img_exp + ".get_array_size()"; + if (img_dim == DimCube && msl_options.emulate_cube_array) + expr += " / 6"; + } expr += ")"; @@ -4211,45 +8859,25 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) } else { - auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); auto *var = maybe_get_backing_variable(ops[2]); + SPIRExpression *e; + if (var && has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler)) + e = &emit_op(result_type, id, join(to_expression(ops[2]), ".plane0"), true, true); + else + e = &emit_op(result_type, id, to_expression(ops[2]), true, true); if (var) - e.loaded_from = var->self; + e->loaded_from = 
var->self; } break; } - case OpImageTexelPointer: - SPIRV_CROSS_THROW("MSL does not support atomic operations on images or texel buffers."); - // Casting case OpQuantizeToF16: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t arg = ops[2]; - - string exp; - auto &type = get(result_type); - - switch (type.vecsize) - { - case 1: - exp = join("float(half(", to_expression(arg), "))"); - break; - case 2: - exp = join("float2(half2(", to_expression(arg), "))"); - break; - case 3: - exp = join("float3(half3(", to_expression(arg), "))"); - break; - case 4: - exp = join("float4(half4(", to_expression(arg), "))"); - break; - default: - SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); - } - + string exp = join("spvQuantizeToF16(", to_expression(arg), ")"); emit_op(result_type, id, exp, should_forward(arg)); break; } @@ -4264,17 +8892,38 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) } else CompilerGLSL::emit_instruction(instruction); + fix_up_interpolant_access_chain(ops, instruction.length); break; case OpStore: - if (is_out_of_bounds_tessellation_level(ops[0])) - break; + { + const auto &type = expression_type(ops[0]); - if (maybe_emit_array_assignment(ops[0], ops[1])) + if (is_out_of_bounds_tessellation_level(ops[0])) break; - CompilerGLSL::emit_instruction(instruction); + if (needs_frag_discard_checks() && + (type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform)) + { + // If we're in a continue block, this kludge will make the block too complex + // to emit normally. 
+ assert(current_emitting_block); + auto cont_type = continue_block_type(*current_emitting_block); + if (cont_type != SPIRBlock::ContinueNone && cont_type != SPIRBlock::ComplexLoop) + { + current_emitting_block->complex_continue = true; + force_recompile(); + } + statement("if (!", builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), ")"); + begin_scope(); + } + if (!maybe_emit_array_assignment(ops[0], ops[1])) + CompilerGLSL::emit_instruction(instruction); + if (needs_frag_discard_checks() && + (type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform)) + end_scope(); break; + } // Compute barriers case OpMemoryBarrier: @@ -4289,25 +8938,6 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) emit_barrier(ops[0], ops[1], ops[2]); break; - case OpVectorTimesMatrix: - case OpMatrixTimesVector: - { - // If the matrix needs transpose and it is square or packed, just flip the multiply order. - uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 
2 : 3]; - auto *e = maybe_get(mtx_id); - auto &t = expression_type(mtx_id); - bool is_packed = has_extended_decoration(mtx_id, SPIRVCrossDecorationPacked); - if (e && e->need_transpose && (t.columns == t.vecsize || is_packed)) - { - e->need_transpose = false; - emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*"); - e->need_transpose = true; - } - else - MSL_BOP(*); - break; - } - case OpOuterProduct: { uint32_t result_type = ops[0]; @@ -4320,7 +8950,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) expr += "("; for (uint32_t col = 0; col < type.columns; col++) { - expr += to_enclosed_expression(a); + expr += to_enclosed_unpacked_expression(a); expr += " * "; expr += to_extract_component_expression(b, col); if (col + 1 < type.columns) @@ -4333,6 +8963,85 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } + case OpVectorTimesMatrix: + case OpMatrixTimesVector: + { + if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction)) + { + CompilerGLSL::emit_instruction(instruction); + break; + } + + // If the matrix needs transpose, just flip the multiply order. + auto *e = maybe_get(ops[opcode == OpMatrixTimesVector ? 
2 : 3]); + if (e && e->need_transpose) + { + e->need_transpose = false; + string expr; + + if (opcode == OpMatrixTimesVector) + { + expr = join("spvFMulVectorMatrix(", to_enclosed_unpacked_expression(ops[3]), ", ", + to_unpacked_row_major_matrix_expression(ops[2]), ")"); + } + else + { + expr = join("spvFMulMatrixVector(", to_unpacked_row_major_matrix_expression(ops[3]), ", ", + to_enclosed_unpacked_expression(ops[2]), ")"); + } + + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + emit_op(ops[0], ops[1], expr, forward); + e->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + { + if (opcode == OpMatrixTimesVector) + MSL_BFOP(spvFMulMatrixVector); + else + MSL_BFOP(spvFMulVectorMatrix); + } + break; + } + + case OpMatrixTimesMatrix: + { + if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction)) + { + CompilerGLSL::emit_instruction(instruction); + break; + } + + auto *a = maybe_get(ops[2]); + auto *b = maybe_get(ops[3]); + + // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. + // a^T * b^T = (b * a)^T. 
+ if (a && b && a->need_transpose && b->need_transpose) + { + a->need_transpose = false; + b->need_transpose = false; + + auto expr = + join("spvFMulMatrixMatrix(", enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), ", ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])), ")"); + + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + auto &e = emit_op(ops[0], ops[1], expr, forward); + e.need_transpose = true; + a->need_transpose = true; + b->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + MSL_BFOP(spvFMulMatrixMatrix); + + break; + } + case OpIAddCarry: case OpISubBorrow: { @@ -4340,27 +9049,25 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; - forced_temporaries.insert(result_id); auto &type = get(result_type); - statement(variable_decl(type, to_name(result_id)), ";"); - set(result_id, to_name(result_id), result_type, true); + emit_uninitialized_temporary_expression(result_type, result_id); auto &res_type = get(type.member_types[1]); if (opcode == OpIAddCarry) { - statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " + ", - to_enclosed_expression(op1), ";"); + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", + to_enclosed_unpacked_expression(op0), " + ", to_enclosed_unpacked_expression(op1), ";"); statement(to_expression(result_id), ".", to_member_name(type, 1), " = select(", type_to_glsl(res_type), - "(1), ", type_to_glsl(res_type), "(0), ", to_expression(result_id), ".", to_member_name(type, 0), - " >= max(", to_expression(op0), ", ", to_expression(op1), "));"); + "(1), ", type_to_glsl(res_type), "(0), ", to_unpacked_expression(result_id), ".", to_member_name(type, 0), + " >= max(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "));"); } 
else { - statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " - ", - to_enclosed_expression(op1), ";"); + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_unpacked_expression(op0), " - ", + to_enclosed_unpacked_expression(op1), ";"); statement(to_expression(result_id), ".", to_member_name(type, 1), " = select(", type_to_glsl(res_type), - "(1), ", type_to_glsl(res_type), "(0), ", to_enclosed_expression(op0), - " >= ", to_enclosed_expression(op1), ");"); + "(1), ", type_to_glsl(res_type), "(0), ", to_enclosed_unpacked_expression(op0), + " >= ", to_enclosed_unpacked_expression(op1), ");"); } break; } @@ -4372,15 +9079,34 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; - forced_temporaries.insert(result_id); auto &type = get(result_type); - statement(variable_decl(type, to_name(result_id)), ";"); - set(result_id, to_name(result_id), result_type, true); + auto input_type = opcode == OpSMulExtended ? 
int_type : uint_type; + auto &output_type = get_type(result_type); + string cast_op0, cast_op1; + + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, false); + + emit_uninitialized_temporary_expression(result_type, result_id); + + string mullo_expr, mulhi_expr; + mullo_expr = join(cast_op0, " * ", cast_op1); + mulhi_expr = join("mulhi(", cast_op0, ", ", cast_op1, ")"); + + auto &low_type = get_type(output_type.member_types[0]); + auto &high_type = get_type(output_type.member_types[1]); + if (low_type.basetype != input_type) + { + expected_type.basetype = input_type; + mullo_expr = join(bitcast_glsl_op(low_type, expected_type), "(", mullo_expr, ")"); + } + if (high_type.basetype != input_type) + { + expected_type.basetype = input_type; + mulhi_expr = join(bitcast_glsl_op(high_type, expected_type), "(", mulhi_expr, ")"); + } - statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " * ", - to_enclosed_expression(op1), ";"); - statement(to_expression(result_id), ".", to_member_name(type, 1), " = mulhi(", to_expression(op0), ", ", - to_expression(op1), ");"); + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", mullo_expr, ";"); + statement(to_expression(result_id), ".", to_member_name(type, 1), " = ", mulhi_expr, ";"); break; } @@ -4395,6 +9121,208 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } + // SPV_INTEL_shader_integer_functions2 + case OpUCountLeadingZerosINTEL: + MSL_UFOP(clz); + break; + + case OpUCountTrailingZerosINTEL: + MSL_UFOP(ctz); + break; + + case OpAbsISubINTEL: + case OpAbsUSubINTEL: + MSL_BFOP(absdiff); + break; + + case OpIAddSatINTEL: + case OpUAddSatINTEL: + MSL_BFOP(addsat); + break; + + case OpIAverageINTEL: + case OpUAverageINTEL: + MSL_BFOP(hadd); + break; + + case OpIAverageRoundedINTEL: + case OpUAverageRoundedINTEL: + MSL_BFOP(rhadd); + break; + + case OpISubSatINTEL: + case OpUSubSatINTEL: + 
MSL_BFOP(subsat); + break; + + case OpIMul32x16INTEL: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2], b = ops[3]; + bool forward = should_forward(a) && should_forward(b); + emit_op(result_type, id, join("int(short(", to_unpacked_expression(a), ")) * int(short(", to_unpacked_expression(b), "))"), forward); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + + case OpUMul32x16INTEL: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2], b = ops[3]; + bool forward = should_forward(a) && should_forward(b); + emit_op(result_type, id, join("uint(ushort(", to_unpacked_expression(a), ")) * uint(ushort(", to_unpacked_expression(b), "))"), forward); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + + // SPV_EXT_demote_to_helper_invocation + case OpDemoteToHelperInvocationEXT: + if (!msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("discard_fragment() does not formally have demote semantics until MSL 2.3."); + CompilerGLSL::emit_instruction(instruction); + break; + + case OpIsHelperInvocationEXT: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires MSL 2.3 on iOS."); + else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires MSL 2.1 on macOS."); + emit_op(ops[0], ops[1], + needs_manual_helper_invocation_updates() ? 
builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput) : + "simd_is_helper_thread()", + false); + break; + + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("Raster order groups require MSL 2.0."); + break; // Nothing to do in the body + + case OpConvertUToAccelerationStructureKHR: + SPIRV_CROSS_THROW("ConvertUToAccelerationStructure is not supported in MSL."); + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: + SPIRV_CROSS_THROW("BindingTableRecordOffset is not supported in MSL."); + + case OpRayQueryInitializeKHR: + { + flush_variable_declaration(ops[0]); + + statement(to_expression(ops[0]), ".reset(", "ray(", to_expression(ops[4]), ", ", to_expression(ops[6]), ", ", + to_expression(ops[5]), ", ", to_expression(ops[7]), "), ", to_expression(ops[1]), + ", intersection_params());"); + break; + } + case OpRayQueryProceedKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".next()"), false); + break; + } +#define MSL_RAY_QUERY_IS_CANDIDATE get(ops[3]).scalar_i32() == 0 + +#define MSL_RAY_QUERY_GET_OP(op, msl_op) \ + case OpRayQueryGet##op##KHR: \ + flush_variable_declaration(ops[2]); \ + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".get_" #msl_op "()"), false); \ + break + +#define MSL_RAY_QUERY_OP_INNER2(op, msl_prefix, msl_op) \ + case OpRayQueryGet##op##KHR: \ + flush_variable_declaration(ops[2]); \ + if (MSL_RAY_QUERY_IS_CANDIDATE) \ + emit_op(ops[0], ops[1], join(to_expression(ops[2]), #msl_prefix "_candidate_" #msl_op "()"), false); \ + else \ + emit_op(ops[0], ops[1], join(to_expression(ops[2]), #msl_prefix "_committed_" #msl_op "()"), false); \ + break + +#define MSL_RAY_QUERY_GET_OP2(op, msl_op) MSL_RAY_QUERY_OP_INNER2(op, .get, msl_op) +#define MSL_RAY_QUERY_IS_OP2(op, msl_op) MSL_RAY_QUERY_OP_INNER2(op, .is, msl_op) + + MSL_RAY_QUERY_GET_OP(RayTMin, ray_min_distance); + 
MSL_RAY_QUERY_GET_OP(WorldRayOrigin, world_space_ray_origin); + MSL_RAY_QUERY_GET_OP(WorldRayDirection, world_space_ray_direction); + MSL_RAY_QUERY_GET_OP2(IntersectionInstanceId, instance_id); + MSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex, user_instance_id); + MSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics, triangle_barycentric_coord); + MSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex, primitive_id); + MSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex, geometry_id); + MSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin, ray_origin); + MSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection, ray_direction); + MSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld, object_to_world_transform); + MSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject, world_to_object_transform); + MSL_RAY_QUERY_IS_OP2(IntersectionFrontFace, triangle_front_facing); + + case OpRayQueryGetIntersectionTypeKHR: + flush_variable_declaration(ops[2]); + if (MSL_RAY_QUERY_IS_CANDIDATE) + emit_op(ops[0], ops[1], join("uint(", to_expression(ops[2]), ".get_candidate_intersection_type()) - 1"), + false); + else + emit_op(ops[0], ops[1], join("uint(", to_expression(ops[2]), ".get_committed_intersection_type())"), false); + break; + case OpRayQueryGetIntersectionTKHR: + flush_variable_declaration(ops[2]); + if (MSL_RAY_QUERY_IS_CANDIDATE) + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".get_candidate_triangle_distance()"), false); + else + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".get_committed_distance()"), false); + break; + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".is_candidate_non_opaque_bounding_box()"), false); + break; + } + case OpRayQueryConfirmIntersectionKHR: + flush_variable_declaration(ops[0]); + statement(to_expression(ops[0]), ".commit_triangle_intersection();"); + break; + case OpRayQueryGenerateIntersectionKHR: + flush_variable_declaration(ops[0]); + 
statement(to_expression(ops[0]), ".commit_bounding_box_intersection(", to_expression(ops[1]), ");"); + break; + case OpRayQueryTerminateKHR: + flush_variable_declaration(ops[0]); + statement(to_expression(ops[0]), ".abort();"); + break; +#undef MSL_RAY_QUERY_GET_OP +#undef MSL_RAY_QUERY_IS_CANDIDATE +#undef MSL_RAY_QUERY_IS_OP2 +#undef MSL_RAY_QUERY_GET_OP2 +#undef MSL_RAY_QUERY_OP_INNER2 + + case OpConvertPtrToU: + case OpConvertUToPtr: + case OpBitcast: + { + auto &type = get(ops[0]); + auto &input_type = expression_type(ops[2]); + + if (opcode != OpBitcast || type.pointer || input_type.pointer) + { + string op; + + if (type.vecsize == 1 && input_type.vecsize == 1) + op = join("reinterpret_cast<", type_to_glsl(type), ">(", to_unpacked_expression(ops[2]), ")"); + else if (input_type.vecsize == 2) + op = join("reinterpret_cast<", type_to_glsl(type), ">(as_type(", to_unpacked_expression(ops[2]), "))"); + else + op = join("as_type<", type_to_glsl(type), ">(reinterpret_cast(", to_unpacked_expression(ops[2]), "))"); + + emit_op(ops[0], ops[1], op, should_forward(ops[2])); + inherit_expression_dependencies(ops[1], ops[2]); + } + else + CompilerGLSL::emit_instruction(instruction); + + break; + } + default: CompilerGLSL::emit_instruction(instruction); break; @@ -4403,16 +9331,51 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) previous_instruction_opcode = opcode; } +void CompilerMSL::emit_texture_op(const Instruction &i, bool sparse) +{ + if (sparse) + SPIRV_CROSS_THROW("Sparse feedback not yet supported in MSL."); + + if (msl_options.use_framebuffer_fetch_subpasses) + { + auto *ops = stream(i); + + uint32_t result_type_id = ops[0]; + uint32_t id = ops[1]; + uint32_t img = ops[2]; + + auto &type = expression_type(img); + auto &imgtype = get(type.self); + + // Use Metal's native frame-buffer fetch API for subpass inputs. 
+ if (imgtype.image.dim == DimSubpassData) + { + // Subpass inputs cannot be invalidated, + // so just forward the expression directly. + string expr = to_expression(img); + emit_op(result_type_id, id, expr, true); + return; + } + } + + // Fallback to default implementation + CompilerGLSL::emit_texture_op(i, sparse); +} + void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem) { - if (get_execution_model() != ExecutionModelGLCompute && get_execution_model() != ExecutionModelTessellationControl) + if (get_execution_model() != ExecutionModelGLCompute && !is_tesc_shader()) return; - uint32_t exe_scope = id_exe_scope ? get(id_exe_scope).scalar() : uint32_t(ScopeInvocation); - uint32_t mem_scope = id_mem_scope ? get(id_mem_scope).scalar() : uint32_t(ScopeInvocation); + uint32_t exe_scope = id_exe_scope ? evaluate_constant_u32(id_exe_scope) : uint32_t(ScopeInvocation); + uint32_t mem_scope = id_mem_scope ? evaluate_constant_u32(id_mem_scope) : uint32_t(ScopeInvocation); // Use the wider of the two scopes (smaller value) exe_scope = min(exe_scope, mem_scope); + if (msl_options.emulate_subgroups && exe_scope >= ScopeSubgroup && !id_mem_sem) + // In this case, we assume a "subgroup" size of 1. The barrier, then, is a noop. + return; + string bar_stmt; if ((msl_options.is_ios() && msl_options.supports_msl_version(1, 2)) || msl_options.supports_msl_version(2)) bar_stmt = exe_scope < ScopeSubgroup ? "threadgroup_barrier" : "simdgroup_barrier"; @@ -4420,7 +9383,7 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin bar_stmt = "threadgroup_barrier"; bar_stmt += "("; - uint32_t mem_sem = id_mem_sem ? get(id_mem_sem).scalar() : uint32_t(MemorySemanticsMaskNone); + uint32_t mem_sem = id_mem_sem ? evaluate_constant_u32(id_mem_sem) : uint32_t(MemorySemanticsMaskNone); // Use the | operator to combine flags if we can. 
if (msl_options.supports_msl_version(1, 2)) @@ -4428,11 +9391,12 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin string mem_flags = ""; // For tesc shaders, this also affects objects in the Output storage class. // Since in Metal, these are placed in a device buffer, we have to sync device memory here. - if (get_execution_model() == ExecutionModelTessellationControl || + if (is_tesc_shader() || (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask))) mem_flags += "mem_flags::mem_device"; - if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsAtomicCounterMemoryMask)) + + // Fix tessellation patch function processing + if (is_tesc_shader() || (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask))) { if (!mem_flags.empty()) mem_flags += " | "; @@ -4453,13 +9417,11 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin else { if ((mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)) && - (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsAtomicCounterMemoryMask))) + (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask))) bar_stmt += "mem_flags::mem_device_and_threadgroup"; else if (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)) bar_stmt += "mem_flags::mem_device"; - else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsAtomicCounterMemoryMask)) + else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask)) bar_stmt += "mem_flags::mem_threadgroup"; else if (mem_sem & MemorySemanticsImageMemoryMask) bar_stmt += "mem_flags::mem_texture"; @@ -4467,29 +9429,6 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin bar_stmt += 
"mem_flags::mem_none"; } - if (msl_options.is_ios() && (msl_options.supports_msl_version(2) && !msl_options.supports_msl_version(2, 1))) - { - bar_stmt += ", "; - - switch (mem_scope) - { - case ScopeCrossDevice: - case ScopeDevice: - bar_stmt += "memory_scope_device"; - break; - - case ScopeSubgroup: - case ScopeInvocation: - bar_stmt += "memory_scope_simdgroup"; - break; - - case ScopeWorkgroup: - default: - bar_stmt += "memory_scope_threadgroup"; - break; - } - } - bar_stmt += ");"; statement(bar_stmt); @@ -4499,50 +9438,146 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin flush_all_active_variables(); } -void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id) +static bool storage_class_array_is_thread(StorageClass storage) { - // Assignment from an array initializer is fine. - auto &type = expression_type(rhs_id); - auto *var = maybe_get_backing_variable(rhs_id); - - // Unfortunately, we cannot template on address space in MSL, - // so explicit address space redirection it is ... - bool is_constant = false; - if (ir.ids[rhs_id].get_type() == TypeConstant) + switch (storage) { - is_constant = true; + case StorageClassInput: + case StorageClassOutput: + case StorageClassGeneric: + case StorageClassFunction: + case StorageClassPrivate: + return true; + + default: + return false; } - else if (var && var->remapped_variable && var->statically_assigned && - ir.ids[var->static_expression].get_type() == TypeConstant) +} + +void CompilerMSL::emit_array_copy(const string &lhs, uint32_t lhs_id, uint32_t rhs_id, + StorageClass lhs_storage, StorageClass rhs_storage) +{ + // Allow Metal to use the array template to make arrays a value type. + // This, however, cannot be used for threadgroup address specifiers, so consider the custom array copy as fallback. 
+ bool lhs_is_thread_storage = storage_class_array_is_thread(lhs_storage); + bool rhs_is_thread_storage = storage_class_array_is_thread(rhs_storage); + + bool lhs_is_array_template = lhs_is_thread_storage; + bool rhs_is_array_template = rhs_is_thread_storage; + + // Special considerations for stage IO variables. + // If the variable is actually backed by non-user visible device storage, we use array templates for those. + // + // Another special consideration is given to thread local variables which happen to have Offset decorations + // applied to them. Block-like types do not use array templates, so we need to force POD path if we detect + // these scenarios. This check isn't perfect since it would be technically possible to mix and match these things, + // and for a fully correct solution we might have to track array template state through access chains as well, + // but for all reasonable use cases, this should suffice. + // This special case should also only apply to Function/Private storage classes. + // We should not check backing variable for temporaries. 
+ auto *lhs_var = maybe_get_backing_variable(lhs_id); + if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage)) + lhs_is_array_template = true; + else if (lhs_var && (lhs_storage == StorageClassFunction || lhs_storage == StorageClassPrivate) && + type_is_block_like(get(lhs_var->basetype))) + lhs_is_array_template = false; + + auto *rhs_var = maybe_get_backing_variable(rhs_id); + if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage)) + rhs_is_array_template = true; + else if (rhs_var && (rhs_storage == StorageClassFunction || rhs_storage == StorageClassPrivate) && + type_is_block_like(get(rhs_var->basetype))) + rhs_is_array_template = false; + + // If threadgroup storage qualifiers are *not* used: + // Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier. + if (lhs_is_array_template && rhs_is_array_template && !using_builtin_array()) { - is_constant = true; + statement(lhs, " = ", to_expression(rhs_id), ";"); } - - // For the case where we have OpLoad triggering an array copy, - // we cannot easily detect this case ahead of time since it's - // context dependent. We might have to force a recompile here - // if this is the only use of array copies in our shader. - if (type.array.size() > 1) + else { - if (type.array.size() > SPVFuncImplArrayCopyMultidimMax) - SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); - auto func = static_cast(SPVFuncImplArrayCopyMultidimBase + type.array.size()); - if (spv_function_implementations.count(func) == 0) + // Assignment from an array initializer is fine. + auto &type = expression_type(rhs_id); + auto *var = maybe_get_backing_variable(rhs_id); + + // Unfortunately, we cannot template on address space in MSL, + // so explicit address space redirection it is ... 
+ bool is_constant = false; + if (ir.ids[rhs_id].get_type() == TypeConstant) + { + is_constant = true; + } + else if (var && var->remapped_variable && var->statically_assigned && + ir.ids[var->static_expression].get_type() == TypeConstant) + { + is_constant = true; + } + else if (rhs_storage == StorageClassUniform || rhs_storage == StorageClassUniformConstant) + { + is_constant = true; + } + + // For the case where we have OpLoad triggering an array copy, + // we cannot easily detect this case ahead of time since it's + // context dependent. We might have to force a recompile here + // if this is the only use of array copies in our shader. + if (type.array.size() > 1) { - spv_function_implementations.insert(func); - suppress_missing_prototypes = true; - force_recompile(); + if (type.array.size() > kArrayCopyMultidimMax) + SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); + auto func = static_cast(SPVFuncImplArrayCopyMultidimBase + type.array.size()); + add_spv_func_and_recompile(func); } + else + add_spv_func_and_recompile(SPVFuncImplArrayCopy); + + const char *tag = nullptr; + if (lhs_is_thread_storage && is_constant) + tag = "FromConstantToStack"; + else if (lhs_storage == StorageClassWorkgroup && is_constant) + tag = "FromConstantToThreadGroup"; + else if (lhs_is_thread_storage && rhs_is_thread_storage) + tag = "FromStackToStack"; + else if (lhs_storage == StorageClassWorkgroup && rhs_is_thread_storage) + tag = "FromStackToThreadGroup"; + else if (lhs_is_thread_storage && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToStack"; + else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToThreadGroup"; + else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassStorageBuffer) + tag = "FromDeviceToDevice"; + else if (lhs_storage == StorageClassStorageBuffer && is_constant) + tag = "FromConstantToDevice"; + else if (lhs_storage == 
StorageClassStorageBuffer && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToDevice"; + else if (lhs_storage == StorageClassStorageBuffer && rhs_is_thread_storage) + tag = "FromStackToDevice"; + else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassStorageBuffer) + tag = "FromDeviceToThreadGroup"; + else if (lhs_is_thread_storage && rhs_storage == StorageClassStorageBuffer) + tag = "FromDeviceToStack"; + else + SPIRV_CROSS_THROW("Unknown storage class used for copying arrays."); + + // Pass internal array of spvUnsafeArray<> into wrapper functions + if (lhs_is_array_template && rhs_is_array_template && !msl_options.force_native_arrays) + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);"); + if (lhs_is_array_template && !msl_options.force_native_arrays) + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");"); + else if (rhs_is_array_template && !msl_options.force_native_arrays) + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);"); + else + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); } - else if (spv_function_implementations.count(SPVFuncImplArrayCopy) == 0) - { - spv_function_implementations.insert(SPVFuncImplArrayCopy); - suppress_missing_prototypes = true; - force_recompile(); - } +} - const char *tag = is_constant ? "FromConstant" : "FromStack"; - statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); +uint32_t CompilerMSL::get_physical_tess_level_array_size(spv::BuiltIn builtin) const +{ + if (is_tessellating_triangles()) + return builtin == BuiltInTessLevelInner ? 1 : 3; + else + return builtin == BuiltInTessLevelInner ? 
2 : 4; } // Since MSL does not allow arrays to be copied via simple variable assignment, @@ -4573,41 +9608,102 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs) return true; } + if (is_tesc_shader() && has_decoration(id_lhs, DecorationBuiltIn)) + { + auto builtin = BuiltIn(get_decoration(id_lhs, DecorationBuiltIn)); + // Need to manually unroll the array store. + if (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter) + { + uint32_t array_size = get_physical_tess_level_array_size(builtin); + if (array_size == 1) + statement(to_expression(id_lhs), " = half(", to_expression(id_rhs), "[0]);"); + else + { + for (uint32_t i = 0; i < array_size; i++) + statement(to_expression(id_lhs), "[", i, "] = half(", to_expression(id_rhs), "[", i, "]);"); + } + return true; + } + } + // Ensure the LHS variable has been declared auto *p_v_lhs = maybe_get_backing_variable(id_lhs); if (p_v_lhs) flush_variable_declaration(p_v_lhs->self); - emit_array_copy(to_expression(id_lhs), id_rhs); + auto lhs_storage = get_expression_effective_storage_class(id_lhs); + auto rhs_storage = get_expression_effective_storage_class(id_rhs); + emit_array_copy(to_expression(id_lhs), id_lhs, id_rhs, lhs_storage, rhs_storage); register_write(id_lhs); return true; } // Emits one of the atomic functions. 
In MSL, the atomic functions operate on pointers -void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1, - uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1, +void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, Op opcode, + uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1, bool op1_is_pointer, bool op1_is_literal, uint32_t op2) { - forced_temporaries.insert(result_id); - - string exp = string(op) + "("; + string exp; auto &type = get_pointee_type(expression_type(obj)); - exp += "(volatile "; + auto expected_type = type.basetype; + if (opcode == OpAtomicUMax || opcode == OpAtomicUMin) + expected_type = to_unsigned_basetype(type.width); + else if (opcode == OpAtomicSMax || opcode == OpAtomicSMin) + expected_type = to_signed_basetype(type.width); + + if (type.width == 64) + SPIRV_CROSS_THROW("MSL currently does not support 64-bit atomics."); + + auto remapped_type = type; + remapped_type.basetype = expected_type; + auto *var = maybe_get_backing_variable(obj); if (!var) SPIRV_CROSS_THROW("No backing variable for atomic operation."); - exp += get_argument_address_space(*var); + const auto &res_type = get(var->basetype); + + bool is_atomic_compare_exchange_strong = op1_is_pointer && op1; + + bool check_discard = opcode != OpAtomicLoad && needs_frag_discard_checks() && + ((res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image) || + var->storage == StorageClassStorageBuffer || var->storage == StorageClassUniform); + + if (check_discard) + { + if (is_atomic_compare_exchange_strong) + { + // We're already emitting a CAS loop here; a conditional won't hurt. 
+ emit_uninitialized_temporary_expression(result_type, result_id); + statement("if (!", builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), ")"); + begin_scope(); + } + else + exp = join("(!", builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), " ? "); + } + + exp += string(op) + "("; + exp += "("; + // Emulate texture2D atomic operations + if (res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image) + { + exp += "device"; + } + else + { + exp += get_argument_address_space(*var); + } + exp += " atomic_"; - exp += type_to_glsl(type); + // For signed and unsigned min/max, we can signal this through the pointer type. + // There is no other way, since C++ does not have explicit signage for atomics. + exp += type_to_glsl(remapped_type); exp += "*)"; exp += "&"; exp += to_enclosed_expression(obj); - bool is_atomic_compare_exchange_strong = op1_is_pointer && op1; - if (is_atomic_compare_exchange_strong) { assert(strcmp(op, "atomic_compare_exchange_weak_explicit") == 0); @@ -4629,12 +9725,42 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, // the CAS loop, otherwise it will loop infinitely, with the comparison test always failing. // The function updates the comparitor value from the memory value, so the additional // comparison test evaluates the memory value against the expected value. 
- statement(variable_decl(type, to_name(result_id)), ";"); + if (!check_discard) + emit_uninitialized_temporary_expression(result_type, result_id); statement("do"); begin_scope(); statement(to_name(result_id), " = ", to_expression(op1), ";"); end_scope_decl(join("while (!", exp, " && ", to_name(result_id), " == ", to_enclosed_expression(op1), ")")); - set(result_id, to_name(result_id), result_type, true); + if (check_discard) + { + end_scope(); + statement("else"); + begin_scope(); + exp = "atomic_load_explicit("; + exp += "("; + // Emulate texture2D atomic operations + if (res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image) + exp += "device"; + else + exp += get_argument_address_space(*var); + + exp += " atomic_"; + exp += type_to_glsl(remapped_type); + exp += "*)"; + + exp += "&"; + exp += to_enclosed_expression(obj); + + if (has_mem_order_2) + exp += string(", ") + get_memory_order(mem_order_2); + else + exp += string(", ") + get_memory_order(mem_order_1); + + exp += ")"; + + statement(to_name(result_id), " = ", exp, ";"); + end_scope(); + } } else { @@ -4644,7 +9770,7 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, if (op1_is_literal) exp += join(", ", op1); else - exp += ", " + to_expression(op1); + exp += ", " + bitcast_expression(expected_type, op1); } if (op2) exp += ", " + to_expression(op2); @@ -4654,7 +9780,46 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, exp += string(", ") + get_memory_order(mem_order_2); exp += ")"; - emit_op(result_type, result_id, exp, false); + + if (check_discard) + { + exp += " : "; + if (strcmp(op, "atomic_store_explicit") != 0) + { + exp += "atomic_load_explicit("; + exp += "("; + // Emulate texture2D atomic operations + if (res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image) + exp += "device"; + else + exp += get_argument_address_space(*var); + + exp += " atomic_"; + exp += 
type_to_glsl(remapped_type); + exp += "*)"; + + exp += "&"; + exp += to_enclosed_expression(obj); + + if (has_mem_order_2) + exp += string(", ") + get_memory_order(mem_order_2); + else + exp += string(", ") + get_memory_order(mem_order_1); + + exp += ")"; + } + else + exp += "((void)0)"; + exp += ")"; + } + + if (expected_type != type.basetype) + exp = bitcast_expression(type, expected_type, exp); + + if (strcmp(op, "atomic_store_explicit") != 0) + emit_op(result_type, result_id, exp, false); + else + statement(exp, ";"); } flush_all_atomic_capable_variables(); @@ -4666,7 +9831,8 @@ const char *CompilerMSL::get_memory_order(uint32_t) return "memory_order_relaxed"; } -// Override for MSL-specific extension syntax instructions +// Override for MSL-specific extension syntax instructions. +// In some cases, deliberately select either the fast or precise versions of the MSL functions to match Vulkan math precision results. void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count) { auto op = static_cast(eop); @@ -4676,10 +9842,21 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); + op = get_remapped_glsl_op(op); + switch (op) { + case GLSLstd450Sinh: + emit_unary_func_op(result_type, id, args[0], "fast::sinh"); + break; + case GLSLstd450Cosh: + emit_unary_func_op(result_type, id, args[0], "fast::cosh"); + break; + case GLSLstd450Tanh: + emit_unary_func_op(result_type, id, args[0], "precise::tanh"); + break; case GLSLstd450Atan2: - emit_binary_func_op(result_type, id, args[0], args[1], "atan2"); + emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2"); break; case GLSLstd450InverseSqrt: emit_unary_func_op(result_type, id, args[0], "rsqrt"); @@ -4688,12 +9865,20 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, 
emit_unary_func_op(result_type, id, args[0], "rint"); break; + case GLSLstd450FindILsb: + { + // In this template version of findLSB, we return T. + auto basetype = expression_type(args[0]).basetype; + emit_unary_func_op_cast(result_type, id, args[0], "spvFindLSB", basetype, basetype); + break; + } + case GLSLstd450FindSMsb: - emit_unary_func_op_cast(result_type, id, args[0], "findSMSB", int_type, int_type); + emit_unary_func_op_cast(result_type, id, args[0], "spvFindSMSB", int_type, int_type); break; case GLSLstd450FindUMsb: - emit_unary_func_op_cast(result_type, id, args[0], "findUMSB", uint_type, uint_type); + emit_unary_func_op_cast(result_type, id, args[0], "spvFindUMSB", uint_type, uint_type); break; case GLSLstd450PackSnorm4x8: @@ -4812,10 +9997,71 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "precise::clamp"); break; - // TODO: - // GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier) - // GLSLstd450InterpolateAtSample (sample_no_perspective qualifier) - // GLSLstd450InterpolateAtOffset + case GLSLstd450InterpolateAtCentroid: + { + // We can't just emit the expression normally, because the qualified name contains a call to the default + // interpolate method, or refers to a local variable. We saved the interface index we need; use it to construct + // the base for the method call. 
+ uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex); + string component; + if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr)) + { + uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr); + auto *c = maybe_get(index_expr); + if (!c || c->specialization) + component = join("[", to_expression(index_expr), "]"); + else + component = join(".", index_to_swizzle(c->scalar())); + } + emit_op(result_type, id, + join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index), + ".interpolate_at_centroid()", component), + should_forward(args[0])); + break; + } + + case GLSLstd450InterpolateAtSample: + { + uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex); + string component; + if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr)) + { + uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr); + auto *c = maybe_get(index_expr); + if (!c || c->specialization) + component = join("[", to_expression(index_expr), "]"); + else + component = join(".", index_to_swizzle(c->scalar())); + } + emit_op(result_type, id, + join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index), + ".interpolate_at_sample(", to_expression(args[1]), ")", component), + should_forward(args[0]) && should_forward(args[1])); + break; + } + + case GLSLstd450InterpolateAtOffset: + { + uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex); + string component; + if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr)) + { + uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr); + auto *c = maybe_get(index_expr); + if (!c || c->specialization) + component = join("[", 
to_expression(index_expr), "]"); + else + component = join(".", index_to_swizzle(c->scalar())); + } + // Like Direct3D, Metal puts the (0, 0) at the upper-left corner, not the center as SPIR-V and GLSL do. + // Offset the offset by (1/2 - 1/16), or 0.4375, to compensate for this. + // It has to be (1/2 - 1/16) and not 1/2, or several CTS tests subtly break on Intel. + emit_op(result_type, id, + join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index), + ".interpolate_at_offset(", to_expression(args[1]), " + 0.4375)", component), + should_forward(args[0]) && should_forward(args[1])); + break; + } case GLSLstd450Distance: // MSL does not support scalar versions here. @@ -4823,7 +10069,8 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, { // Equivalent to length(a - b) -> abs(a - b). emit_op(result_type, id, - join("abs(", to_unpacked_expression(args[0]), " - ", to_unpacked_expression(args[1]), ")"), + join("abs(", to_enclosed_unpacked_expression(args[0]), " - ", + to_enclosed_unpacked_expression(args[1]), ")"), should_forward(args[0]) && should_forward(args[1])); inherit_expression_dependencies(id, args[0]); inherit_expression_dependencies(id, args[1]); @@ -4833,27 +10080,27 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, break; case GLSLstd450Length: - // MSL does not support scalar versions here. + // MSL does not support scalar versions, so use abs(). if (expression_type(args[0]).vecsize == 1) - { - // Equivalent to abs(). emit_unary_func_op(result_type, id, args[0], "abs"); - } else CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; case GLSLstd450Normalize: + { + auto &exp_type = expression_type(args[0]); // MSL does not support scalar versions here. - if (expression_type(args[0]).vecsize == 1) - { - // Returns -1 or 1 for valid input, sign() does the job. 
+ // MSL has no implementation for normalize in the fast:: namespace for half2 and half3 + // Returns -1 or 1 for valid input, sign() does the job. + if (exp_type.vecsize == 1) emit_unary_func_op(result_type, id, args[0], "sign"); - } + else if (exp_type.vecsize <= 3 && exp_type.basetype == SPIRType::Half) + emit_unary_func_op(result_type, id, args[0], "normalize"); else - CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + emit_unary_func_op(result_type, id, args[0], "fast::normalize"); break; - + } case GLSLstd450Reflect: if (get(result_type).vecsize == 1) emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect"); @@ -4868,12 +10115,87 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; + case GLSLstd450FaceForward: + if (get(result_type).vecsize == 1) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward"); + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Modf: + case GLSLstd450Frexp: + { + // Special case. If the variable is a scalar access chain, we cannot use it directly. We have to emit a temporary. + // Another special case is if the variable is in a storage class which is not thread. + auto *ptr = maybe_get(args[1]); + auto &type = expression_type(args[1]); + + bool is_thread_storage = storage_class_array_is_thread(type.storage); + if (type.storage == StorageClassOutput && capture_output_to_buffer) + is_thread_storage = false; + + if (!is_thread_storage || + (ptr && ptr->access_chain && is_scalar(expression_type(args[1])))) + { + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + + // Need to create temporaries and copy over to access chain after. + // We cannot directly take the reference of a vector swizzle in MSL, even if it's scalar ... 
+ uint32_t &tmp_id = extra_sub_expressions[id]; + if (!tmp_id) + tmp_id = ir.increase_bound_by(1); + + uint32_t tmp_type_id = get_pointee_type_id(expression_type_id(args[1])); + emit_uninitialized_temporary_expression(tmp_type_id, tmp_id); + emit_binary_func_op(result_type, id, args[0], tmp_id, eop == GLSLstd450Modf ? "modf" : "frexp"); + statement(to_expression(args[1]), " = ", to_expression(tmp_id), ";"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + } + default: CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; } } +void CompilerMSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t count) +{ + enum AMDShaderTrinaryMinMax + { + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9 + }; + + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Trinary min/max functions require MSL 2.1."); + + auto op = static_cast(eop); + + switch (op) + { + case FMid3AMD: + case UMid3AMD: + case SMid3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "median3"); + break; + default: + CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(result_type, id, eop, args, count); + break; + } +} + // Emit a structure declaration for the specified interface variable. 
void CompilerMSL::emit_interface_block(uint32_t ib_var_id) { @@ -4881,7 +10203,8 @@ void CompilerMSL::emit_interface_block(uint32_t ib_var_id) { auto &ib_var = get(ib_var_id); auto &ib_type = get_variable_data_type(ib_var); - assert(ib_type.basetype == SPIRType::Struct && !ib_type.member_types.empty()); + //assert(ib_type.basetype == SPIRType::Struct && !ib_type.member_types.empty()); + assert(ib_type.basetype == SPIRType::Struct); emit_struct(ib_type); } } @@ -4896,30 +10219,34 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) local_variable_names = resource_names; string decl; - processing_entry_point = (func.self == ir.default_entry_point); + processing_entry_point = func.self == ir.default_entry_point; + + // Metal helper functions must be static force-inline otherwise they will cause problems when linked together in a single Metallib. + if (!processing_entry_point) + statement(force_inline); auto &type = get(func.return_type); - if (type.array.empty()) + if (!type.array.empty() && msl_options.force_native_arrays) { - decl += func_type_decl(type); + // We cannot return native arrays in MSL, so "return" through an out variable. + decl += "void"; } else { - // We cannot return arrays in MSL, so "return" through an out variable. - decl = "void"; + decl += func_type_decl(type); } decl += " "; decl += to_name(func.self); decl += "("; - if (!type.array.empty()) + if (!type.array.empty() && msl_options.force_native_arrays) { // Fake arrays returns by writing to an out array instead. decl += "thread "; decl += type_to_glsl(type); - decl += " (&SPIRV_Cross_return_value)"; + decl += " (&spvReturnValue)"; decl += type_to_array_glsl(type); if (!func.arguments.empty()) decl += ", "; @@ -4932,6 +10259,9 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) else decl += entry_point_args_classic(!func.arguments.empty()); + // append entry point args to avoid conflicts in local variable names. 
+ local_variable_names.insert(resource_names.begin(), resource_names.end()); + // If entry point function has variables that require early declaration, // ensure they each have an empty initializer, creating one if needed. // This is done at this late stage because the initialization expression @@ -4939,7 +10269,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) for (auto var_id : vars_needing_early_declaration) { auto &ed_var = get(var_id); - uint32_t &initializer = ed_var.initializer; + ID &initializer = ed_var.initializer; if (!initializer) initializer = ir.increase_bound_by(1); @@ -4968,19 +10298,46 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) decl += argument_decl(arg); - // Manufacture automatic sampler arg for SampledImage texture + bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + auto &arg_type = get(arg.type); - if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer) - decl += join(", thread const ", sampler_type(arg_type), " ", to_sampler_expression(arg.id)); + if (arg_type.basetype == SPIRType::SampledImage && !is_dynamic_img_sampler) + { + // Manufacture automatic plane args for multiplanar texture + uint32_t planes = 1; + if (auto *constexpr_sampler = find_constexpr_sampler(name_id)) + if (constexpr_sampler->ycbcr_conversion_enable) + planes = constexpr_sampler->planes; + for (uint32_t i = 1; i < planes; i++) + decl += join(", ", argument_decl(arg), plane_name_suffix, i); + + // Manufacture automatic sampler arg for SampledImage texture + if (arg_type.image.dim != DimBuffer) + { + if (arg_type.array.empty()) + { + decl += join(", ", sampler_type(arg_type, arg.id), " ", to_sampler_expression(arg.id)); + } + else + { + const char *sampler_address_space = + descriptor_address_space(name_id, + StorageClassUniformConstant, + "thread const"); + decl += join(", ", sampler_address_space, " ", 
sampler_type(arg_type, arg.id), "& ", to_sampler_expression(arg.id)); + } + } + } // Manufacture automatic swizzle arg. - if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type)) + if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type) && + !is_dynamic_img_sampler) { bool arg_is_array = !arg_type.array.empty(); decl += join(", constant uint", arg_is_array ? "* " : "& ", to_swizzle_expression(arg.id)); } - if (buffers_requiring_array_length.count(name_id)) + if (buffer_requires_array_length(name_id)) { bool arg_is_array = !arg_type.array.empty(); decl += join(", constant uint", arg_is_array ? "* " : "& ", to_buffer_size_expression(name_id)); @@ -4994,60 +10351,158 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) statement(decl); } +static bool needs_chroma_reconstruction(const MSLConstexprSampler *constexpr_sampler) +{ + // For now, only multiplanar images need explicit reconstruction. GBGR and BGRG images + // use implicit reconstruction. + return constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && constexpr_sampler->planes > 1; +} + // Returns the texture sampling function string for the specified image and sampling characteristics. -string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool, bool, - bool has_offset, bool, bool has_dref, uint32_t, uint32_t) +string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args) { + VariableID img = args.base.img; + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + // Special-case gather. 
We have to alter the component being looked up // in the swizzle case. - if (msl_options.swizzle_texture_samples && is_gather) + if (msl_options.swizzle_texture_samples && args.base.is_gather && !is_dynamic_img_sampler && + (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable)) { - string fname = imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; - fname += "<" + type_to_glsl(get(imgtype.image.type)) + ", metal::" + type_to_glsl(imgtype); - // Add the arg types ourselves. Yes, this sucks, but Clang can't - // deduce template pack parameters in the middle of an argument list. - switch (imgtype.image.dim) - { - case Dim2D: - fname += ", float2"; - if (imgtype.image.arrayed) - fname += ", uint"; - if (imgtype.image.depth) - fname += ", float"; - if (!imgtype.image.depth || has_offset) - fname += ", int2"; - break; - case DimCube: - fname += ", float3"; - if (imgtype.image.arrayed) - fname += ", uint"; - if (imgtype.image.depth) - fname += ", float"; - break; - default: - SPIRV_CROSS_THROW("Invalid texture dimension for gather op."); - } - fname += ">"; - return fname; + bool is_compare = comparison_ids.count(img); + add_spv_func_and_recompile(is_compare ? SPVFuncImplGatherCompareSwizzle : SPVFuncImplGatherSwizzle); + return is_compare ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; } auto *combined = maybe_get(img); // Texture reference - string fname = to_expression(combined ? 
combined->image : img) + "."; - if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype)) - fname = "spvTextureSwizzle(" + fname; - - // Texture function and sampler - if (is_fetch) - fname += "read"; - else if (is_gather) - fname += "gather"; + string fname; + if (needs_chroma_reconstruction(constexpr_sampler) && !is_dynamic_img_sampler) + { + if (constexpr_sampler->planes != 2 && constexpr_sampler->planes != 3) + SPIRV_CROSS_THROW("Unhandled number of color image planes!"); + // 444 images aren't downsampled, so we don't need to do linear filtering. + if (constexpr_sampler->resolution == MSL_FORMAT_RESOLUTION_444 || + constexpr_sampler->chroma_filter == MSL_SAMPLER_FILTER_NEAREST) + { + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest3Plane); + fname = "spvChromaReconstructNearest"; + } + else // Linear with a downsampled format + { + fname = "spvChromaReconstructLinear"; + switch (constexpr_sampler->resolution) + { + case MSL_FORMAT_RESOLUTION_444: + assert(false); + break; // not reached + case MSL_FORMAT_RESOLUTION_422: + switch (constexpr_sampler->x_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven3Plane); + fname += "422CositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint3Plane); + fname += "422Midpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid chroma location."); + } + break; + case MSL_FORMAT_RESOLUTION_420: + fname += "420"; + switch 
(constexpr_sampler->x_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + switch (constexpr_sampler->y_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane); + fname += "XCositedEvenYCositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane); + fname += "XCositedEvenYMidpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid Y chroma location."); + } + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + switch (constexpr_sampler->y_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane); + fname += "XMidpointYCositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane); + fname += "XMidpointYMidpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid Y chroma location."); + } + break; + default: + SPIRV_CROSS_THROW("Invalid X chroma location."); + } + break; + default: + SPIRV_CROSS_THROW("Invalid format resolution."); + } + } + } else - fname += "sample"; + { + fname = to_expression(combined ? 
combined->image : img) + "."; + + // Texture function and sampler + if (args.base.is_fetch) + fname += "read"; + else if (args.base.is_gather) + fname += "gather"; + else + fname += "sample"; - if (has_dref) - fname += "_compare"; + if (args.has_dref) + fname += "_compare"; + } return fname; } @@ -5068,28 +10523,72 @@ static inline bool sampling_type_needs_f32_conversion(const SPIRType &type) } // Returns the function args for a texture sampling function for the specified image and sampling characteristics. -string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, - uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y, - uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, - uint32_t sample, uint32_t minlod, bool *p_forward) +string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward) { + VariableID img = args.base.img; + auto &imgtype = *args.base.imgtype; + uint32_t lod = args.lod; + uint32_t grad_x = args.grad_x; + uint32_t grad_y = args.grad_y; + uint32_t bias = args.bias; + + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + string farg_str; - if (!is_fetch) - farg_str += to_sampler_expression(img); + bool forward = true; - if (msl_options.swizzle_texture_samples && is_gather) + if (!is_dynamic_img_sampler) { - if (!farg_str.empty()) - farg_str += ", "; + // Texture reference (for some cases) + if (needs_chroma_reconstruction(constexpr_sampler)) + { + // Multiplanar images need two or three textures. 
+ farg_str += to_expression(img); + for (uint32_t i = 1; i < constexpr_sampler->planes; i++) + farg_str += join(", ", to_expression(img), plane_name_suffix, i); + } + else if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) && + msl_options.swizzle_texture_samples && args.base.is_gather) + { + auto *combined = maybe_get(img); + farg_str += to_expression(combined ? combined->image : img); + } + + // Sampler reference + if (!args.base.is_fetch) + { + if (!farg_str.empty()) + farg_str += ", "; + farg_str += to_sampler_expression(img); + } + + if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) && + msl_options.swizzle_texture_samples && args.base.is_gather) + { + // Add the swizzle constant from the swizzle buffer. + farg_str += ", " + to_swizzle_expression(img); + used_swizzle_buffer = true; + } - auto *combined = maybe_get(img); - farg_str += to_expression(combined ? combined->image : img); + // Swizzled gather puts the component before the other args, to allow template + // deduction to work. 
+ if (args.component && msl_options.swizzle_texture_samples) + { + forward = should_forward(args.component); + farg_str += ", " + to_component_argument(args.component); + } } // Texture coordinates - bool forward = should_forward(coord); - auto coord_expr = to_enclosed_expression(coord); - auto &coord_type = expression_type(coord); + forward = forward && should_forward(args.coord); + auto coord_expr = to_enclosed_expression(args.coord); + auto &coord_type = expression_type(args.coord); bool coord_is_fp = type_is_floating_point(coord_type); bool is_cube_fetch = false; @@ -5103,11 +10602,19 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (coord_type.vecsize > 1) tex_coords = enclose_expression(tex_coords) + ".x"; - if (is_fetch) + if (args.base.is_fetch) tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; else if (sampling_type_needs_f32_conversion(coord_type)) tex_coords = convert_to_f32(tex_coords, 1); + if (msl_options.texture_1D_as_2D) + { + if (args.base.is_fetch) + tex_coords = "uint2(" + tex_coords + ", 0)"; + else + tex_coords = "float2(" + tex_coords + ", 0.5)"; + } + alt_coord_component = 1; break; @@ -5122,25 +10629,36 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool else { // Metal texel buffer textures are 2D, so convert 1D coord to 2D. - if (is_fetch) - tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + // Support for Metal 2.1's new texture_buffer type. 
+ if (args.base.is_fetch) + { + if (msl_options.texel_buffer_texture_width > 0) + { + tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + } + else + { + tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ", " + + to_expression(img) + ")"; + } + } } alt_coord_component = 1; break; case DimSubpassData: - if (imgtype.image.ms) - tex_coords = "uint2(gl_FragCoord.xy)"; - else - tex_coords = join("uint2(gl_FragCoord.xy), 0"); + // If we're using Metal's native frame-buffer fetch API for subpass inputs, + // this path will not be hit. + tex_coords = "uint2(gl_FragCoord.xy)"; + alt_coord_component = 2; break; case Dim2D: if (coord_type.vecsize > 2) tex_coords = enclose_expression(tex_coords) + ".xy"; - if (is_fetch) + if (args.base.is_fetch) tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; else if (sampling_type_needs_f32_conversion(coord_type)) tex_coords = convert_to_f32(tex_coords, 2); @@ -5152,7 +10670,7 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (coord_type.vecsize > 3) tex_coords = enclose_expression(tex_coords) + ".xyz"; - if (is_fetch) + if (args.base.is_fetch) tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; else if (sampling_type_needs_f32_conversion(coord_type)) tex_coords = convert_to_f32(tex_coords, 3); @@ -5161,7 +10679,7 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool break; case DimCube: - if (is_fetch) + if (args.base.is_fetch) { is_cube_fetch = true; tex_coords += ".xy"; @@ -5183,76 +10701,114 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool break; } - if (is_fetch && offset) - { - // Fetch offsets must be applied directly to the coordinate. 
- forward = forward && should_forward(offset); - auto &type = expression_type(offset); - if (type.basetype != SPIRType::UInt) - tex_coords += " + " + bitcast_expression(SPIRType::UInt, offset); - else - tex_coords += " + " + to_enclosed_expression(offset); - } - else if (is_fetch && coffset) + if (args.base.is_fetch && args.offset) { // Fetch offsets must be applied directly to the coordinate. - forward = forward && should_forward(coffset); - auto &type = expression_type(coffset); - if (type.basetype != SPIRType::UInt) - tex_coords += " + " + bitcast_expression(SPIRType::UInt, coffset); + forward = forward && should_forward(args.offset); + auto &type = expression_type(args.offset); + if (imgtype.image.dim == Dim1D && msl_options.texture_1D_as_2D) + { + if (type.basetype != SPIRType::UInt) + tex_coords += join(" + uint2(", bitcast_expression(SPIRType::UInt, args.offset), ", 0)"); + else + tex_coords += join(" + uint2(", to_enclosed_expression(args.offset), ", 0)"); + } else - tex_coords += " + " + to_enclosed_expression(coffset); + { + if (type.basetype != SPIRType::UInt) + tex_coords += " + " + bitcast_expression(SPIRType::UInt, args.offset); + else + tex_coords += " + " + to_enclosed_expression(args.offset); + } } // If projection, use alt coord as divisor - if (is_proj) + if (args.base.is_proj) { if (sampling_type_needs_f32_conversion(coord_type)) - tex_coords += " / " + convert_to_f32(to_extract_component_expression(coord, alt_coord_component), 1); + tex_coords += " / " + convert_to_f32(to_extract_component_expression(args.coord, alt_coord_component), 1); else - tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component); + tex_coords += " / " + to_extract_component_expression(args.coord, alt_coord_component); } if (!farg_str.empty()) farg_str += ", "; - farg_str += tex_coords; - // If fetch from cube, add face explicitly - if (is_cube_fetch) + if (imgtype.image.dim == DimCube && imgtype.image.arrayed && msl_options.emulate_cube_array) { - 
// Special case for cube arrays, face and layer are packed in one dimension. - if (imgtype.image.arrayed) - farg_str += ", uint(" + to_extract_component_expression(coord, 2) + ") % 6u"; + farg_str += "spvCubemapTo2DArrayFace(" + tex_coords + ").xy"; + + if (is_cube_fetch) + farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ")"; else - farg_str += ", uint(" + round_fp_tex_coords(to_extract_component_expression(coord, 2), coord_is_fp) + ")"; + farg_str += + ", uint(spvCubemapTo2DArrayFace(" + tex_coords + ").z) + (uint(" + + round_fp_tex_coords(to_extract_component_expression(args.coord, alt_coord_component), coord_is_fp) + + ") * 6u)"; + + add_spv_func_and_recompile(SPVFuncImplCubemapTo2DArrayFace); } + else + { + farg_str += tex_coords; + + // If fetch from cube, add face explicitly + if (is_cube_fetch) + { + // Special case for cube arrays, face and layer are packed in one dimension. + if (imgtype.image.arrayed) + farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ") % 6u"; + else + farg_str += + ", uint(" + round_fp_tex_coords(to_extract_component_expression(args.coord, 2), coord_is_fp) + ")"; + } - // If array, use alt coord - if (imgtype.image.arrayed) - { - // Special case for cube arrays, face and layer are packed in one dimension. - if (imgtype.image.dim == DimCube && is_fetch) - farg_str += ", uint(" + to_extract_component_expression(coord, 2) + ") / 6u"; - else - farg_str += ", uint(" + - round_fp_tex_coords(to_extract_component_expression(coord, alt_coord_component), coord_is_fp) + - ")"; + // If array, use alt coord + if (imgtype.image.arrayed) + { + // Special case for cube arrays, face and layer are packed in one dimension. 
+ if (imgtype.image.dim == DimCube && args.base.is_fetch) + { + farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ") / 6u"; + } + else + { + farg_str += + ", uint(" + + round_fp_tex_coords(to_extract_component_expression(args.coord, alt_coord_component), coord_is_fp) + + ")"; + if (imgtype.image.dim == DimSubpassData) + { + if (msl_options.multiview) + farg_str += " + gl_ViewIndex"; + else if (msl_options.arrayed_subpass_input) + farg_str += " + gl_Layer"; + } + } + } + else if (imgtype.image.dim == DimSubpassData) + { + if (msl_options.multiview) + farg_str += ", gl_ViewIndex"; + else if (msl_options.arrayed_subpass_input) + farg_str += ", gl_Layer"; + } } // Depth compare reference value - if (dref) + if (args.dref) { - forward = forward && should_forward(dref); + forward = forward && should_forward(args.dref); farg_str += ", "; - auto &dref_type = expression_type(dref); + auto &dref_type = expression_type(args.dref); string dref_expr; - if (is_proj) - dref_expr = - join(to_enclosed_expression(dref), " / ", to_extract_component_expression(coord, alt_coord_component)); + if (args.base.is_proj) + dref_expr = join(to_enclosed_expression(args.dref), " / ", + to_extract_component_expression(args.coord, alt_coord_component)); else - dref_expr = to_expression(dref); + dref_expr = to_expression(args.dref); if (sampling_type_needs_f32_conversion(dref_type)) dref_expr = convert_to_f32(dref_expr, 1); @@ -5274,10 +10830,10 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool grad_y = 0; farg_str += ", level(0)"; } - else + else if (!msl_options.supports_msl_version(2, 3)) { SPIRV_CROSS_THROW("Using non-constant 0.0 gradient() qualifier for sample_compare. 
This is not " - "supported in MSL macOS."); + "supported on macOS prior to MSL 2.3."); } } @@ -5289,27 +10845,27 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool { bias = 0; } - else + else if (!msl_options.supports_msl_version(2, 3)) { - SPIRV_CROSS_THROW( - "Using non-constant 0.0 bias() qualifier for sample_compare. This is not supported in MSL macOS."); + SPIRV_CROSS_THROW("Using non-constant 0.0 bias() qualifier for sample_compare. This is not supported " + "on macOS prior to MSL 2.3."); } } } // LOD Options // Metal does not support LOD for 1D textures. - if (bias && imgtype.image.dim != Dim1D) + if (bias && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) { forward = forward && should_forward(bias); farg_str += ", bias(" + to_expression(bias) + ")"; } // Metal does not support LOD for 1D textures. - if (lod && imgtype.image.dim != Dim1D) + if (lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) { forward = forward && should_forward(lod); - if (is_fetch) + if (args.base.is_fetch) { farg_str += ", " + to_expression(lod); } @@ -5318,8 +10874,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool farg_str += ", level(" + to_expression(lod) + ")"; } } - else if (is_fetch && !lod && imgtype.image.dim != Dim1D && imgtype.image.dim != DimBuffer && !imgtype.image.ms && - imgtype.image.sampled != 2) + else if (args.base.is_fetch && !lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D) && + imgtype.image.dim != DimBuffer && !imgtype.image.ms && imgtype.image.sampled != 2) { // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. // Check for sampled type as well, because is_fetch is also used for OpImageRead in MSL. @@ -5327,13 +10883,14 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool } // Metal does not support LOD for 1D textures. 
- if ((grad_x || grad_y) && imgtype.image.dim != Dim1D) + if ((grad_x || grad_y) && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) { forward = forward && should_forward(grad_x); forward = forward && should_forward(grad_y); string grad_opt; switch (imgtype.image.dim) { + case Dim1D: case Dim2D: grad_opt = "2d"; break; @@ -5341,7 +10898,10 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool grad_opt = "3d"; break; case DimCube: - grad_opt = "cube"; + if (imgtype.image.arrayed && msl_options.emulate_cube_array) + grad_opt = "2d"; + else + grad_opt = "cube"; break; default: grad_opt = "unsupported_gradient_dimension"; @@ -5350,46 +10910,47 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool farg_str += ", gradient" + grad_opt + "(" + to_expression(grad_x) + ", " + to_expression(grad_y) + ")"; } - if (minlod) + if (args.min_lod) { - if (msl_options.is_macos()) - { - if (!msl_options.supports_msl_version(2, 2)) - SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2+ and up on macOS."); - } - else if (msl_options.is_ios()) - SPIRV_CROSS_THROW("min_lod_clamp() is not supported on iOS."); + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2+ and up."); - forward = forward && should_forward(minlod); - farg_str += ", min_lod_clamp(" + to_expression(minlod) + ")"; + forward = forward && should_forward(args.min_lod); + farg_str += ", min_lod_clamp(" + to_expression(args.min_lod) + ")"; } // Add offsets string offset_expr; - if (coffset && !is_fetch) - { - forward = forward && should_forward(coffset); - offset_expr = to_expression(coffset); - } - else if (offset && !is_fetch) + const SPIRType *offset_type = nullptr; + if (args.offset && !args.base.is_fetch) { - forward = forward && should_forward(offset); - offset_expr = to_expression(offset); + forward = forward && should_forward(args.offset); + offset_expr = 
to_expression(args.offset); + offset_type = &expression_type(args.offset); } if (!offset_expr.empty()) { switch (imgtype.image.dim) { + case Dim1D: + if (!msl_options.texture_1D_as_2D) + break; + if (offset_type->vecsize > 1) + offset_expr = enclose_expression(offset_expr) + ".x"; + + farg_str += join(", int2(", offset_expr, ", 0)"); + break; + case Dim2D: - if (coord_type.vecsize > 2) + if (offset_type->vecsize > 2) offset_expr = enclose_expression(offset_expr) + ".xy"; farg_str += ", " + offset_expr; break; case Dim3D: - if (coord_type.vecsize > 3) + if (offset_type->vecsize > 3) offset_expr = enclose_expression(offset_expr) + ".xyz"; farg_str += ", " + offset_expr; @@ -5400,30 +10961,37 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool } } - if (comp) + if (args.component) { // If 2D has gather component, ensure it also has an offset arg if (imgtype.image.dim == Dim2D && offset_expr.empty()) farg_str += ", int2(0)"; - forward = forward && should_forward(comp); - farg_str += ", " + to_component_argument(comp); - } + if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler) + { + forward = forward && should_forward(args.component); - if (sample) - { - forward = forward && should_forward(sample); - farg_str += ", "; - farg_str += to_expression(sample); + uint32_t image_var = 0; + if (const auto *combined = maybe_get(img)) + { + if (const auto *img_var = maybe_get_backing_variable(combined->image)) + image_var = img_var->self; + } + else if (const auto *var = maybe_get_backing_variable(img)) + { + image_var = var->self; + } + + if (image_var == 0 || !is_depth_image(expression_type(image_var), image_var)) + farg_str += ", " + to_component_argument(args.component); + } } - if (msl_options.swizzle_texture_samples && is_sampled_image_type(imgtype)) + if (args.sample) { - // Add the swizzle constant from the swizzle buffer. 
- if (!is_gather) - farg_str += ")"; - farg_str += ", " + to_swizzle_expression(img); - used_swizzle_buffer = true; + forward = forward && should_forward(args.sample); + farg_str += ", "; + farg_str += to_expression(args.sample); } *p_forward = forward; @@ -5441,13 +11009,7 @@ string CompilerMSL::round_fp_tex_coords(string tex_coords, bool coord_is_fp) // The ID must be a scalar constant. string CompilerMSL::to_component_argument(uint32_t id) { - if (ir.ids[id].get_type() != TypeConstant) - { - SPIRV_CROSS_THROW("ID " + to_string(id) + " is not an OpConstant."); - return "component::x"; - } - - uint32_t component_index = get(id).scalar(); + uint32_t component_index = evaluate_constant_u32(id); switch (component_index) { case 0: @@ -5462,7 +11024,6 @@ string CompilerMSL::to_component_argument(uint32_t id) default: SPIRV_CROSS_THROW("The value (" + to_string(component_index) + ") of OpConstant ID " + to_string(id) + " is not a valid Component index, which must be one of 0, 1, 2, or 3."); - return "component::x"; } } @@ -5472,14 +11033,222 @@ void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id set(result_id, result_type, image_id, samp_id); } +string CompilerMSL::to_texture_op(const Instruction &i, bool sparse, bool *forward, + SmallVector &inherited_expressions) +{ + auto *ops = stream(i); + uint32_t result_type_id = ops[0]; + uint32_t img = ops[2]; + auto &result_type = get(result_type_id); + auto op = static_cast(i.op); + bool is_gather = (op == OpImageGather || op == OpImageDrefGather); + + // Bypass pointers because we need the real image struct + auto &type = expression_type(img); + auto &imgtype = get(type.self); + + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? 
var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + + string expr; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler) + { + // If this needs sampler Y'CbCr conversion, we need to do some additional + // processing. + switch (constexpr_sampler->ycbcr_model) + { + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY: + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY: + // Default + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709: + add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT709); + expr += "spvConvertYCbCrBT709("; + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601: + add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT601); + expr += "spvConvertYCbCrBT601("; + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020: + add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT2020); + expr += "spvConvertYCbCrBT2020("; + break; + default: + SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion."); + } + + if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) + { + switch (constexpr_sampler->ycbcr_range) + { + case MSL_SAMPLER_YCBCR_RANGE_ITU_FULL: + add_spv_func_and_recompile(SPVFuncImplExpandITUFullRange); + expr += "spvExpandITUFullRange("; + break; + case MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW: + add_spv_func_and_recompile(SPVFuncImplExpandITUNarrowRange); + expr += "spvExpandITUNarrowRange("; + break; + default: + SPIRV_CROSS_THROW("Invalid Y'CbCr range."); + } + } + } + else if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) && + !is_dynamic_img_sampler) + { + add_spv_func_and_recompile(SPVFuncImplTextureSwizzle); + expr += "spvTextureSwizzle("; + } + + string inner_expr = CompilerGLSL::to_texture_op(i, sparse, forward, inherited_expressions); + + if (constexpr_sampler && 
constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler) + { + if (!constexpr_sampler->swizzle_is_identity()) + { + static const char swizzle_names[] = "rgba"; + if (!constexpr_sampler->swizzle_has_one_or_zero()) + { + // If we can, do it inline. + expr += inner_expr + "."; + for (uint32_t c = 0; c < 4; c++) + { + switch (constexpr_sampler->swizzle[c]) + { + case MSL_COMPONENT_SWIZZLE_IDENTITY: + expr += swizzle_names[c]; + break; + case MSL_COMPONENT_SWIZZLE_R: + case MSL_COMPONENT_SWIZZLE_G: + case MSL_COMPONENT_SWIZZLE_B: + case MSL_COMPONENT_SWIZZLE_A: + expr += swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R]; + break; + default: + SPIRV_CROSS_THROW("Invalid component swizzle."); + } + } + } + else + { + // Otherwise, we need to emit a temporary and swizzle that. + uint32_t temp_id = ir.increase_bound_by(1); + emit_op(result_type_id, temp_id, inner_expr, false); + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(temp_id, inherit); + inherited_expressions.clear(); + inherited_expressions.push_back(temp_id); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(temp_id); + break; + + default: + break; + } + expr += type_to_glsl(result_type) + "("; + for (uint32_t c = 0; c < 4; c++) + { + switch (constexpr_sampler->swizzle[c]) + { + case MSL_COMPONENT_SWIZZLE_IDENTITY: + expr += to_expression(temp_id) + "." + swizzle_names[c]; + break; + case MSL_COMPONENT_SWIZZLE_ZERO: + expr += "0"; + break; + case MSL_COMPONENT_SWIZZLE_ONE: + expr += "1"; + break; + case MSL_COMPONENT_SWIZZLE_R: + case MSL_COMPONENT_SWIZZLE_G: + case MSL_COMPONENT_SWIZZLE_B: + case MSL_COMPONENT_SWIZZLE_A: + expr += to_expression(temp_id) + "." 
+ + swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R]; + break; + default: + SPIRV_CROSS_THROW("Invalid component swizzle."); + } + if (c < 3) + expr += ", "; + } + expr += ")"; + } + } + else + expr += inner_expr; + if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) + { + expr += join(", ", constexpr_sampler->bpc, ")"); + if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY) + expr += ")"; + } + } + else + { + expr += inner_expr; + if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) && + !is_dynamic_img_sampler) + { + // Add the swizzle constant from the swizzle buffer. + expr += ", " + to_swizzle_expression(img) + ")"; + used_swizzle_buffer = true; + } + } + + return expr; +} + +static string create_swizzle(MSLComponentSwizzle swizzle) +{ + switch (swizzle) + { + case MSL_COMPONENT_SWIZZLE_IDENTITY: + return "spvSwizzle::none"; + case MSL_COMPONENT_SWIZZLE_ZERO: + return "spvSwizzle::zero"; + case MSL_COMPONENT_SWIZZLE_ONE: + return "spvSwizzle::one"; + case MSL_COMPONENT_SWIZZLE_R: + return "spvSwizzle::red"; + case MSL_COMPONENT_SWIZZLE_G: + return "spvSwizzle::green"; + case MSL_COMPONENT_SWIZZLE_B: + return "spvSwizzle::blue"; + case MSL_COMPONENT_SWIZZLE_A: + return "spvSwizzle::alpha"; + default: + SPIRV_CROSS_THROW("Invalid component swizzle."); + } +} + // Returns a string representation of the ID, usable as a function arg. // Manufacture automatic sampler arg for SampledImage texture. -string CompilerMSL::to_func_call_arg(uint32_t id) +string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) { string arg_str; + auto &type = expression_type(id); + bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + // If the argument *itself* is a "dynamic" combined-image sampler, then we can just pass that around. 
+ bool arg_is_dynamic_img_sampler = has_extended_decoration(id, SPIRVCrossDecorationDynamicImageSampler); + if (is_dynamic_img_sampler && !arg_is_dynamic_img_sampler) + arg_str = join("spvDynamicImageSampler<", type_to_glsl(get(type.image.type)), ">("); + auto *c = maybe_get(id); - if (c && !get(c->constant_type).array.empty()) + if (msl_options.force_native_arrays && c && !get(c->constant_type).array.empty()) { // If we are passing a constant array directly to a function for some reason, // the callee will expect an argument in thread const address space @@ -5492,42 +11261,125 @@ string CompilerMSL::to_func_call_arg(uint32_t id) // so just create a thread local copy in the current function. arg_str = join("_", id, "_array_copy"); auto &constants = current_function->constant_arrays_needed_on_stack; - auto itr = find(begin(constants), end(constants), id); + auto itr = find(begin(constants), end(constants), ID(id)); if (itr == end(constants)) { force_recompile(); constants.push_back(id); } } + // Dereference pointer variables where needed. + // FIXME: This dereference is actually backwards. We should really just support passing pointer variables between functions. + else if (should_dereference(id)) + arg_str += dereference_expression(type, CompilerGLSL::to_func_call_arg(arg, id)); else - arg_str = CompilerGLSL::to_func_call_arg(id); - - // Manufacture automatic sampler arg if the arg is a SampledImage texture. - auto &type = expression_type(id); - if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) - { - // Need to check the base variable in case we need to apply a qualified alias. - uint32_t var_id = 0; - auto *sampler_var = maybe_get(id); - if (sampler_var) - var_id = sampler_var->basevariable; - - arg_str += ", " + to_sampler_expression(var_id ? var_id : id); - } + arg_str += CompilerGLSL::to_func_call_arg(arg, id); + // Need to check the base variable in case we need to apply a qualified alias. 
uint32_t var_id = 0; auto *var = maybe_get(id); if (var) var_id = var->basevariable; - if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) + if (!arg_is_dynamic_img_sampler) { - // Need to check the base variable in case we need to apply a qualified alias. - arg_str += ", " + to_swizzle_expression(var_id ? var_id : id); + auto *constexpr_sampler = find_constexpr_sampler(var_id ? var_id : id); + if (type.basetype == SPIRType::SampledImage) + { + // Manufacture automatic plane args for multiplanar texture + uint32_t planes = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + { + planes = constexpr_sampler->planes; + // If this parameter isn't aliasing a global, then we need to use + // the special "dynamic image-sampler" class to pass it--and we need + // to use it for *every* non-alias parameter, in case a combined + // image-sampler with a Y'CbCr conversion is passed. Hopefully, this + // pathological case is so rare that it should never be hit in practice. + if (!arg.alias_global_variable) + add_spv_func_and_recompile(SPVFuncImplDynamicImageSampler); + } + for (uint32_t i = 1; i < planes; i++) + arg_str += join(", ", CompilerGLSL::to_func_call_arg(arg, id), plane_name_suffix, i); + // Manufacture automatic sampler arg if the arg is a SampledImage texture. + if (type.image.dim != DimBuffer) + arg_str += ", " + to_sampler_expression(var_id ? 
var_id : id); + + // Add sampler Y'CbCr conversion info if we have it + if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + { + SmallVector samp_args; + + switch (constexpr_sampler->resolution) + { + case MSL_FORMAT_RESOLUTION_444: + // Default + break; + case MSL_FORMAT_RESOLUTION_422: + samp_args.push_back("spvFormatResolution::_422"); + break; + case MSL_FORMAT_RESOLUTION_420: + samp_args.push_back("spvFormatResolution::_420"); + break; + default: + SPIRV_CROSS_THROW("Invalid format resolution."); + } + + if (constexpr_sampler->chroma_filter != MSL_SAMPLER_FILTER_NEAREST) + samp_args.push_back("spvChromaFilter::linear"); + + if (constexpr_sampler->x_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN) + samp_args.push_back("spvXChromaLocation::midpoint"); + if (constexpr_sampler->y_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN) + samp_args.push_back("spvYChromaLocation::midpoint"); + switch (constexpr_sampler->ycbcr_model) + { + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY: + // Default + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_identity"); + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_709"); + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_601"); + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_2020"); + break; + default: + SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion."); + } + if (constexpr_sampler->ycbcr_range != MSL_SAMPLER_YCBCR_RANGE_ITU_FULL) + samp_args.push_back("spvYCbCrRange::itu_narrow"); + samp_args.push_back(join("spvComponentBits(", constexpr_sampler->bpc, ")")); + arg_str += join(", spvYCbCrSampler(", merge(samp_args), ")"); + } + } + + if (is_dynamic_img_sampler && constexpr_sampler && 
constexpr_sampler->ycbcr_conversion_enable) + arg_str += join(", (uint(", create_swizzle(constexpr_sampler->swizzle[3]), ") << 24) | (uint(", + create_swizzle(constexpr_sampler->swizzle[2]), ") << 16) | (uint(", + create_swizzle(constexpr_sampler->swizzle[1]), ") << 8) | uint(", + create_swizzle(constexpr_sampler->swizzle[0]), ")"); + else if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) + arg_str += ", " + to_swizzle_expression(var_id ? var_id : id); + + if (buffer_requires_array_length(var_id)) + arg_str += ", " + to_buffer_size_expression(var_id ? var_id : id); + + if (is_dynamic_img_sampler) + arg_str += ")"; } - if (buffers_requiring_array_length.count(var_id)) - arg_str += ", " + to_buffer_size_expression(var_id ? var_id : id); + // Emulate texture2D atomic operations + auto *backing_var = maybe_get_backing_variable(var_id); + if (backing_var && atomic_image_vars.count(backing_var->self)) + { + arg_str += ", " + to_expression(var_id) + "_atomic"; + } return arg_str; } @@ -5538,7 +11390,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id) string CompilerMSL::to_sampler_expression(uint32_t id) { auto *combined = maybe_get(id); - auto expr = to_expression(combined ? combined->image : id); + auto expr = to_expression(combined ? combined->image : VariableID(id)); auto index = expr.find_first_of('['); uint32_t samp_id = 0; @@ -5559,13 +11411,13 @@ string CompilerMSL::to_swizzle_expression(uint32_t id) { auto *combined = maybe_get(id); - auto expr = to_expression(combined ? combined->image : id); + auto expr = to_expression(combined ? combined->image : VariableID(id)); auto index = expr.find_first_of('['); // If an image is part of an argument buffer translate this to a legal identifier. 
- for (auto &c : expr) - if (c == '.') - c = '_'; + string::size_type period = 0; + while ((period = expr.find_first_of('.', period)) != string::npos && period < index) + expr[period] = '_'; if (index == string::npos) return expr + swizzle_name_suffix; @@ -5621,99 +11473,39 @@ bool CompilerMSL::is_patch_block(const SPIRType &type) // Checks whether the ID is a row_major matrix that requires conversion before use bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id) { - // Natively supported row-major matrices do not need to be converted. - if (backend.native_row_major_matrix) - return false; - - // Non-matrix or column-major matrix types do not need to be converted. - if (!has_decoration(id, DecorationRowMajor)) - return false; - - // Generate a function that will swap matrix elements from row-major to column-major. - // Packed row-matrix should just use transpose() function. - if (!has_extended_decoration(id, SPIRVCrossDecorationPacked)) - { - const auto type = expression_type(id); - add_convert_row_major_matrix_function(type.columns, type.vecsize); - } - - return true; + auto *e = maybe_get(id); + if (e) + return e->need_transpose; + else + return has_decoration(id, DecorationRowMajor); } // Checks whether the member is a row_major matrix that requires conversion before use bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) { - // Natively supported row-major matrices do not need to be converted. - if (backend.native_row_major_matrix) - return false; - - // Non-matrix or column-major matrix types do not need to be converted. - if (!has_member_decoration(type.self, index, DecorationRowMajor)) - return false; - - // Generate a function that will swap matrix elements from row-major to column-major. - // Packed row-matrix should just use transpose() function. 
- if (!has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked)) - { - const auto mbr_type = get(type.member_types[index]); - add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize); - } - - return true; + return has_member_decoration(type.self, index, DecorationRowMajor); } -// Adds a function suitable for converting a non-square row-major matrix to a column-major matrix. -void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows) +string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, + bool is_packed) { - SPVFuncImpl spv_func; - if (cols == rows) // Square matrix...just use transpose() function - return; - else if (cols == 2 && rows == 3) - spv_func = SPVFuncImplRowMajor2x3; - else if (cols == 2 && rows == 4) - spv_func = SPVFuncImplRowMajor2x4; - else if (cols == 3 && rows == 2) - spv_func = SPVFuncImplRowMajor3x2; - else if (cols == 3 && rows == 4) - spv_func = SPVFuncImplRowMajor3x4; - else if (cols == 4 && rows == 2) - spv_func = SPVFuncImplRowMajor4x2; - else if (cols == 4 && rows == 3) - spv_func = SPVFuncImplRowMajor4x3; - else - SPIRV_CROSS_THROW("Could not convert row-major matrix."); - - auto rslt = spv_function_implementations.insert(spv_func); - if (rslt.second) + if (!is_matrix(exp_type)) { - suppress_missing_prototypes = true; - force_recompile(); + return CompilerGLSL::convert_row_major_matrix(std::move(exp_str), exp_type, physical_type_id, is_packed); } -} - -// Wraps the expression string in a function call that converts the -// row_major matrix result of the expression to a column_major matrix. 
-string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, bool is_packed) -{ - strip_enclosed_expression(exp_str); - - string func_name; - - // Square and packed matrices can just use transpose - if (exp_type.columns == exp_type.vecsize || is_packed) - func_name = "transpose"; else - func_name = string("spvConvertFromRowMajor") + to_string(exp_type.columns) + "x" + to_string(exp_type.vecsize); - - return join(func_name, "(", exp_str, ")"); + { + strip_enclosed_expression(exp_str); + if (physical_type_id != 0 || is_packed) + exp_str = unpack_expression_type(exp_str, exp_type, physical_type_id, is_packed, true); + return join("transpose(", exp_str, ")"); + } } // Called automatically at the end of the entry point function void CompilerMSL::emit_fixup() { - if ((get_execution_model() == ExecutionModelVertex || - get_execution_model() == ExecutionModelTessellationEvaluation) && - stage_out_var_id && !qual_pos_var_name.empty() && !capture_output_to_buffer) + if (is_vertex_like_shader() && stage_out_var_id && !qual_pos_var_name.empty() && !capture_output_to_buffer) { if (options.vertex.fixup_clipspace) statement(qual_pos_var_name, ".z = (", qual_pos_var_name, ".z + ", qual_pos_var_name, @@ -5728,89 +11520,149 @@ void CompilerMSL::emit_fixup() string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const string &qualifier) { - auto &membertype = get(member_type_id); - - // If this member requires padding to maintain alignment, emit a dummy padding member. - MSLStructMemberKey key = get_struct_member_key(type.self, index); - uint32_t pad_len = struct_member_padding[key]; - if (pad_len > 0) - statement("char _m", index, "_pad", "[", to_string(pad_len), "];"); + if (member_is_remapped_physical_type(type, index)) + member_type_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID); + auto &physical_type = get(member_type_id); // If this member is packed, mark it as so. 
- string pack_pfx = ""; - - const SPIRType *effective_membertype = &membertype; - SPIRType override_type; + string pack_pfx; + // Allow Metal to use the array template to make arrays a value type uint32_t orig_id = 0; if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)) orig_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID); - if (member_is_packed_type(type, index)) + bool row_major = false; + if (is_matrix(physical_type)) + row_major = has_member_decoration(type.self, index, DecorationRowMajor); + + SPIRType row_major_physical_type; + const SPIRType *declared_type = &physical_type; + + // If a struct is being declared with physical layout, + // do not use array wrappers. + // This avoids a lot of complicated cases with packed vectors and matrices, + // and generally we cannot copy full arrays in and out of buffers into Function + // address space. + // Array of resources should also be declared as builtin arrays. + if (has_member_decoration(type.self, index, DecorationOffset)) + is_using_builtin_array = true; + else if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary)) + is_using_builtin_array = true; + + if (member_is_packed_physical_type(type, index)) { // If we're packing a matrix, output an appropriate typedef - if (membertype.basetype == SPIRType::Struct) + if (physical_type.basetype == SPIRType::Struct) { - pack_pfx = "/* FIXME: A padded struct is needed here. If you see this message, file a bug! 
*/ "; + SPIRV_CROSS_THROW("Cannot emit a packed struct currently."); } - else if (membertype.vecsize > 1 && membertype.columns > 1) + else if (is_matrix(physical_type)) { - uint32_t rows = membertype.vecsize; - uint32_t cols = membertype.columns; + uint32_t rows = physical_type.vecsize; + uint32_t cols = physical_type.columns; pack_pfx = "packed_"; - if (has_member_decoration(type.self, index, DecorationRowMajor)) + if (row_major) { // These are stored transposed. - rows = membertype.columns; - cols = membertype.vecsize; + rows = physical_type.columns; + cols = physical_type.vecsize; pack_pfx = "packed_rm_"; } - string base_type = membertype.width == 16 ? "half" : "float"; + string base_type = physical_type.width == 16 ? "half" : "float"; string td_line = "typedef "; td_line += "packed_" + base_type + to_string(rows); td_line += " " + pack_pfx; // Use the actual matrix size here. - td_line += base_type + to_string(membertype.columns) + "x" + to_string(membertype.vecsize); + td_line += base_type + to_string(physical_type.columns) + "x" + to_string(physical_type.vecsize); td_line += "[" + to_string(cols) + "]"; td_line += ";"; add_typedef_line(td_line); } - else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct && - type_struct_member_array_stride(type, index) == 4 * membertype.width / 8) + else if (!is_scalar(physical_type)) // scalar type is already packed. + pack_pfx = "packed_"; + } + else if (is_matrix(physical_type)) + { + if (!msl_options.supports_msl_version(3, 0) && + has_extended_decoration(type.self, SPIRVCrossDecorationWorkgroupStruct)) + { + pack_pfx = "spvStorage_"; + add_spv_func_and_recompile(SPVFuncImplStorageMatrix); + // The pack prefix causes problems with array wrappers. + is_using_builtin_array = true; + } + if (row_major) { - // A "packed" float array, but we pad here instead to 4-vector. 
- override_type = membertype; - override_type.vecsize = 4; - effective_membertype = &override_type; + // Need to declare type with flipped vecsize/columns. + row_major_physical_type = physical_type; + swap(row_major_physical_type.vecsize, row_major_physical_type.columns); + declared_type = &row_major_physical_type; } - else - pack_pfx = "packed_"; } - // Very specifically, image load-store in argument buffers are disallowed on MSL on iOS. - if (msl_options.is_ios() && membertype.basetype == SPIRType::Image && membertype.image.sampled == 2) + // iOS Tier 1 argument buffers do not support writable images. + if (physical_type.basetype == SPIRType::Image && + physical_type.image.sampled == 2 && + msl_options.is_ios() && + msl_options.argument_buffers_tier <= Options::ArgumentBuffersTier::Tier1 && + !has_decoration(orig_id, DecorationNonWritable)) { - if (!has_decoration(orig_id, DecorationNonWritable)) - SPIRV_CROSS_THROW("Writable images are not allowed in argument buffers on iOS."); + SPIRV_CROSS_THROW("Writable images are not allowed on Tier1 argument buffers on iOS."); } // Array information is baked into these types. string array_type; - if (membertype.basetype != SPIRType::Image && membertype.basetype != SPIRType::Sampler && - membertype.basetype != SPIRType::SampledImage) + if (physical_type.basetype != SPIRType::Image && physical_type.basetype != SPIRType::Sampler && + physical_type.basetype != SPIRType::SampledImage) { - array_type = type_to_array_glsl(membertype); + BuiltIn builtin = BuiltInMax; + + // Special handling. In [[stage_out]] or [[stage_in]] blocks, + // we need flat arrays, but if we're somehow declaring gl_PerVertex for constant array reasons, we want + // template array types to be declared. 
+ bool is_ib_in_out = + ((stage_out_var_id && get_stage_out_struct_type().self == type.self && + variable_storage_requires_stage_io(StorageClassOutput)) || + (stage_in_var_id && get_stage_in_struct_type().self == type.self && + variable_storage_requires_stage_io(StorageClassInput))); + if (is_ib_in_out && is_member_builtin(type, index, &builtin)) + is_using_builtin_array = true; + array_type = type_to_array_glsl(physical_type); } - return join(pack_pfx, type_to_glsl(*effective_membertype, orig_id), " ", qualifier, to_member_name(type, index), - member_attribute_qualifier(type, index), array_type, ";"); + auto result = join(pack_pfx, type_to_glsl(*declared_type, orig_id, true), " ", qualifier, + to_member_name(type, index), member_attribute_qualifier(type, index), array_type, ";"); + + is_using_builtin_array = false; + return result; } // Emit a structure member, padding and packing to maintain the correct memeber alignments. void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const string &qualifier, uint32_t) { + // If this member requires padding to maintain its declared offset, emit a dummy padding member before it. + if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget)) + { + uint32_t pad_len = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget); + statement("char _m", index, "_pad", "[", pad_len, "];"); + } + + // Handle HLSL-style 0-based vertex/instance index. 
+ builtin_declaration = true; statement(to_struct_member(type, member_type_id, index, qualifier)); + builtin_declaration = false; +} + +void CompilerMSL::emit_struct_padding_target(const SPIRType &type) +{ + uint32_t struct_size = get_declared_struct_size_msl(type, true, true); + uint32_t target_size = get_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget); + if (target_size < struct_size) + SPIRV_CROSS_THROW("Cannot pad with negative bytes."); + else if (target_size > struct_size) + statement("char _m0_final_padding[", target_size - struct_size, "];"); } // Return a MSL qualifier for the specified function attribute member @@ -5825,8 +11677,15 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in bool is_builtin = is_member_builtin(type, index, &builtin); if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary)) - return join(" [[id(", - get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")]]"); + { + string quals = join( + " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")"); + if (interlocked_resources.count( + get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID))) + quals += ", raster_order_group(0)"; + quals += "]]"; + return quals; + } // Vertex function inputs if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput) @@ -5841,6 +11700,8 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in case BuiltInInstanceId: case BuiltInInstanceIndex: case BuiltInBaseInstance: + if (msl_options.vertex_for_tessellation) + return ""; return string(" [[") + builtin_qualifier(builtin) + "]]"; case BuiltInDrawIndex: @@ -5850,13 +11711,19 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in return ""; } } - uint32_t locn = get_ordered_member_location(type.self, index); + + uint32_t 
locn; + if (is_builtin) + locn = get_or_allocate_builtin_input_member_location(builtin, type.self, index); + else + locn = get_member_location(type.self, index); + if (locn != k_unknown_location) return string(" [[attribute(") + convert_to_string(locn) + ")]]"; } // Vertex and tessellation evaluation function outputs - if ((execution.model == ExecutionModelVertex || execution.model == ExecutionModelTessellationEvaluation) && + if (((execution.model == ExecutionModelVertex && !msl_options.vertex_for_tessellation) || is_tese_shader()) && type.storage == StorageClassOutput) { if (is_builtin) @@ -5876,26 +11743,41 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in /* fallthrough */ case BuiltInPosition: case BuiltInLayer: - case BuiltInClipDistance: return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); + case BuiltInClipDistance: + if (has_member_decoration(type.self, index, DecorationIndex)) + return join(" [[user(clip", get_member_decoration(type.self, index, DecorationIndex), ")]]"); + else + return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); + + case BuiltInCullDistance: + if (has_member_decoration(type.self, index, DecorationIndex)) + return join(" [[user(cull", get_member_decoration(type.self, index, DecorationIndex), ")]]"); + else + return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? 
"" : " "); + default: return ""; } } - uint32_t comp; - uint32_t locn = get_ordered_member_location(type.self, index, &comp); - if (locn != k_unknown_location) - { - if (comp != k_unknown_component) - return string(" [[user(locn") + convert_to_string(locn) + "_" + convert_to_string(comp) + ")]]"; - else - return string(" [[user(locn") + convert_to_string(locn) + ")]]"; - } + string loc_qual = member_location_attribute_qualifier(type, index); + if (!loc_qual.empty()) + return join(" [[", loc_qual, "]]"); + } + + if (execution.model == ExecutionModelVertex && msl_options.vertex_for_tessellation && type.storage == StorageClassOutput) + { + // For this type of shader, we always arrange for it to capture its + // output to a buffer. For this reason, qualifiers are irrelevant here. + if (is_builtin) + // We still have to assign a location so the output struct will sort correctly. + get_or_allocate_builtin_output_member_location(builtin, type.self, index); + return ""; } // Tessellation control function inputs - if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassInput) + if (is_tesc_shader() && type.storage == StorageClassInput) { if (is_builtin) { @@ -5903,8 +11785,13 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in { case BuiltInInvocationId: case BuiltInPrimitiveId: + if (msl_options.multi_patch_workgroup) + return ""; + return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage case BuiltInSubgroupSize: // FIXME: Should work in any stage + if (msl_options.emulate_subgroups) + return ""; return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? 
"" : " "); case BuiltInPatchVertices: return ""; @@ -5913,21 +11800,32 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in break; } } - uint32_t locn = get_ordered_member_location(type.self, index); + if (msl_options.multi_patch_workgroup) + return ""; + + uint32_t locn; + if (is_builtin) + locn = get_or_allocate_builtin_input_member_location(builtin, type.self, index); + else + locn = get_member_location(type.self, index); + if (locn != k_unknown_location) return string(" [[attribute(") + convert_to_string(locn) + ")]]"; } // Tessellation control function outputs - if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassOutput) + if (is_tesc_shader() && type.storage == StorageClassOutput) { // For this type of shader, we always arrange for it to capture its // output to a buffer. For this reason, qualifiers are irrelevant here. + if (is_builtin) + // We still have to assign a location so the output struct will sort correctly. + get_or_allocate_builtin_output_member_location(builtin, type.self, index); return ""; } // Tessellation evaluation function inputs - if (execution.model == ExecutionModelTessellationEvaluation && type.storage == StorageClassInput) + if (is_tese_shader() && type.storage == StorageClassInput) { if (is_builtin) { @@ -5943,10 +11841,20 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in break; } } + + if (msl_options.raw_buffer_tese_input) + return ""; + // The special control point array must not be marked with an attribute. 
if (get_type(type.member_types[index]).basetype == SPIRType::ControlPointArray) return ""; - uint32_t locn = get_ordered_member_location(type.self, index); + + uint32_t locn; + if (is_builtin) + locn = get_or_allocate_builtin_input_member_location(builtin, type.self, index); + else + locn = get_member_location(type.self, index); + if (locn != k_unknown_location) return string(" [[attribute(") + convert_to_string(locn) + ")]]"; } @@ -5962,7 +11870,7 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in switch (builtin) { case BuiltInViewIndex: - if (!msl_options.multiview) + if (!msl_options.multiview || !msl_options.multiview_layered_rendering) break; /* fallthrough */ case BuiltInFrontFacing: @@ -5971,29 +11879,24 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in case BuiltInSampleId: case BuiltInSampleMask: case BuiltInLayer: - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: quals = builtin_qualifier(builtin); break; + case BuiltInClipDistance: + return join(" [[user(clip", get_member_decoration(type.self, index, DecorationIndex), ")]]"); + case BuiltInCullDistance: + return join(" [[user(cull", get_member_decoration(type.self, index, DecorationIndex), ")]]"); + default: break; } } else - { - uint32_t comp; - uint32_t locn = get_ordered_member_location(type.self, index, &comp); - if (locn != k_unknown_location) - { - if (comp != k_unknown_component) - quals = string("user(locn") + convert_to_string(locn) + "_" + convert_to_string(comp) + ")"; - else - quals = string("user(locn") + convert_to_string(locn) + ")"; - } - } + quals = member_location_attribute_qualifier(type, index); - if (builtin == BuiltInBaryCoordNV || builtin == BuiltInBaryCoordNoPerspNV) + if (builtin == BuiltInBaryCoordKHR || builtin == BuiltInBaryCoordNoPerspKHR) { if (has_member_decoration(type.self, index, DecorationFlat) || has_member_decoration(type.self, 
index, DecorationCentroid) || @@ -6055,19 +11958,33 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in switch (builtin) { case BuiltInFragStencilRefEXT: + // Similar to PointSize, only mark FragStencilRef if there's a stencil buffer. + // Some shaders may include a FragStencilRef builtin even when used to render + // without a stencil attachment, and Metal will reject this builtin + // when compiling the shader into a render pipeline that does not set + // stencilAttachmentPixelFormat. + if (!msl_options.enable_frag_stencil_ref_builtin) + return ""; if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Stencil export only supported in MSL 2.1 and up."); return string(" [[") + builtin_qualifier(builtin) + "]]"; - case BuiltInSampleMask: case BuiltInFragDepth: + // Ditto FragDepth. + if (!msl_options.enable_frag_depth_builtin) + return ""; + /* fallthrough */ + case BuiltInSampleMask: return string(" [[") + builtin_qualifier(builtin) + "]]"; default: return ""; } } - uint32_t locn = get_ordered_member_location(type.self, index); + uint32_t locn = get_member_location(type.self, index); + // Metal will likely complain about missing color attachments, too. 
+ if (locn != k_unknown_location && !(msl_options.enable_frag_output_mask & (1 << locn))) + return ""; if (locn != k_unknown_location && has_member_decoration(type.self, index, DecorationIndex)) return join(" [[color(", locn, "), index(", get_member_decoration(type.self, index, DecorationIndex), ")]]"); @@ -6086,15 +12003,18 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in { switch (builtin) { + case BuiltInNumSubgroups: + case BuiltInSubgroupId: + case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage + case BuiltInSubgroupSize: // FIXME: Should work in any stage + if (msl_options.emulate_subgroups) + break; + /* fallthrough */ case BuiltInGlobalInvocationId: case BuiltInWorkgroupId: case BuiltInNumWorkgroups: case BuiltInLocalInvocationId: case BuiltInLocalInvocationIndex: - case BuiltInNumSubgroups: - case BuiltInSubgroupId: - case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage - case BuiltInSubgroupSize: // FIXME: Should work in any stage return string(" [[") + builtin_qualifier(builtin) + "]]"; default: @@ -6106,28 +12026,136 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in return ""; } +// A user-defined output variable is considered to match an input variable in the subsequent +// stage if the two variables are declared with the same Location and Component decoration and +// match in type and decoration, except that interpolation decorations are not required to match. +// For the purposes of interface matching, variables declared without a Component decoration are +// considered to have a Component decoration of zero. 
+string CompilerMSL::member_location_attribute_qualifier(const SPIRType &type, uint32_t index) +{ + string quals; + uint32_t comp; + uint32_t locn = get_member_location(type.self, index, &comp); + if (locn != k_unknown_location) + { + quals += "user(locn"; + quals += convert_to_string(locn); + if (comp != k_unknown_component && comp != 0) + { + quals += "_"; + quals += convert_to_string(comp); + } + quals += ")"; + } + return quals; +} + // Returns the location decoration of the member with the specified index in the specified type. // If the location of the member has been explicitly set, that location is used. If not, this // function assumes the members are ordered in their location order, and simply returns the // index as the location. -uint32_t CompilerMSL::get_ordered_member_location(uint32_t type_id, uint32_t index, uint32_t *comp) +uint32_t CompilerMSL::get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp) const { - auto &m = ir.meta[type_id]; - if (index < m.members.size()) + if (comp) { - auto &dec = m.members[index]; - if (comp) - { - if (dec.decoration_flags.get(DecorationComponent)) - *comp = dec.component; - else - *comp = k_unknown_component; - } - if (dec.decoration_flags.get(DecorationLocation)) - return dec.location; + if (has_member_decoration(type_id, index, DecorationComponent)) + *comp = get_member_decoration(type_id, index, DecorationComponent); + else + *comp = k_unknown_component; + } + + if (has_member_decoration(type_id, index, DecorationLocation)) + return get_member_decoration(type_id, index, DecorationLocation); + else + return k_unknown_location; +} + +uint32_t CompilerMSL::get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin, + uint32_t type_id, uint32_t index, + uint32_t *comp) +{ + uint32_t loc = get_member_location(type_id, index, comp); + if (loc != k_unknown_location) + return loc; + + if (comp) + *comp = k_unknown_component; + + // Late allocation. 
Find a location which is unused by the application. + // This can happen for built-in inputs in tessellation which are mixed and matched with user inputs. + auto &mbr_type = get(get(type_id).member_types[index]); + uint32_t count = type_to_location_count(mbr_type); + + loc = 0; + + const auto location_range_in_use = [this](uint32_t location, uint32_t location_count) -> bool { + for (uint32_t i = 0; i < location_count; i++) + if (location_inputs_in_use.count(location + i) != 0) + return true; + return false; + }; + + while (location_range_in_use(loc, count)) + loc++; + + set_member_decoration(type_id, index, DecorationLocation, loc); + + // Triangle tess level inputs are shared in one packed float4, + // mark both builtins as sharing one location. + if (!msl_options.raw_buffer_tese_input && is_tessellating_triangles() && + (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter)) + { + builtin_to_automatic_input_location[BuiltInTessLevelInner] = loc; + builtin_to_automatic_input_location[BuiltInTessLevelOuter] = loc; + } + else + builtin_to_automatic_input_location[builtin] = loc; + + mark_location_as_used_by_shader(loc, mbr_type, StorageClassInput, true); + return loc; +} + +uint32_t CompilerMSL::get_or_allocate_builtin_output_member_location(spv::BuiltIn builtin, + uint32_t type_id, uint32_t index, + uint32_t *comp) +{ + uint32_t loc = get_member_location(type_id, index, comp); + if (loc != k_unknown_location) + return loc; + loc = 0; + + if (comp) + *comp = k_unknown_component; + + // Late allocation. Find a location which is unused by the application. + // This can happen for built-in outputs in tessellation which are mixed and matched with user inputs. 
+ auto &mbr_type = get(get(type_id).member_types[index]); + uint32_t count = type_to_location_count(mbr_type); + + const auto location_range_in_use = [this](uint32_t location, uint32_t location_count) -> bool { + for (uint32_t i = 0; i < location_count; i++) + if (location_outputs_in_use.count(location + i) != 0) + return true; + return false; + }; + + while (location_range_in_use(loc, count)) + loc++; + + set_member_decoration(type_id, index, DecorationLocation, loc); + + // Triangle tess level inputs are shared in one packed float4; + // mark both builtins as sharing one location. + if (is_tessellating_triangles() && (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter)) + { + builtin_to_automatic_output_location[BuiltInTessLevelInner] = loc; + builtin_to_automatic_output_location[BuiltInTessLevelOuter] = loc; } + else + builtin_to_automatic_output_location[builtin] = loc; - return index; + mark_location_as_used_by_shader(loc, mbr_type, StorageClassOutput, true); + return loc; } // Returns the type declaration for a function, including the @@ -6150,7 +12178,9 @@ string CompilerMSL::func_type_decl(SPIRType &type) switch (execution.model) { case ExecutionModelVertex: - entry_type = "vertex"; + if (msl_options.vertex_for_tessellation && !msl_options.supports_msl_version(1, 2)) + SPIRV_CROSS_THROW("Tessellation requires Metal 1.2."); + entry_type = msl_options.vertex_for_tessellation ? "kernel" : "vertex"; break; case ExecutionModelTessellationEvaluation: if (!msl_options.supports_msl_version(1, 2)) @@ -6158,15 +12188,13 @@ string CompilerMSL::func_type_decl(SPIRType &type) if (execution.flags.get(ExecutionModeIsolines)) SPIRV_CROSS_THROW("Metal does not support isoline tessellation."); if (msl_options.is_ios()) - entry_type = - join("[[ patch(", execution.flags.get(ExecutionModeTriangles) ? "triangle" : "quad", ") ]] vertex"); + entry_type = join("[[ patch(", is_tessellating_triangles() ? 
"triangle" : "quad", ") ]] vertex"); else - entry_type = join("[[ patch(", execution.flags.get(ExecutionModeTriangles) ? "triangle" : "quad", ", ", + entry_type = join("[[ patch(", is_tessellating_triangles() ? "triangle" : "quad", ", ", execution.output_vertices, ") ]] vertex"); break; case ExecutionModelFragment: - entry_type = - execution.flags.get(ExecutionModeEarlyFragmentTests) ? "[[ early_fragment_tests ]] fragment" : "fragment"; + entry_type = uses_explicit_early_fragment_test() ? "[[ early_fragment_tests ]] fragment" : "fragment"; break; case ExecutionModelTessellationControl: if (!msl_options.supports_msl_version(1, 2)) @@ -6186,25 +12214,58 @@ string CompilerMSL::func_type_decl(SPIRType &type) return entry_type + " " + return_type; } +bool CompilerMSL::is_tesc_shader() const +{ + return get_execution_model() == ExecutionModelTessellationControl; +} + +bool CompilerMSL::is_tese_shader() const +{ + return get_execution_model() == ExecutionModelTessellationEvaluation; +} + +bool CompilerMSL::uses_explicit_early_fragment_test() +{ + auto &ep_flags = get_entry_point().flags; + return ep_flags.get(ExecutionModeEarlyFragmentTests) || ep_flags.get(ExecutionModePostDepthCoverage); +} + // In MSL, address space qualifiers are required for all pointer or reference variables string CompilerMSL::get_argument_address_space(const SPIRVariable &argument) { const auto &type = get(argument.basetype); + return get_type_address_space(type, argument.self, true); +} + +string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument) +{ + // This can be called for variable pointer contexts as well, so be very careful about which method we choose. 
+ Bitset flags; + auto *var = maybe_get(id); + if (var && type.basetype == SPIRType::Struct && + (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))) + flags = get_buffer_block_flags(id); + else + flags = get_decoration_bitset(id); + const char *addr_space = nullptr; switch (type.storage) { case StorageClassWorkgroup: - return "threadgroup"; + addr_space = "threadgroup"; + break; case StorageClassStorageBuffer: + case StorageClassPhysicalStorageBuffer: { // For arguments from variable pointers, we use the write count deduction, so // we should not assume any constness here. Only for global SSBOs. bool readonly = false; - if (has_decoration(type.self, DecorationBlock)) - readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable); + if (!var || has_decoration(type.self, DecorationBlock)) + readonly = flags.get(DecorationNonWritable); - return readonly ? "const device" : "device"; + addr_space = readonly ? "const device" : "device"; + break; } case StorageClassUniform: @@ -6214,103 +12275,112 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument) { bool ssbo = has_decoration(type.self, DecorationBufferBlock); if (ssbo) - { - bool readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable); - return readonly ? "const device" : "device"; - } + addr_space = flags.get(DecorationNonWritable) ? "const device" : "device"; else - return "constant"; + addr_space = "constant"; + } + else if (!argument) + { + addr_space = "constant"; + } + else if (type_is_msl_framebuffer_fetch(type)) + { + // Subpass inputs are passed around by value. + addr_space = ""; } break; case StorageClassFunction: case StorageClassGeneric: - // No address space for plain values. - return type.pointer ? 
"thread" : ""; + break; case StorageClassInput: - if (get_execution_model() == ExecutionModelTessellationControl && argument.basevariable == stage_in_ptr_var_id) - return "threadgroup"; + if (is_tesc_shader() && var && var->basevariable == stage_in_ptr_var_id) + addr_space = msl_options.multi_patch_workgroup ? "const device" : "threadgroup"; + // Don't pass tessellation levels in the device AS; we load and convert them + // to float manually. + if (is_tese_shader() && msl_options.raw_buffer_tese_input && var) + { + bool is_stage_in = var->basevariable == stage_in_ptr_var_id; + bool is_patch_stage_in = has_decoration(var->self, DecorationPatch); + bool is_builtin = has_decoration(var->self, DecorationBuiltIn); + BuiltIn builtin = (BuiltIn)get_decoration(var->self, DecorationBuiltIn); + bool is_tess_level = is_builtin && (builtin == BuiltInTessLevelOuter || builtin == BuiltInTessLevelInner); + if (is_stage_in || (is_patch_stage_in && !is_tess_level)) + addr_space = "const device"; + } + if (get_execution_model() == ExecutionModelFragment && var && var->basevariable == stage_in_var_id) + addr_space = "thread"; break; case StorageClassOutput: if (capture_output_to_buffer) - return "device"; + { + if (var && type.storage == StorageClassOutput) + { + bool is_masked = is_stage_output_variable_masked(*var); + + if (is_masked) + { + if (is_tessellation_shader()) + addr_space = "threadgroup"; + else + addr_space = "thread"; + } + else if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup)) + addr_space = "threadgroup"; + } + + if (!addr_space) + addr_space = "device"; + } break; default: break; } - return "thread"; + if (!addr_space) + { + // No address space for plain values. + addr_space = type.pointer || (argument && type.basetype == SPIRType::ControlPointArray) ? "thread" : ""; + } + + return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? 
"volatile " : "", addr_space); } -string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id) +const char *CompilerMSL::to_restrict(uint32_t id, bool space) { - switch (type.storage) - { - case StorageClassWorkgroup: - return "threadgroup"; - - case StorageClassStorageBuffer: + // This can be called for variable pointer contexts as well, so be very careful about which method we choose. + Bitset flags; + if (ir.ids[id].get_type() == TypeVariable) { - // This can be called for variable pointer contexts as well, so be very careful about which method we choose. - Bitset flags; - if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock)) + uint32_t type_id = expression_type_id(id); + auto &type = expression_type(id); + if (type.basetype == SPIRType::Struct && + (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock))) flags = get_buffer_block_flags(id); else flags = get_decoration_bitset(id); - - return flags.get(DecorationNonWritable) ? "const device" : "device"; - } - - case StorageClassUniform: - case StorageClassUniformConstant: - case StorageClassPushConstant: - if (type.basetype == SPIRType::Struct) - { - bool ssbo = has_decoration(type.self, DecorationBufferBlock); - if (ssbo) - { - // This can be called for variable pointer contexts as well, so be very careful about which method we choose. - Bitset flags; - if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock)) - flags = get_buffer_block_flags(id); - else - flags = get_decoration_bitset(id); - - return flags.get(DecorationNonWritable) ? "const device" : "device"; - } - else - return "constant"; - } - else - return "constant"; - - case StorageClassFunction: - case StorageClassGeneric: - // No address space for plain values. - return type.pointer ? 
"thread" : ""; - - case StorageClassOutput: - if (capture_output_to_buffer) - return "device"; - break; - - default: - break; } + else + flags = get_decoration_bitset(id); - return "thread"; + return flags.get(DecorationRestrict) || flags.get(DecorationRestrictPointerEXT) ? + (space ? "__restrict " : "__restrict") : ""; } string CompilerMSL::entry_point_arg_stage_in() { string decl; + if ((is_tesc_shader() && msl_options.multi_patch_workgroup) || + (is_tese_shader() && msl_options.raw_buffer_tese_input)) + return decl; + // Stage-in structure uint32_t stage_in_id; - if (get_execution_model() == ExecutionModelTessellationEvaluation) + if (is_tese_shader()) stage_in_id = patch_stage_in_var_id; else stage_in_id = stage_in_var_id; @@ -6327,15 +12397,95 @@ string CompilerMSL::entry_point_arg_stage_in() return decl; } +// Returns true if this input builtin should be a direct parameter on a shader function parameter list, +// and false for builtins that should be passed or calculated some other way. +bool CompilerMSL::is_direct_input_builtin(BuiltIn bi_type) +{ + switch (bi_type) + { + // Vertex function in + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInBaseVertex: + case BuiltInInstanceId: + case BuiltInInstanceIndex: + case BuiltInBaseInstance: + return get_execution_model() != ExecutionModelVertex || !msl_options.vertex_for_tessellation; + // Tess. control function in + case BuiltInPosition: + case BuiltInPointSize: + case BuiltInClipDistance: + case BuiltInCullDistance: + case BuiltInPatchVertices: + return false; + case BuiltInInvocationId: + case BuiltInPrimitiveId: + return !is_tesc_shader() || !msl_options.multi_patch_workgroup; + // Tess. 
evaluation function in + case BuiltInTessLevelInner: + case BuiltInTessLevelOuter: + return false; + // Fragment function in + case BuiltInSamplePosition: + case BuiltInHelperInvocation: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: + return false; + case BuiltInViewIndex: + return get_execution_model() == ExecutionModelFragment && msl_options.multiview && + msl_options.multiview_layered_rendering; + // Compute function in + case BuiltInSubgroupId: + case BuiltInNumSubgroups: + return !msl_options.emulate_subgroups; + // Any stage function in + case BuiltInDeviceIndex: + case BuiltInSubgroupEqMask: + case BuiltInSubgroupGeMask: + case BuiltInSubgroupGtMask: + case BuiltInSubgroupLeMask: + case BuiltInSubgroupLtMask: + return false; + case BuiltInSubgroupSize: + if (msl_options.fixed_subgroup_size != 0) + return false; + /* fallthrough */ + case BuiltInSubgroupLocalInvocationId: + return !msl_options.emulate_subgroups; + default: + return true; + } +} + +// Returns true if this is a fragment shader that runs per sample, and false otherwise. +bool CompilerMSL::is_sample_rate() const +{ + auto &caps = get_declared_capabilities(); + return get_execution_model() == ExecutionModelFragment && + (msl_options.force_sample_rate_shading || + std::find(caps.begin(), caps.end(), CapabilitySampleRateShading) != caps.end() || + (msl_options.use_framebuffer_fetch_subpasses && need_subpass_input_ms)); +} + +bool CompilerMSL::is_intersection_query() const +{ + auto &caps = get_declared_capabilities(); + return std::find(caps.begin(), caps.end(), CapabilityRayQueryKHR) != caps.end(); +} + void CompilerMSL::entry_point_args_builtin(string &ep_args) { // Builtin variables + SmallVector, 8> active_builtins; ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { + if (var.storage != StorageClassInput) + return; + auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); // Don't emit SamplePosition as a separate parameter. 
In the entry // point, we get that by calling get_sample_position() on the sample ID. - if (var.storage == StorageClassInput && is_builtin_variable(var) && + if (is_builtin_variable(var) && get_variable_data_type(var).basetype != SPIRType::Struct && get_variable_data_type(var).basetype != SPIRType::ControlPointArray) { @@ -6344,36 +12494,74 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) if (!active_input_builtins.get(bi_type) || !interface_variable_exists_in_entry_point(var_id)) return; - // These builtins are emitted specially. If we pass this branch, the builtin directly matches - // a MSL builtin. - if (bi_type != BuiltInSamplePosition && bi_type != BuiltInHelperInvocation && - bi_type != BuiltInPatchVertices && bi_type != BuiltInTessLevelInner && - bi_type != BuiltInTessLevelOuter && bi_type != BuiltInPosition && bi_type != BuiltInPointSize && - bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance && bi_type != BuiltInSubgroupEqMask && - bi_type != BuiltInBaryCoordNV && bi_type != BuiltInBaryCoordNoPerspNV && - bi_type != BuiltInSubgroupGeMask && bi_type != BuiltInSubgroupGtMask && - bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask && - ((get_execution_model() == ExecutionModelFragment && msl_options.multiview) || - bi_type != BuiltInViewIndex) && - (get_execution_model() == ExecutionModelGLCompute || - (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) || - (bi_type != BuiltInSubgroupLocalInvocationId && bi_type != BuiltInSubgroupSize))) + // Remember this variable. We may need to correct its type. + active_builtins.push_back(make_pair(&var, bi_type)); + + if (is_direct_input_builtin(bi_type)) { if (!ep_args.empty()) ep_args += ", "; - ep_args += builtin_type_decl(bi_type, var_id) + " " + to_expression(var_id); - ep_args += " [[" + builtin_qualifier(bi_type) + "]]"; + // Handle HLSL-style 0-based vertex/instance index. 
+ builtin_declaration = true; + + // Handle different MSL gl_TessCoord types. (float2, float3) + if (bi_type == BuiltInTessCoord && get_entry_point().flags.get(ExecutionModeQuads)) + ep_args += "float2 " + to_expression(var_id) + "In"; + else + ep_args += builtin_type_decl(bi_type, var_id) + " " + to_expression(var_id); + + ep_args += " [[" + builtin_qualifier(bi_type); + if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage)) + { + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Post-depth coverage requires MSL 2.0."); + if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Post-depth coverage on Mac requires MSL 2.3."); + ep_args += ", post_depth_coverage"; + } + ep_args += "]]"; + builtin_declaration = false; } } + + if (has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase)) + { + // This is a special implicit builtin, not corresponding to any SPIR-V builtin, + // which holds the base that was passed to vkCmdDispatchBase() or vkCmdDrawIndexed(). If it's present, + // assume we emitted it for a good reason. + assert(msl_options.supports_msl_version(1, 2)); + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_origin]]"; + } + + if (has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInStageInputSize)) + { + // This is another special implicit builtin, not corresponding to any SPIR-V builtin, + // which holds the number of vertices and instances to draw. If it's present, + // assume we emitted it for a good reason. 
+ assert(msl_options.supports_msl_version(1, 2)); + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_size]]"; + } }); - // Vertex and instance index built-ins - if (needs_vertex_idx_arg) - ep_args += built_in_func_arg(BuiltInVertexIndex, !ep_args.empty()); + // Correct the types of all encountered active builtins. We couldn't do this before + // because ensure_correct_builtin_type() may increase the bound, which isn't allowed + // while iterating over IDs. + for (auto &var : active_builtins) + var.first->basetype = ensure_correct_builtin_type(var.first->basetype, var.second); + + // Handle HLSL-style 0-based vertex/instance index. + if (needs_base_vertex_arg == TriState::Yes) + ep_args += built_in_func_arg(BuiltInBaseVertex, !ep_args.empty()); - if (needs_instance_idx_arg) - ep_args += built_in_func_arg(BuiltInInstanceIndex, !ep_args.empty()); + if (needs_base_instance_arg == TriState::Yes) + ep_args += built_in_func_arg(BuiltInBaseInstance, !ep_args.empty()); if (capture_output_to_buffer) { @@ -6387,14 +12575,15 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) " [[buffer(", msl_options.shader_output_buffer_index, ")]]"); } - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader()) { if (!ep_args.empty()) ep_args += ", "; ep_args += join("constant uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]"); } - else if (stage_out_var_id) + else if (stage_out_var_id && + !(get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)) { if (!ep_args.empty()) ep_args += ", "; @@ -6402,11 +12591,33 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) join("device uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]"); } + if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation && + 
(active_input_builtins.get(BuiltInVertexIndex) || active_input_builtins.get(BuiltInVertexId)) && + msl_options.vertex_index_type != Options::IndexType::None) + { + // Add the index buffer so we can set gl_VertexIndex correctly. + if (!ep_args.empty()) + ep_args += ", "; + switch (msl_options.vertex_index_type) + { + case Options::IndexType::None: + break; + case Options::IndexType::UInt16: + ep_args += join("const device ushort* ", index_buffer_var_name, " [[buffer(", + msl_options.shader_index_buffer_index, ")]]"); + break; + case Options::IndexType::UInt32: + ep_args += join("const device uint* ", index_buffer_var_name, " [[buffer(", + msl_options.shader_index_buffer_index, ")]]"); + break; + } + } + // Tessellation control shaders get three additional parameters: // a buffer to hold the per-patch data, a buffer to hold the per-patch // tessellation levels, and a block of workgroup memory to hold the // input control point data. - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader()) { if (patch_stage_out_var_id) { @@ -6420,15 +12631,107 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) ep_args += ", "; ep_args += join("device ", get_tess_factor_struct_name(), "* ", tess_factor_buffer_var_name, " [[buffer(", convert_to_string(msl_options.shader_tess_factor_buffer_index), ")]]"); + + // Initializer for tess factors must be handled specially since it's never declared as a normal variable. 
+ uint32_t outer_factor_initializer_id = 0; + uint32_t inner_factor_initializer_id = 0; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (!has_decoration(var.self, DecorationBuiltIn) || var.storage != StorageClassOutput || !var.initializer) + return; + + BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + if (builtin == BuiltInTessLevelInner) + inner_factor_initializer_id = var.initializer; + else if (builtin == BuiltInTessLevelOuter) + outer_factor_initializer_id = var.initializer; + }); + + const SPIRConstant *c = nullptr; + + if (outer_factor_initializer_id && (c = maybe_get(outer_factor_initializer_id))) + { + auto &entry_func = get(ir.default_entry_point); + entry_func.fixup_hooks_in.push_back( + [=]() + { + uint32_t components = is_tessellating_triangles() ? 3 : 4; + for (uint32_t i = 0; i < components; i++) + { + statement(builtin_to_glsl(BuiltInTessLevelOuter, StorageClassOutput), "[", i, + "] = ", "half(", to_expression(c->subconstants[i]), ");"); + } + }); + } + + if (inner_factor_initializer_id && (c = maybe_get(inner_factor_initializer_id))) + { + auto &entry_func = get(ir.default_entry_point); + if (is_tessellating_triangles()) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_to_glsl(BuiltInTessLevelInner, StorageClassOutput), " = ", "half(", + to_expression(c->subconstants[0]), ");"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + for (uint32_t i = 0; i < 2; i++) + { + statement(builtin_to_glsl(BuiltInTessLevelInner, StorageClassOutput), "[", i, "] = ", + "half(", to_expression(c->subconstants[i]), ");"); + } + }); + } + } + if (stage_in_var_id) { if (!ep_args.empty()) ep_args += ", "; - ep_args += join("threadgroup ", type_to_glsl(get_stage_in_struct_type()), "* ", input_wg_var_name, - " [[threadgroup(", convert_to_string(msl_options.shader_input_wg_index), ")]]"); + if (msl_options.multi_patch_workgroup) + { + ep_args += join("device ", 
type_to_glsl(get_stage_in_struct_type()), "* ", input_buffer_var_name, + " [[buffer(", convert_to_string(msl_options.shader_input_buffer_index), ")]]"); + } + else + { + ep_args += join("threadgroup ", type_to_glsl(get_stage_in_struct_type()), "* ", input_wg_var_name, + " [[threadgroup(", convert_to_string(msl_options.shader_input_wg_index), ")]]"); + } } } } + // Tessellation evaluation shaders get three additional parameters: + // a buffer for the per-patch data, a buffer for the per-patch + // tessellation levels, and a buffer for the control point data. + if (is_tese_shader() && msl_options.raw_buffer_tese_input) + { + if (patch_stage_in_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += + join("const device ", type_to_glsl(get_patch_stage_in_struct_type()), "* ", patch_input_buffer_var_name, + " [[buffer(", convert_to_string(msl_options.shader_patch_input_buffer_index), ")]]"); + } + + if (tess_level_inner_var_id || tess_level_outer_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("const device ", get_tess_factor_struct_name(), "* ", tess_factor_buffer_var_name, + " [[buffer(", convert_to_string(msl_options.shader_tess_factor_buffer_index), ")]]"); + } + + if (stage_in_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("const device ", type_to_glsl(get_stage_in_struct_type()), "* ", input_buffer_var_name, + " [[buffer(", convert_to_string(msl_options.shader_input_buffer_index), ")]]"); + } + } } string CompilerMSL::entry_point_args_argument_buffer(bool append_comma) @@ -6469,7 +12772,7 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma) claimed_bindings.set(buffer_binding); - ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id); + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(id, true) + to_name(id); ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]"; 
next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1); @@ -6514,28 +12817,61 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) struct Resource { SPIRVariable *var; + SPIRVariable *descriptor_alias; string name; SPIRType::BaseType basetype; uint32_t index; + uint32_t plane; + uint32_t secondary_index; }; SmallVector resources; - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) && !is_hidden_variable(var)) { auto &type = get_variable_data_type(var); - uint32_t var_id = var.self; - if (var.storage != StorageClassPushConstant) + if (is_supported_argument_buffer_type(type) && var.storage != StorageClassPushConstant) { uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); if (descriptor_set_is_argument_buffer(desc_set)) return; } + // Handle descriptor aliasing. We can handle aliasing of buffers by casting pointers, + // but not for typed resources. + SPIRVariable *descriptor_alias = nullptr; + if (var.storage == StorageClassUniform || var.storage == StorageClassStorageBuffer) + { + for (auto &resource : resources) + { + if (get_decoration(resource.var->self, DecorationDescriptorSet) == + get_decoration(var_id, DecorationDescriptorSet) && + get_decoration(resource.var->self, DecorationBinding) == + get_decoration(var_id, DecorationBinding) && + resource.basetype == SPIRType::Struct && type.basetype == SPIRType::Struct && + (resource.var->storage == StorageClassUniform || + resource.var->storage == StorageClassStorageBuffer)) + { + // Possible, but horrible to implement, ignore for now. 
+ if (!type.array.empty()) + SPIRV_CROSS_THROW("Aliasing arrayed discrete descriptors is currently not supported."); + + descriptor_alias = resource.var; + // Self-reference marks that we should declare the resource, + // and it's being used as an alias (so we can emit void* instead). + resource.descriptor_alias = resource.var; + // Need to promote interlocked usage so that the primary declaration is correct. + if (interlocked_resources.count(var_id)) + interlocked_resources.insert(resource.var->self); + break; + } + } + } + const MSLConstexprSampler *constexpr_sampler = nullptr; if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler) { @@ -6547,29 +12883,48 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) } } + // Emulate texture2D atomic operations + uint32_t secondary_index = 0; + if (atomic_image_vars.count(var.self)) + { + secondary_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0); + } + if (type.basetype == SPIRType::SampledImage) { add_resource_name(var_id); - resources.push_back( - { &var, to_name(var_id), SPIRType::Image, get_metal_resource_index(var, SPIRType::Image) }); + + uint32_t plane_count = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + plane_count = constexpr_sampler->planes; + + for (uint32_t i = 0; i < plane_count; i++) + resources.push_back({ &var, descriptor_alias, to_name(var_id), SPIRType::Image, + get_metal_resource_index(var, SPIRType::Image, i), i, secondary_index }); if (type.image.dim != DimBuffer && !constexpr_sampler) { - resources.push_back({ &var, to_sampler_expression(var_id), SPIRType::Sampler, - get_metal_resource_index(var, SPIRType::Sampler) }); + resources.push_back({ &var, descriptor_alias, to_sampler_expression(var_id), SPIRType::Sampler, + get_metal_resource_index(var, SPIRType::Sampler), 0, 0 }); } } else if (!constexpr_sampler) { // constexpr samplers are not declared as resources. 
add_resource_name(var_id); - resources.push_back( - { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) }); + + // Don't allocate resource indices for aliases. + uint32_t resource_index = ~0u; + if (!descriptor_alias) + resource_index = get_metal_resource_index(var, type.basetype); + + resources.push_back({ &var, descriptor_alias, to_name(var_id), type.basetype, + resource_index, 0, secondary_index }); } } }); - sort(resources.begin(), resources.end(), [](const Resource &lhs, const Resource &rhs) { + stable_sort(resources.begin(), resources.end(), [](const Resource &lhs, const Resource &rhs) { return tie(lhs.basetype, lhs.index) < tie(rhs.basetype, rhs.index); }); @@ -6587,7 +12942,29 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) auto &m = ir.meta[type.self]; if (m.members.size() == 0) break; - if (!type.array.empty()) + + if (r.descriptor_alias) + { + if (r.var == r.descriptor_alias) + { + auto primary_name = join("spvBufferAliasSet", + get_decoration(var_id, DecorationDescriptorSet), + "Binding", + get_decoration(var_id, DecorationBinding)); + + // Declare the primary alias as void* + if (!ep_args.empty()) + ep_args += ", "; + ep_args += get_argument_address_space(var) + " void* " + primary_name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + } + + buffer_aliases_discrete.push_back(r.var->self); + } + else if (!type.array.empty()) { if (type.array.size() > 1) SPIRV_CROSS_THROW("Arrays of arrays of buffers are not supported."); @@ -6600,42 +12977,94 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) if (array_size == 0) SPIRV_CROSS_THROW("Unsized arrays of buffers are not supported in MSL."); - buffer_arrays.push_back(var_id); + // Allow Metal to use the array template to make arrays a value type + is_using_builtin_array = true; + 
buffer_arrays_discrete.push_back(var_id); for (uint32_t i = 0; i < array_size; ++i) { if (!ep_args.empty()) ep_args += ", "; - ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + r.name + "_" + - convert_to_string(i); - ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]"; + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id, true) + + r.name + "_" + convert_to_string(i); + ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; } + is_using_builtin_array = false; } else { if (!ep_args.empty()) ep_args += ", "; - ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + r.name; - ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + ep_args += + get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id, true) + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; } break; } case SPIRType::Sampler: if (!ep_args.empty()) ep_args += ", "; - ep_args += sampler_type(type) + " " + r.name; + ep_args += sampler_type(type, var_id) + " " + r.name; ep_args += " [[sampler(" + convert_to_string(r.index) + ")]]"; break; case SPIRType::Image: + { if (!ep_args.empty()) ep_args += ", "; - ep_args += image_type_glsl(type, var_id) + " " + r.name; - ep_args += " [[texture(" + convert_to_string(r.index) + ")]]"; + + // Use Metal's native frame-buffer fetch API for subpass inputs. 
+ const auto &basetype = get(var.basetype); + if (!type_is_msl_framebuffer_fetch(basetype)) + { + ep_args += image_type_glsl(type, var_id) + " " + r.name; + if (r.plane > 0) + ep_args += join(plane_name_suffix, r.plane); + ep_args += " [[texture(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + } + else + { + if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Framebuffer fetch on Mac is not supported before MSL 2.3."); + ep_args += image_type_glsl(type, var_id) + " " + r.name; + ep_args += " [[color(" + convert_to_string(r.index) + ")]]"; + } + + // Emulate texture2D atomic operations + if (atomic_image_vars.count(var.self)) + { + ep_args += ", device atomic_" + type_to_glsl(get(basetype.image.type), 0); + ep_args += "* " + r.name + "_atomic"; + ep_args += " [[buffer(" + convert_to_string(r.secondary_index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + } + break; + } + case SPIRType::AccelerationStructure: + ep_args += ", " + type_to_glsl(type, var_id) + " " + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; break; default: if (!ep_args.empty()) ep_args += ", "; - ep_args += type_to_glsl(type, var_id) + " " + r.name; - ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + if (!type.pointer) + ep_args += get_type_address_space(get(var.basetype), var_id) + " " + + type_to_glsl(type, var_id) + "& " + r.name; + else + ep_args += type_to_glsl(type, var_id) + " " + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; break; } } @@ -6657,6 +13086,21 @@ string CompilerMSL::entry_point_args_classic(bool append_comma) void CompilerMSL::fix_up_shader_inputs_outputs() { + auto &entry_func = this->get(ir.default_entry_point); + + // 
Emit a guard to ensure we don't execute beyond the last vertex. + // Vertex shaders shouldn't have the problems with barriers in non-uniform control flow that + // tessellation control shaders do, so early returns should be OK. We may need to revisit this + // if it ever becomes possible to use barriers from a vertex shader. + if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation) + { + entry_func.fixup_hooks_in.push_back([this]() { + statement("if (any(", to_expression(builtin_invocation_id_id), + " >= ", to_expression(builtin_stage_input_size_id), "))"); + statement(" return;"); + }); + } + // Look for sampled images and buffer. Add hooks to set up the swizzle constants or array lengths. ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = get_variable_data_type(var); @@ -6667,7 +13111,6 @@ void CompilerMSL::fix_up_shader_inputs_outputs() { if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) { - auto &entry_func = this->get(ir.default_entry_point); entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() { bool is_array_type = !type.array.empty(); @@ -6692,9 +13135,8 @@ void CompilerMSL::fix_up_shader_inputs_outputs() else if ((var.storage == StorageClassStorageBuffer || (var.storage == StorageClassUniform && ssbo)) && !is_hidden_variable(var)) { - if (buffers_requiring_array_length.count(var.self)) + if (buffer_requires_array_length(var.self)) { - auto &entry_func = this->get(ir.default_entry_point); entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() { bool is_array_type = !type.array.empty(); @@ -6719,13 +13161,17 @@ void CompilerMSL::fix_up_shader_inputs_outputs() }); // Builtin variables - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + ir.for_each_typed_id([this, &entry_func](uint32_t, SPIRVariable &var) { uint32_t var_id = var.self; BuiltIn bi_type = ir.meta[var_id].decoration.builtin_type; - if (var.storage == StorageClassInput && 
is_builtin_variable(var)) + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + return; + if (!interface_variable_exists_in_entry_point(var.self)) + return; + + if (var.storage == StorageClassInput && is_builtin_variable(var) && active_input_builtins.get(bi_type)) { - auto &entry_func = this->get(ir.default_entry_point); switch (bi_type) { case BuiltInSamplePosition: @@ -6734,18 +13180,40 @@ void CompilerMSL::fix_up_shader_inputs_outputs() to_expression(builtin_sample_id_id), ");"); }); break; - case BuiltInHelperInvocation: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("simd_is_helper_thread() is only supported on macOS."); - else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) - SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS."); + case BuiltInFragCoord: + if (is_sample_rate()) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(to_expression(var_id), ".xy += get_sample_position(", + to_expression(builtin_sample_id_id), ") - 0.5;"); + }); + } + break; + case BuiltInInvocationId: + // This is direct-mapped without multi-patch workgroups. + if (!is_tesc_shader() || !msl_options.multi_patch_workgroup) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_invocation_id_id), ".x % ", this->get_entry_point().output_vertices, + ";"); + }); + break; + case BuiltInPrimitiveId: + // This is natively supported by fragment and tessellation evaluation shaders. + // In tessellation control shaders, this is direct-mapped without multi-patch workgroups. 
+ if (!is_tesc_shader() || !msl_options.multi_patch_workgroup) + break; entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_is_helper_thread();"); + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = min(", + to_expression(builtin_invocation_id_id), ".x / ", this->get_entry_point().output_vertices, + ", spvIndirectParams[1] - 1);"); }); break; case BuiltInPatchVertices: - if (get_execution_model() == ExecutionModelTessellationEvaluation) + if (is_tese_shader()) entry_func.fixup_hooks_in.push_back([=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", to_expression(patch_stage_in_var_id), ".gl_in.size();"); @@ -6756,144 +13224,254 @@ void CompilerMSL::fix_up_shader_inputs_outputs() }); break; case BuiltInTessCoord: + if (get_entry_point().flags.get(ExecutionModeQuads)) + { + // The entry point will only have a float2 TessCoord variable. + // Pad to float3. + entry_func.fixup_hooks_in.push_back([=]() { + auto name = builtin_to_glsl(BuiltInTessCoord, StorageClassInput); + statement("float3 " + name + " = float3(" + name + "In.x, " + name + "In.y, 0.0);"); + }); + } + // Emit a fixup to account for the shifted domain. Don't do this for triangles; // MoltenVK will just reverse the winding order instead. - if (msl_options.tess_domain_origin_lower_left && !get_entry_point().flags.get(ExecutionModeTriangles)) + if (msl_options.tess_domain_origin_lower_left && !is_tessellating_triangles()) { string tc = to_expression(var_id); entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); }); } break; - case BuiltInSubgroupLocalInvocationId: - // This is natively supported in compute shaders. - if (get_execution_model() == ExecutionModelGLCompute) - break; - - // This is natively supported in fragment shaders in MSL 2.2. 
- if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) + case BuiltInSubgroupId: + if (!msl_options.emulate_subgroups) break; - - if (msl_options.is_ios()) - SPIRV_CROSS_THROW( - "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.2 on iOS."); - - if (!msl_options.supports_msl_version(2, 1)) - SPIRV_CROSS_THROW( - "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.1."); - - // Shaders other than compute shaders don't support the SIMD-group - // builtins directly, but we can emulate them using the SIMD-group - // functions. This might break if some of the subgroup terminated - // before reaching the entry point. + // For subgroup emulation, this is the same as the local invocation index. entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = simd_prefix_exclusive_sum(1);"); + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_local_invocation_index_id), ";"); }); break; - case BuiltInSubgroupSize: - // This is natively supported in compute shaders. - if (get_execution_model() == ExecutionModelGLCompute) + case BuiltInNumSubgroups: + if (!msl_options.emulate_subgroups) break; - - // This is natively supported in fragment shaders in MSL 2.2. - if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) + // For subgroup emulation, this is the same as the workgroup size. 
+ entry_func.fixup_hooks_in.push_back([=]() { + auto &type = expression_type(builtin_workgroup_size_id); + string size_expr = to_expression(builtin_workgroup_size_id); + if (type.vecsize >= 3) + size_expr = join(size_expr, ".x * ", size_expr, ".y * ", size_expr, ".z"); + else if (type.vecsize == 2) + size_expr = join(size_expr, ".x * ", size_expr, ".y"); + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", size_expr, ";"); + }); + break; + case BuiltInSubgroupLocalInvocationId: + if (!msl_options.emulate_subgroups) break; - - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders on iOS."); - - if (!msl_options.supports_msl_version(2, 1)) - SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders before Metal 2.1."); - + // For subgroup emulation, assume subgroups of size 1. entry_func.fixup_hooks_in.push_back( - [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_sum(1);"); }); + [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;"); }); + break; + case BuiltInSubgroupSize: + if (msl_options.emulate_subgroups) + { + // For subgroup emulation, assume subgroups of size 1. 
+ entry_func.fixup_hooks_in.push_back( + [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 1;"); }); + } + else if (msl_options.fixed_subgroup_size != 0) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + msl_options.fixed_subgroup_size, ";"); + }); + } break; case BuiltInSubgroupEqMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", - to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (", - to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ", - to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); + if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", "uint4(1 << ", + to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? 
uint4(0, (1 << (", + to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ", + to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); + } }); break; case BuiltInSubgroupGeMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + if (msl_options.fixed_subgroup_size != 0) + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); entry_func.fixup_hooks_in.push_back([=]() { // Case where index < 32, size < 32: - // mask0 = bfe(0xFFFFFFFF, index, size - index); - // mask1 = bfe(0xFFFFFFFF, 0, 0); // Gives 0 + // mask0 = bfi(0, 0xFFFFFFFF, index, size - index); + // mask1 = bfi(0, 0xFFFFFFFF, 0, 0); // Gives 0 // Case where index < 32 but size >= 32: - // mask0 = bfe(0xFFFFFFFF, index, 32 - index); - // mask1 = bfe(0xFFFFFFFF, 0, size - 32); + // mask0 = bfi(0, 0xFFFFFFFF, index, 32 - index); + // mask1 = bfi(0, 0xFFFFFFFF, 0, size - 32); // Case where index >= 32: - // mask0 = bfe(0xFFFFFFFF, 32, 0); // Gives 0 - // mask1 = bfe(0xFFFFFFFF, index - 32, size - index); + // mask0 = bfi(0, 0xFFFFFFFF, 32, 0); // Gives 0 + // mask1 = bfi(0, 0xFFFFFFFF, index - 32, size - index); // This is expressed without branches to avoid divergent // control flow--hence the complicated min/max expressions. // This is further complicated by the fact that if you attempt - // to bfe out-of-bounds on Metal, undefined behavior is the + // to bfi/bfe out-of-bounds on Metal, undefined behavior is the // result. 
- statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(extract_bits(0xFFFFFFFF, min(", - to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)", - to_expression(builtin_subgroup_size_id), ", 32) - (int)", - to_expression(builtin_subgroup_invocation_id_id), - ", 0)), extract_bits(0xFFFFFFFF, (uint)max((int)", - to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)", - to_expression(builtin_subgroup_size_id), " - (int)max(", - to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));"); + if (msl_options.fixed_subgroup_size > 32) + { + // Don't use the subgroup size variable with fixed subgroup sizes, + // since the variables could be defined in the wrong order. + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(32 - (int)", + to_expression(builtin_subgroup_invocation_id_id), + ", 0)), insert_bits(0u, 0xFFFFFFFF," + " (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), ", + msl_options.fixed_subgroup_size, " - max(", + to_expression(builtin_subgroup_invocation_id_id), + ", 32u)), uint2(0));"); + } + else if (msl_options.fixed_subgroup_size != 0) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), ", ", + msl_options.fixed_subgroup_size, " - ", + to_expression(builtin_subgroup_invocation_id_id), + "), uint3(0));"); + } + else if (msl_options.is_ios()) + { + // On iOS, the SIMD-group size will currently never exceed 32. 
+ statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), ", ", + to_expression(builtin_subgroup_size_id), " - ", + to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)", + to_expression(builtin_subgroup_size_id), ", 32) - (int)", + to_expression(builtin_subgroup_invocation_id_id), + ", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)", + to_expression(builtin_subgroup_size_id), " - (int)max(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));"); + } }); break; case BuiltInSubgroupGtMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); entry_func.fixup_hooks_in.push_back([=]() { // The same logic applies here, except now the index is one // more than the subgroup invocation ID. 
- statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(extract_bits(0xFFFFFFFF, min(", - to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)", - to_expression(builtin_subgroup_size_id), ", 32) - (int)", - to_expression(builtin_subgroup_invocation_id_id), - " - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)", - to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)", - to_expression(builtin_subgroup_size_id), " - (int)max(", - to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));"); + if (msl_options.fixed_subgroup_size > 32) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(32 - (int)", + to_expression(builtin_subgroup_invocation_id_id), + " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), ", + msl_options.fixed_subgroup_size, " - max(", + to_expression(builtin_subgroup_invocation_id_id), + " + 1, 32u)), uint2(0));"); + } + else if (msl_options.fixed_subgroup_size != 0) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), " + 1, ", + msl_options.fixed_subgroup_size, " - ", + to_expression(builtin_subgroup_invocation_id_id), + " - 1), uint3(0));"); + } + else if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), " + 1, ", + to_expression(builtin_subgroup_size_id), " - ", + to_expression(builtin_subgroup_invocation_id_id), " - 1), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + 
to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)", + to_expression(builtin_subgroup_size_id), ", 32) - (int)", + to_expression(builtin_subgroup_invocation_id_id), + " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)", + to_expression(builtin_subgroup_size_id), " - (int)max(", + to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));"); + } }); break; case BuiltInSubgroupLeMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(extract_bits(0xFFFFFFFF, 0, min(", - to_expression(builtin_subgroup_invocation_id_id), - " + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", - to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));"); + if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, ", + to_expression(builtin_subgroup_invocation_id_id), " + 1), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, min(", + to_expression(builtin_subgroup_invocation_id_id), + " + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));"); + } }); break; case BuiltInSubgroupLtMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on 
iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(extract_bits(0xFFFFFFFF, 0, min(", - to_expression(builtin_subgroup_invocation_id_id), - ", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", - to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));"); + if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, ", + to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, min(", + to_expression(builtin_subgroup_invocation_id_id), + ", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));"); + } }); break; case BuiltInViewIndex: @@ -6905,6 +13483,26 @@ void CompilerMSL::fix_up_shader_inputs_outputs() statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;"); }); } + else if (msl_options.view_index_from_device_index) + { + // In this case, we take the view index from that of the device we're running on. + entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + msl_options.device_index, ";"); + }); + // We actually don't want to set the render_target_array_index here. + // Since every physical device is rendering a different view, + // there's no need for layered rendering here. 
+ } + else if (!msl_options.multiview_layered_rendering) + { + // In this case, the views are rendered one at a time. The view index, then, + // is just the first part of the "view mask". + entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(view_mask_buffer_id), "[0];"); + }); + } else if (get_execution_model() == ExecutionModelFragment) { // Because we adjusted the view index in the vertex shader, we have to @@ -6919,10 +13517,13 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // the view index in the instance index. entry_func.fixup_hooks_in.push_back([=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", - to_expression(view_mask_buffer_id), "[0] + ", to_expression(builtin_instance_idx_id), - " % ", to_expression(view_mask_buffer_id), "[1];"); - statement(to_expression(builtin_instance_idx_id), " /= ", to_expression(view_mask_buffer_id), - "[1];"); + to_expression(view_mask_buffer_id), "[0] + (", to_expression(builtin_instance_idx_id), + " - ", to_expression(builtin_base_instance_id), ") % ", + to_expression(view_mask_buffer_id), "[1];"); + statement(to_expression(builtin_instance_idx_id), " = (", + to_expression(builtin_instance_idx_id), " - ", + to_expression(builtin_base_instance_id), ") / ", to_expression(view_mask_buffer_id), + "[1] + ", to_expression(builtin_base_instance_id), ";"); }); // In addition to setting the variable itself, we also need to // set the render_target_array_index with it on output. We have to @@ -6934,15 +13535,124 @@ void CompilerMSL::fix_up_shader_inputs_outputs() }); } break; + case BuiltInDeviceIndex: + // Metal pipelines belong to the devices which create them, so we'll + // need to create a MTLPipelineState for every MTLDevice in a grouped + // VkDevice. We can assume, then, that the device index is constant. 
+ entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + msl_options.device_index, ";"); + }); + break; + case BuiltInWorkgroupId: + if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInWorkgroupId)) + break; + + // The vkCmdDispatchBase() command lets the client set the base value + // of WorkgroupId. Metal has no direct equivalent; we must make this + // adjustment ourselves. + entry_func.fixup_hooks_in.push_back([=]() { + statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), ";"); + }); + break; + case BuiltInGlobalInvocationId: + if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInGlobalInvocationId)) + break; + + // GlobalInvocationId is defined as LocalInvocationId + WorkgroupId * WorkgroupSize. + // This needs to be adjusted too. + entry_func.fixup_hooks_in.push_back([=]() { + auto &execution = this->get_entry_point(); + uint32_t workgroup_size_id = execution.workgroup_size.constant; + if (workgroup_size_id) + statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), + " * ", to_expression(workgroup_size_id), ";"); + else + statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), + " * uint3(", execution.workgroup_size.x, ", ", execution.workgroup_size.y, ", ", + execution.workgroup_size.z, ");"); + }); + break; + case BuiltInVertexId: + case BuiltInVertexIndex: + // This is direct-mapped normally. 
+ if (!msl_options.vertex_for_tessellation) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + builtin_declaration = true; + switch (msl_options.vertex_index_type) + { + case Options::IndexType::None: + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_invocation_id_id), ".x + ", + to_expression(builtin_dispatch_base_id), ".x;"); + break; + case Options::IndexType::UInt16: + case Options::IndexType::UInt32: + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", index_buffer_var_name, + "[", to_expression(builtin_invocation_id_id), ".x] + ", + to_expression(builtin_dispatch_base_id), ".x;"); + break; + } + builtin_declaration = false; + }); + break; + case BuiltInBaseVertex: + // This is direct-mapped normally. + if (!msl_options.vertex_for_tessellation) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_dispatch_base_id), ".x;"); + }); + break; + case BuiltInInstanceId: + case BuiltInInstanceIndex: + // This is direct-mapped normally. + if (!msl_options.vertex_for_tessellation) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + builtin_declaration = true; + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_invocation_id_id), ".y + ", to_expression(builtin_dispatch_base_id), + ".y;"); + builtin_declaration = false; + }); + break; + case BuiltInBaseInstance: + // This is direct-mapped normally. 
+ if (!msl_options.vertex_for_tessellation) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_dispatch_base_id), ".y;"); + }); + break; default: break; } } + else if (var.storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment && + is_builtin_variable(var) && active_output_builtins.get(bi_type) && + bi_type == BuiltInSampleMask && has_additional_fixed_sample_mask()) + { + // If the additional fixed sample mask was set, we need to adjust the sample_mask + // output to reflect that. If the shader outputs the sample_mask itself too, we need + // to AND the two masks to get the final one. + string op_str = does_shader_write_sample_mask ? " &= " : " = "; + entry_func.fixup_hooks_out.push_back([=]() { + statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";"); + }); + } }); } // Returns the Metal index of the resource of the specified type as used by the specified variable. -uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype) +uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane) { auto &execution = get_entry_point(); auto &var_dec = ir.meta[var.self].decoration; @@ -6953,9 +13663,17 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base // If a matching binding has been specified, find and use it. auto itr = resource_bindings.find({ execution.model, var_desc_set, var_binding }); - auto resource_decoration = var_type.basetype == SPIRType::SampledImage && basetype == SPIRType::Sampler ? - SPIRVCrossDecorationResourceIndexSecondary : - SPIRVCrossDecorationResourceIndexPrimary; + // Atomic helper buffers for image atomics need to use secondary bindings as well. 
+ bool use_secondary_binding = (var_type.basetype == SPIRType::SampledImage && basetype == SPIRType::Sampler) || + basetype == SPIRType::AtomicCounter; + + auto resource_decoration = + use_secondary_binding ? SPIRVCrossDecorationResourceIndexSecondary : SPIRVCrossDecorationResourceIndexPrimary; + + if (plane == 1) + resource_decoration = SPIRVCrossDecorationResourceIndexTertiary; + if (plane == 2) + resource_decoration = SPIRVCrossDecorationResourceIndexQuaternary; if (itr != end(resource_bindings)) { @@ -6964,8 +13682,8 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base switch (basetype) { case SPIRType::Image: - set_extended_decoration(var.self, resource_decoration, remap.first.msl_texture); - return remap.first.msl_texture; + set_extended_decoration(var.self, resource_decoration, remap.first.msl_texture + plane); + return remap.first.msl_texture + plane; case SPIRType::Sampler: set_extended_decoration(var.self, resource_decoration, remap.first.msl_sampler); return remap.first.msl_sampler; @@ -6979,33 +13697,49 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base if (has_extended_decoration(var.self, resource_decoration)) return get_extended_decoration(var.self, resource_decoration); + auto &type = get(var.basetype); + + if (type_is_msl_framebuffer_fetch(type)) + { + // Frame-buffer fetch gets its fallback resource index from the input attachment index, + // which is then treated as color index. + return get_decoration(var.self, DecorationInputAttachmentIndex); + } + else if (msl_options.enable_decoration_binding) + { + // Allow user to enable decoration binding. + // If there is no explicit mapping of bindings to MSL, use the declared binding as a fallback. + if (has_decoration(var.self, DecorationBinding)) + { + var_binding = get_decoration(var.self, DecorationBinding); + // Avoid emitting sentinel bindings. 
+ if (var_binding < 0x80000000u) + return var_binding; + } + } + // If we did not explicitly remap, allocate bindings on demand. // We cannot reliably use Binding decorations since SPIR-V and MSL's binding models are very different. + bool allocate_argument_buffer_ids = false; + + if (var.storage != StorageClassPushConstant) + allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(var_desc_set); + uint32_t binding_stride = 1; - auto &type = get(var.basetype); for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) - binding_stride *= type.array_size_literal[i] ? type.array[i] : get(type.array[i]).scalar(); + binding_stride *= to_array_size_literal(type, i); assert(binding_stride != 0); // If a binding has not been specified, revert to incrementing resource indices. uint32_t resource_index; - bool allocate_argument_buffer_ids = false; - uint32_t desc_set = 0; - - if (var.storage != StorageClassPushConstant) - { - desc_set = get_decoration(var.self, DecorationDescriptorSet); - allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(desc_set); - } - if (allocate_argument_buffer_ids) { // Allocate from a flat ID binding space. 
- resource_index = next_metal_resource_ids[desc_set]; - next_metal_resource_ids[desc_set] += binding_stride; + resource_index = next_metal_resource_ids[var_desc_set]; + next_metal_resource_ids[var_desc_set] += binding_stride; } else { @@ -7031,12 +13765,69 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base return resource_index; } +bool CompilerMSL::type_is_msl_framebuffer_fetch(const SPIRType &type) const +{ + return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && + msl_options.use_framebuffer_fetch_subpasses; +} + +bool CompilerMSL::type_is_pointer(const SPIRType &type) const +{ + if (!type.pointer) + return false; + auto &parent_type = get(type.parent_type); + // Safeguards when we forget to set pointer_depth (there is an assert for it in type_to_glsl), + // but the extra check shouldn't hurt. + return (type.pointer_depth > parent_type.pointer_depth) || !parent_type.pointer; +} + +bool CompilerMSL::type_is_pointer_to_pointer(const SPIRType &type) const +{ + if (!type.pointer) + return false; + auto &parent_type = get(type.parent_type); + return type.pointer_depth > parent_type.pointer_depth && type_is_pointer(parent_type); +} + +const char *CompilerMSL::descriptor_address_space(uint32_t id, StorageClass storage, const char *plain_address_space) const +{ + if (msl_options.argument_buffers) + { + bool storage_class_is_descriptor = storage == StorageClassUniform || + storage == StorageClassStorageBuffer || + storage == StorageClassUniformConstant; + + uint32_t desc_set = get_decoration(id, DecorationDescriptorSet); + if (storage_class_is_descriptor && descriptor_set_is_argument_buffer(desc_set)) + { + // An awkward case where we need to emit *more* address space declarations (yay!). + // An example is where we pass down an array of buffer pointers to leaf functions. + // It's a constant array containing pointers to constants. + // The pointer array is always constant however. E.g. 
+ // device SSBO * constant (&array)[N]. + // const device SSBO * constant (&array)[N]. + // constant SSBO * constant (&array)[N]. + // However, this only matters for argument buffers, since for MSL 1.0 style codegen, + // we emit the buffer array on stack instead, and that seems to work just fine apparently. + + // If the argument was marked as being in device address space, any pointer to member would + // be const device, not constant. + if (argument_buffer_device_storage_mask & (1u << desc_set)) + return "const device"; + else + return "constant"; + } + } + + return plain_address_space; +} + string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) { auto &var = get(arg.id); auto &type = get_variable_data_type(var); auto &var_type = get(arg.type); - StorageClass storage = var_type.storage; + StorageClass type_storage = var_type.storage; bool is_pointer = var_type.pointer; // If we need to modify the name of the variable, make sure we use the original variable. @@ -7046,37 +13837,93 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) name_id = var.basevariable; bool constref = !arg.alias_global_variable && is_pointer && arg.write_count == 0; + // Framebuffer fetch is plain value, const looks out of place, but it is not wrong. + if (type_is_msl_framebuffer_fetch(type)) + constref = false; + else if (type_storage == StorageClassUniformConstant) + constref = true; bool type_is_image = type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler; - // Arrays of images/samplers in MSL are always const. - if (!type.array.empty() && type_is_image) - constref = true; + // For opaque types we handle const later due to descriptor address spaces. + const char *cv_qualifier = (constref && !type_is_image) ? 
"const " : ""; + string decl; + + // If this is a combined image-sampler for a 2D image with floating-point type, + // we emitted the 'spvDynamicImageSampler' type, and this is *not* an alias parameter + // for a global, then we need to emit a "dynamic" combined image-sampler. + // Unfortunately, this is necessary to properly support passing around + // combined image-samplers with Y'CbCr conversions on them. + bool is_dynamic_img_sampler = !arg.alias_global_variable && type.basetype == SPIRType::SampledImage && + type.image.dim == Dim2D && type_is_floating_point(get(type.image.type)) && + spv_function_implementations.count(SPVFuncImplDynamicImageSampler); + + // Allow Metal to use the array template to make arrays a value type + string address_space = get_argument_address_space(var); + bool builtin = has_decoration(var.self, DecorationBuiltIn); + auto builtin_type = BuiltIn(get_decoration(arg.id, DecorationBuiltIn)); - string decl; - if (constref) - decl += "const "; + if (address_space == "threadgroup") + is_using_builtin_array = true; - bool builtin = is_builtin_variable(var); - if (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id) - decl += type_to_glsl(type, arg.id); + if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id)) + decl = join(cv_qualifier, type_to_glsl(type, arg.id)); else if (builtin) - decl += builtin_type_decl(static_cast(get_decoration(arg.id, DecorationBuiltIn)), arg.id); - else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type)) - decl += join(type_to_glsl(type, arg.id), "*"); - else - decl += type_to_glsl(type, arg.id); + { + // Only use templated array for Clip/Cull distance when feasible. + // In other scenarios, we need need to override array length for tess levels (if used as outputs), + // or we need to emit the expected type for builtins (uint vs int). 
+ auto storage = get(var.basetype).storage; + + if (storage == StorageClassInput && + (builtin_type == BuiltInTessLevelInner || builtin_type == BuiltInTessLevelOuter)) + { + is_using_builtin_array = false; + } + else if (builtin_type != BuiltInClipDistance && builtin_type != BuiltInCullDistance) + { + is_using_builtin_array = true; + } - bool opaque_handle = storage == StorageClassUniformConstant; + if (storage == StorageClassOutput && variable_storage_requires_stage_io(storage) && + !is_stage_output_builtin_masked(builtin_type)) + is_using_builtin_array = true; - string address_space = get_argument_address_space(var); + if (is_using_builtin_array) + decl = join(cv_qualifier, builtin_type_decl(builtin_type, arg.id)); + else + decl = join(cv_qualifier, type_to_glsl(type, arg.id)); + } + else if ((type_storage == StorageClassUniform || type_storage == StorageClassStorageBuffer) && is_array(type)) + { + is_using_builtin_array = true; + decl += join(cv_qualifier, type_to_glsl(type, arg.id), "*"); + } + else if (is_dynamic_img_sampler) + { + decl = join(cv_qualifier, "spvDynamicImageSampler<", type_to_glsl(get(type.image.type)), ">"); + // Mark the variable so that we can handle passing it to another function. + set_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + } + else + { + // The type is a pointer type we need to emit cv_qualifier late. + if (type_is_pointer(type)) + { + decl = type_to_glsl(type, arg.id); + if (*cv_qualifier != '\0') + decl += join(" ", cv_qualifier); + } + else + decl = join(cv_qualifier, type_to_glsl(type, arg.id)); + } - if (!builtin && !opaque_handle && !is_pointer && - (storage == StorageClassFunction || storage == StorageClassGeneric)) + if (!builtin && !is_pointer && + (type_storage == StorageClassFunction || type_storage == StorageClassGeneric)) { // If the argument is a pure value and not an opaque type, we will pass by value. 
- if (is_array(type)) + if (msl_options.force_native_arrays && is_array(type)) { // We are receiving an array by value. This is problematic. // We cannot be sure of the target address space since we are supposed to receive a copy, @@ -7087,6 +13934,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) // non-constant arrays, but we can create thread const from constant. decl = string("thread const ") + decl; decl += " (&"; + const char *restrict_kw = to_restrict(name_id, true); + if (*restrict_kw) + { + decl += " "; + decl += restrict_kw; + } decl += to_expression(name_id); decl += ")"; decl += type_to_array_glsl(type); @@ -7101,49 +13954,99 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) } else if (is_array(type) && !type_is_image) { - // Arrays of images and samplers are special cased. + // Arrays of opaque types are special cased. if (!address_space.empty()) decl = join(address_space, " ", decl); - if (msl_options.argument_buffers) + const char *argument_buffer_space = descriptor_address_space(name_id, type_storage, nullptr); + if (argument_buffer_space) + { + decl += " "; + decl += argument_buffer_space; + } + + // Special case, need to override the array size here if we're using tess level as an argument. + if (is_tesc_shader() && builtin && + (builtin_type == BuiltInTessLevelInner || builtin_type == BuiltInTessLevelOuter)) { - uint32_t desc_set = get_decoration(name_id, DecorationDescriptorSet); - if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && - descriptor_set_is_argument_buffer(desc_set)) + uint32_t array_size = get_physical_tess_level_array_size(builtin_type); + if (array_size == 1) + { + decl += " &"; + decl += to_expression(name_id); + } + else { - // An awkward case where we need to emit *more* address space declarations (yay!). - // An example is where we pass down an array of buffer pointers to leaf functions. - // It's a constant array containing pointers to constants. 
- // The pointer array is always constant however. E.g. - // device SSBO * constant (&array)[N]. - // const device SSBO * constant (&array)[N]. - // constant SSBO * constant (&array)[N]. - // However, this only matters for argument buffers, since for MSL 1.0 style codegen, - // we emit the buffer array on stack instead, and that seems to work just fine apparently. - decl += " constant"; + decl += " (&"; + decl += to_expression(name_id); + decl += ")"; + decl += join("[", array_size, "]"); } } + else + { + auto array_size_decl = type_to_array_glsl(type); + if (array_size_decl.empty()) + decl += "& "; + else + decl += " (&"; - decl += " (&"; - decl += to_expression(name_id); - decl += ")"; - decl += type_to_array_glsl(type); + const char *restrict_kw = to_restrict(name_id, true); + if (*restrict_kw) + { + decl += " "; + decl += restrict_kw; + } + decl += to_expression(name_id); + + if (!array_size_decl.empty()) + { + decl += ")"; + decl += array_size_decl; + } + } } - else if (!opaque_handle) + else if (!type_is_image && (!pull_model_inputs.count(var.basevariable) || type.basetype == SPIRType::Struct)) { // If this is going to be a reference to a variable pointer, the address space // for the reference has to go before the '&', but after the '*'. if (!address_space.empty()) { - if (decl.back() == '*') - decl += join(" ", address_space, " "); + if (type_is_pointer(type)) + { + if (*cv_qualifier == '\0') + decl += ' '; + decl += join(address_space, " "); + } else decl = join(address_space, " ", decl); } decl += "&"; decl += " "; + decl += to_restrict(name_id, true); decl += to_expression(name_id); } + else if (type_is_image) + { + if (type.array.empty()) + { + // For non-arrayed types we can just pass opaque descriptors by value. + // This fixes problems if descriptors are passed by value from argument buffers and plain descriptors + // in same shader. + // There is no address space we can actually use, but value will work. 
+ // This will break if applications attempt to pass down descriptor arrays as arguments, but + // fortunately that is extremely unlikely ... + decl += " "; + decl += to_expression(name_id); + } + else + { + const char *img_address_space = descriptor_address_space(name_id, type_storage, "thread const"); + decl = join(img_address_space, " ", decl); + decl += "& "; + decl += to_expression(name_id); + } + } else { if (!address_space.empty()) @@ -7152,6 +14055,16 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl += to_expression(name_id); } + // Emulate texture2D atomic operations + auto *backing_var = maybe_get_backing_variable(name_id); + if (backing_var && atomic_image_vars.count(backing_var->self)) + { + decl += ", device atomic_" + type_to_glsl(get(var_type.image.type), 0); + decl += "* " + to_expression(name_id) + "_atomic"; + } + + is_using_builtin_array = false; + return decl; } @@ -7168,8 +14081,8 @@ string CompilerMSL::to_name(uint32_t id, bool allow_alias) const return Compiler::to_name(id, allow_alias); } -// Returns a name that combines the name of the struct with the name of the member, except for Builtins -string CompilerMSL::to_qualified_member_name(const SPIRType &type, uint32_t index) +// Appends the name of the member to the variable qualifier string, except for Builtins. +string CompilerMSL::append_member_name(const string &qualifier, const SPIRType &type, uint32_t index) { // Don't qualify Builtin names because they are unique and are treated as such when building expressions BuiltIn builtin = BuiltInMax; @@ -7180,7 +14093,7 @@ string CompilerMSL::to_qualified_member_name(const SPIRType &type, uint32_t inde string mbr_name = to_member_name(type, index); size_t startPos = mbr_name.find_first_not_of("_"); mbr_name = (startPos != string::npos) ? 
mbr_name.substr(startPos) : ""; - return join(to_name(type.self), "_", mbr_name); + return join(qualifier, "_", mbr_name); } // Ensures that the specified name is permanently usable by prepending a prefix @@ -7190,17 +14103,21 @@ string CompilerMSL::ensure_valid_name(string name, string pfx) return (name.size() >= 2 && name[0] == '_' && isdigit(name[1])) ? (pfx + name) : name; } -// Replace all names that match MSL keywords or Metal Standard Library functions. -void CompilerMSL::replace_illegal_names() +const std::unordered_set &CompilerMSL::get_reserved_keyword_set() { - // FIXME: MSL and GLSL are doing two different things here. - // Agree on convention and remove this override. static const unordered_set keywords = { "kernel", "vertex", "fragment", "compute", + "constant", + "device", "bias", + "level", + "gradient2d", + "gradientcube", + "gradient3d", + "min_lod_clamp", "assert", "VARIABLE_TRACEPOINT", "STATIC_DATA_TRACEPOINT", @@ -7321,12 +14238,21 @@ void CompilerMSL::replace_illegal_names() "M_SQRT2", "M_SQRT1_2", "quad_broadcast", + "thread", + "threadgroup", }; + return keywords; +} + +const std::unordered_set &CompilerMSL::get_illegal_func_names() +{ static const unordered_set illegal_func_names = { "main", "saturate", "assert", + "fmin3", + "fmax3", "VARIABLE_TRACEPOINT", "STATIC_DATA_TRACEPOINT", "STATIC_DATA_TRACEPOINT_V", @@ -7447,24 +14373,57 @@ void CompilerMSL::replace_illegal_names() "M_SQRT1_2", }; + return illegal_func_names; +} + +// Replace all names that match MSL keywords or Metal Standard Library functions. +void CompilerMSL::replace_illegal_names() +{ + // FIXME: MSL and GLSL are doing two different things here. + // Agree on convention and remove this override. 
+ auto &keywords = get_reserved_keyword_set(); + auto &illegal_func_names = get_illegal_func_names(); + ir.for_each_typed_id([&](uint32_t self, SPIRVariable &) { - auto &dec = ir.meta[self].decoration; + auto *meta = ir.find_meta(self); + if (!meta) + return; + + auto &dec = meta->decoration; if (keywords.find(dec.alias) != end(keywords)) dec.alias += "0"; }); ir.for_each_typed_id([&](uint32_t self, SPIRFunction &) { - auto &dec = ir.meta[self].decoration; + auto *meta = ir.find_meta(self); + if (!meta) + return; + + auto &dec = meta->decoration; if (illegal_func_names.find(dec.alias) != end(illegal_func_names)) dec.alias += "0"; }); ir.for_each_typed_id([&](uint32_t self, SPIRType &) { - for (auto &mbr_dec : ir.meta[self].members) + auto *meta = ir.find_meta(self); + if (!meta) + return; + + for (auto &mbr_dec : meta->members) if (keywords.find(mbr_dec.alias) != end(keywords)) mbr_dec.alias += "0"; }); + CompilerGLSL::replace_illegal_names(); +} + +void CompilerMSL::replace_illegal_entry_point_names() +{ + auto &illegal_func_names = get_illegal_func_names(); + + // It is important to this before we fixup identifiers, + // since if ep_name is reserved, we will need to fix that up, + // and then copy alias back into entry.name after the fixup. for (auto &entry : ir.entry_points) { // Change both the entry point name and the alias, to keep them synced. @@ -7472,16 +14431,19 @@ void CompilerMSL::replace_illegal_names() if (illegal_func_names.find(ep_name) != end(illegal_func_names)) ep_name += "0"; - // Always write this because entry point might have been renamed earlier. 
ir.meta[entry.first].decoration.alias = ep_name; } +} - CompilerGLSL::replace_illegal_names(); +void CompilerMSL::sync_entry_point_aliases_and_names() +{ + for (auto &entry : ir.entry_points) + entry.second.name = ir.meta[entry.first].decoration.alias; } -string CompilerMSL::to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) +string CompilerMSL::to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved) { - auto *var = maybe_get(base); + auto *var = maybe_get_backing_variable(base); // If this is a buffer array, we have to dereference the buffer pointers. // Otherwise, if this is a pointer expression, dereference it. @@ -7489,11 +14451,16 @@ string CompilerMSL::to_member_reference(uint32_t base, const SPIRType &type, uin if (var) { - bool is_buffer_variable = var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer; + // Only allow -> dereference for block types. This is so we get expressions like + // buffer[i]->first_member.second_member, rather than buffer[i]->first->second. 
+ bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + + bool is_buffer_variable = + is_block && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer); declared_as_pointer = is_buffer_variable && is_array(get(var->basetype)); } - if (declared_as_pointer || (!ptr_chain && should_dereference(base))) + if (declared_as_pointer || (!ptr_chain_is_resolved && should_dereference(base))) return join("->", to_member_name(type, index)); else return join(".", to_member_name(type, index)); @@ -7503,8 +14470,10 @@ string CompilerMSL::to_qualifiers_glsl(uint32_t id) { string quals; + auto *var = maybe_get(id); auto &type = expression_type(id); - if (type.storage == StorageClassWorkgroup) + + if (type.storage == StorageClassWorkgroup || (var && variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))) quals += "threadgroup "; return quals; @@ -7513,14 +14482,37 @@ string CompilerMSL::to_qualifiers_glsl(uint32_t id) // The optional id parameter indicates the object whose type we are trying // to find the description for. It is optional. Most type descriptions do not // depend on a specific object's use of that type. -string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) +string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id, bool member) { string type_name; // Pointer? if (type.pointer) { - type_name = join(get_type_address_space(type, id), " ", type_to_glsl(get(type.parent_type), id)); + assert(type.pointer_depth > 0); + + const char *restrict_kw; + + auto type_address_space = get_type_address_space(type, id); + const auto *p_parent_type = &get(type.parent_type); + + // Work around C pointer qualifier rules. If glsl_type is a pointer type as well + // we'll need to emit the address space to the right. + // We could always go this route, but it makes the code unnatural. 
+ // Prefer emitting thread T *foo over T thread* foo since it's more readable, + // but we'll have to emit thread T * thread * T constant bar; for example. + if (type_is_pointer_to_pointer(type)) + type_name = join(type_to_glsl(*p_parent_type, id), " ", type_address_space, " "); + else + { + // Since this is not a pointer-to-pointer, ensure we've dug down to the base type. + // Some situations chain pointers even though they are not formally pointers-of-pointers. + while (type_is_pointer(*p_parent_type)) + p_parent_type = &get(p_parent_type->parent_type); + + type_name = join(type_address_space, " ", type_to_glsl(*p_parent_type, id)); + } + switch (type.basetype) { case SPIRType::Image: @@ -7531,6 +14523,12 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) default: // Anything else can be a raw pointer. type_name += "*"; + restrict_kw = to_restrict(id, false); + if (*restrict_kw) + { + type_name += " "; + type_name += restrict_kw; + } break; } return type_name; @@ -7540,14 +14538,16 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) { case SPIRType::Struct: // Need OpName lookup here to get a "sensible" name for a struct. - return to_name(type.self); + // Allow Metal to use the array template to make arrays a value type + type_name = to_name(type.self); + break; case SPIRType::Image: case SPIRType::SampledImage: return image_type_glsl(type, id); case SPIRType::Sampler: - return sampler_type(type); + return sampler_type(type, id); case SPIRType::Void: return "void"; @@ -7558,10 +14558,27 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) case SPIRType::ControlPointArray: return join("patch_control_point<", type_to_glsl(get(type.parent_type), id), ">"); + case SPIRType::Interpolant: + return join("interpolant<", type_to_glsl(get(type.parent_type), id), ", interpolation::", + has_decoration(type.self, DecorationNoPerspective) ? 
"no_perspective" : "perspective", ">"); + // Scalars case SPIRType::Boolean: - type_name = "bool"; + { + auto *var = maybe_get_backing_variable(id); + if (var && var->basevariable) + var = &get(var->basevariable); + + // Need to special-case threadgroup booleans. They are supposed to be logical + // storage, but MSL compilers will sometimes crash if you use threadgroup bool. + // Workaround this by using 16-bit types instead and fixup on load-store to this data. + if ((var && var->storage == StorageClassWorkgroup) || type.storage == StorageClassWorkgroup || member) + type_name = "short"; + else + type_name = "bool"; break; + } + case SPIRType::Char: case SPIRType::SByte: type_name = "char"; @@ -7600,6 +14617,16 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) case SPIRType::Double: type_name = "double"; // Currently unsupported break; + case SPIRType::AccelerationStructure: + if (msl_options.supports_msl_version(2, 4)) + type_name = "raytracing::acceleration_structure"; + else if (msl_options.supports_msl_version(2, 3)) + type_name = "raytracing::instance_acceleration_structure"; + else + SPIRV_CROSS_THROW("Acceleration Structure Type is supported in MSL 2.3 and above."); + break; + case SPIRType::RayQuery: + return "raytracing::intersection_query"; default: return "unknown_type"; @@ -7607,17 +14634,157 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) // Matrix? if (type.columns > 1) + { + auto *var = maybe_get_backing_variable(id); + if (var && var->basevariable) + var = &get(var->basevariable); + + // Need to special-case threadgroup matrices. Due to an oversight, Metal's + // matrix struct prior to Metal 3 lacks constructors in the threadgroup AS, + // preventing us from default-constructing or initializing matrices in threadgroup storage. + // Work around this by using our own type as storage. 
+ if (((var && var->storage == StorageClassWorkgroup) || type.storage == StorageClassWorkgroup) && + !msl_options.supports_msl_version(3, 0)) + { + add_spv_func_and_recompile(SPVFuncImplStorageMatrix); + type_name = "spvStorage_" + type_name; + } + type_name += to_string(type.columns) + "x"; + } // Vector or Matrix? if (type.vecsize > 1) type_name += to_string(type.vecsize); - return type_name; + if (type.array.empty() || using_builtin_array()) + { + return type_name; + } + else + { + // Allow Metal to use the array template to make arrays a value type + add_spv_func_and_recompile(SPVFuncImplUnsafeArray); + string res; + string sizes; + + for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) + { + res += "spvUnsafeArray<"; + sizes += ", "; + sizes += to_array_size(type, i); + sizes += ">"; + } + + res += type_name + sizes; + return res; + } +} + +string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) +{ + return type_to_glsl(type, id, false); +} + +string CompilerMSL::type_to_array_glsl(const SPIRType &type) +{ + // Allow Metal to use the array template to make arrays a value type + switch (type.basetype) + { + case SPIRType::AtomicCounter: + case SPIRType::ControlPointArray: + case SPIRType::RayQuery: + return CompilerGLSL::type_to_array_glsl(type); + + default: + if (type_is_array_of_pointers(type) || using_builtin_array()) + return CompilerGLSL::type_to_array_glsl(type); + else + return ""; + } +} + +string CompilerMSL::constant_op_expression(const SPIRConstantOp &cop) +{ + switch (cop.opcode) + { + case OpQuantizeToF16: + add_spv_func_and_recompile(SPVFuncImplQuantizeToF16); + return join("spvQuantizeToF16(", to_expression(cop.arguments[0]), ")"); + default: + return CompilerGLSL::constant_op_expression(cop); + } +} + +bool CompilerMSL::variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const +{ + if (variable.storage == storage) + return true; + + if (storage == StorageClassWorkgroup) + { + // Specially 
masked IO block variable. + // Normally, we will never access IO blocks directly here. + // The only scenario which that should occur is with a masked IO block. + if (is_tesc_shader() && variable.storage == StorageClassOutput && + has_decoration(get(variable.basetype).self, DecorationBlock)) + { + return true; + } + + return variable.storage == StorageClassOutput && is_tesc_shader() && is_stage_output_variable_masked(variable); + } + else if (storage == StorageClassStorageBuffer) + { + // These builtins are passed directly; we don't want to use remapping + // for them. + auto builtin = (BuiltIn)get_decoration(variable.self, DecorationBuiltIn); + if (is_tese_shader() && is_builtin_variable(variable) && (builtin == BuiltInTessCoord || builtin == BuiltInPrimitiveId)) + return false; + + // We won't be able to catch writes to control point outputs here since variable + // refers to a function local pointer. + // This is fine, as there cannot be concurrent writers to that memory anyways, + // so we just ignore that case. + + return (variable.storage == StorageClassOutput || variable.storage == StorageClassInput) && + !variable_storage_requires_stage_io(variable.storage) && + (variable.storage != StorageClassOutput || !is_stage_output_variable_masked(variable)); + } + else + { + return false; + } +} + +std::string CompilerMSL::variable_decl(const SPIRVariable &variable) +{ + bool old_is_using_builtin_array = is_using_builtin_array; + + // Threadgroup arrays can't have a wrapper type. 
+ if (variable_decl_is_remapped_storage(variable, StorageClassWorkgroup)) + is_using_builtin_array = true; + + auto expr = CompilerGLSL::variable_decl(variable); + is_using_builtin_array = old_is_using_builtin_array; + return expr; } -std::string CompilerMSL::sampler_type(const SPIRType &type) +// GCC workaround of lambdas calling protected funcs +std::string CompilerMSL::variable_decl(const SPIRType &type, const std::string &name, uint32_t id) { + return CompilerGLSL::variable_decl(type, name, id); +} + +std::string CompilerMSL::sampler_type(const SPIRType &type, uint32_t id) +{ + auto *var = maybe_get(id); + if (var && var->basevariable) + { + // Check against the base variable, and not a fake ID which might have been generated for this variable. + id = var->basevariable; + } + if (!type.array.empty()) { if (!msl_options.supports_msl_version(2)) @@ -7627,12 +14794,16 @@ std::string CompilerMSL::sampler_type(const SPIRType &type) SPIRV_CROSS_THROW("Arrays of arrays of samplers are not supported in MSL."); // Arrays of samplers in MSL must be declared with a special array syntax ala C++11 std::array. + // If we have a runtime array, it could be a variable-count descriptor set binding. uint32_t array_size = to_array_size_literal(type); + if (array_size == 0) + array_size = get_resource_array_size(id); + if (array_size == 0) SPIRV_CROSS_THROW("Unsized array of samplers is not supported in MSL."); auto &parent = get(get_pointee_type(type).parent_type); - return join("array<", sampler_type(parent), ", ", array_size, ">"); + return join("array<", sampler_type(parent, id), ", ", array_size, ">"); } else return "sampler"; @@ -7669,7 +14840,11 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) SPIRV_CROSS_THROW("Arrays of arrays of textures are not supported in MSL."); // Arrays of images in MSL must be declared with a special array syntax ala C++11 std::array. + // If we have a runtime array, it could be a variable-count descriptor set binding. 
uint32_t array_size = to_array_size_literal(type); + if (array_size == 0) + array_size = get_resource_array_size(id); + if (array_size == 0) SPIRV_CROSS_THROW("Unsized array of images is not supported in MSL."); @@ -7681,14 +14856,19 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) // Bypass pointers because we need the real image struct auto &img_type = get(type.self).image; - if (image_is_comparison(type, id)) + if (is_depth_image(type, id)) { switch (img_type.dim) { case Dim1D: - img_type_name += "depth1d_unsupported_by_metal"; - break; case Dim2D: + if (img_type.dim == Dim1D && !msl_options.texture_1D_as_2D) + { + // Use a native Metal 1D texture + img_type_name += "depth1d_unsupported_by_metal"; + break; + } + if (img_type.ms && img_type.arrayed) { if (!msl_options.supports_msl_version(2, 1)) @@ -7706,7 +14886,10 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) img_type_name += "depth3d_unsupported_by_metal"; break; case DimCube: - img_type_name += (img_type.arrayed ? "depthcube_array" : "depthcube"); + if (!msl_options.emulate_cube_array) + img_type_name += (img_type.arrayed ? "depthcube_array" : "depthcube"); + else + img_type_name += (img_type.arrayed ? "depth2d_array" : "depthcube"); break; default: img_type_name += "unknown_depth_texture_type"; @@ -7717,9 +14900,6 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) { switch (img_type.dim) { - case Dim1D: - img_type_name += (img_type.arrayed ? 
"texture1d_array" : "texture1d"); - break; case DimBuffer: if (img_type.ms || img_type.arrayed) SPIRV_CROSS_THROW("Cannot use texel buffers with multisampling or array layers."); @@ -7733,9 +14913,27 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) else img_type_name += "texture2d"; break; + case Dim1D: case Dim2D: case DimSubpassData: - if (img_type.ms && img_type.arrayed) + { + bool subpass_array = + img_type.dim == DimSubpassData && (msl_options.multiview || msl_options.arrayed_subpass_input); + if (img_type.dim == Dim1D && !msl_options.texture_1D_as_2D) + { + // Use a native Metal 1D texture + img_type_name += (img_type.arrayed ? "texture1d_array" : "texture1d"); + break; + } + + // Use Metal's native frame-buffer fetch API for subpass inputs. + if (type_is_msl_framebuffer_fetch(type)) + { + auto img_type_4 = get(img_type.type); + img_type_4.vecsize = 4; + return type_to_glsl(img_type_4); + } + if (img_type.ms && (img_type.arrayed || subpass_array)) { if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Multisampled array textures are supported from 2.1."); @@ -7743,16 +14941,20 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) } else if (img_type.ms) img_type_name += "texture2d_ms"; - else if (img_type.arrayed) + else if (img_type.arrayed || subpass_array) img_type_name += "texture2d_array"; else img_type_name += "texture2d"; break; + } case Dim3D: img_type_name += "texture3d"; break; case DimCube: - img_type_name += (img_type.arrayed ? "texturecube_array" : "texturecube"); + if (!msl_options.emulate_cube_array) + img_type_name += (img_type.arrayed ? "texturecube_array" : "texturecube"); + else + img_type_name += (img_type.arrayed ? "texture2d_array" : "texturecube"); break; default: img_type_name += "unknown_texture_type"; @@ -7812,19 +15014,57 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) const uint32_t *ops = stream(i); auto op = static_cast(i.op); - // Metal 2.0 is required. 
iOS only supports quad ops. macOS only supports - // broadcast and shuffle on 10.13 (2.0), with full support in 10.14 (2.1). - // Note that iOS makes no distinction between a quad-group and a subgroup; - // all subgroups are quad-groups there. + if (msl_options.emulate_subgroups) + { + // In this mode, only the GroupNonUniform cap is supported. The only op + // we need to handle, then, is OpGroupNonUniformElect. + if (op != OpGroupNonUniformElect) + SPIRV_CROSS_THROW("Subgroup emulation does not support operations other than Elect."); + // In this mode, the subgroup size is assumed to be one, so every invocation + // is elected. + emit_op(ops[0], ops[1], "true", true); + return; + } + + // Metal 2.0 is required. iOS only supports quad ops on 11.0 (2.0), with + // full support in 13.0 (2.2). macOS only supports broadcast and shuffle on + // 10.13 (2.0), with full support in 10.14 (2.1). + // Note that Apple GPUs before A13 make no distinction between a quad-group + // and a SIMD-group; all SIMD-groups are quad-groups on those. if (!msl_options.supports_msl_version(2)) SPIRV_CROSS_THROW("Subgroups are only supported in Metal 2.0 and up."); - if (msl_options.is_ios()) + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
+ uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + if (msl_options.is_ios() && (!msl_options.supports_msl_version(2, 3) || !msl_options.ios_use_simdgroup_functions)) { switch (op) { default: - SPIRV_CROSS_THROW("iOS only supports quad-group operations."); + SPIRV_CROSS_THROW("Subgroup ops beyond broadcast, ballot, and shuffle on iOS require Metal 2.3 and up."); + case OpGroupNonUniformBroadcastFirst: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("BroadcastFirst on iOS requires Metal 2.2 and up."); + break; + case OpGroupNonUniformElect: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Elect on iOS requires Metal 2.2 and up."); + break; + case OpGroupNonUniformAny: + case OpGroupNonUniformAll: + case OpGroupNonUniformAllEqual: + case OpGroupNonUniformBallot: + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformBallotBitCount: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Ballot ops on iOS requires Metal 2.2 and up."); + break; case OpGroupNonUniformBroadcast: case OpGroupNonUniformShuffle: case OpGroupNonUniformShuffleXor: @@ -7841,7 +15081,7 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) switch (op) { default: - SPIRV_CROSS_THROW("Subgroup ops beyond broadcast and shuffle on macOS require Metal 2.0 and up."); + SPIRV_CROSS_THROW("Subgroup ops beyond broadcast and shuffle on macOS require Metal 2.1 and up."); case OpGroupNonUniformBroadcast: case OpGroupNonUniformShuffle: case OpGroupNonUniformShuffleXor: @@ -7854,23 +15094,25 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto scope = static_cast(get(ops[2]).scalar()); + auto scope = 
static_cast(evaluate_constant_u32(ops[2])); if (scope != ScopeSubgroup) SPIRV_CROSS_THROW("Only subgroup scope is supported."); switch (op) { case OpGroupNonUniformElect: - emit_op(result_type, id, "simd_is_first()", true); + if (msl_options.use_quadgroup_operation()) + emit_op(result_type, id, "quad_is_first()", false); + else + emit_op(result_type, id, "simd_is_first()", false); break; case OpGroupNonUniformBroadcast: - emit_binary_func_op(result_type, id, ops[3], ops[4], - msl_options.is_ios() ? "quad_broadcast" : "simd_broadcast"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBroadcast"); break; case OpGroupNonUniformBroadcastFirst: - emit_unary_func_op(result_type, id, ops[3], "simd_broadcast_first"); + emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBroadcastFirst"); break; case OpGroupNonUniformBallot: @@ -7886,54 +15128,63 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) break; case OpGroupNonUniformBallotFindLSB: - emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindLSB"); + emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindLSB"); break; case OpGroupNonUniformBallotFindMSB: - emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindMSB"); + emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindMSB"); break; case OpGroupNonUniformBallotBitCount: { auto operation = static_cast(ops[3]); - if (operation == GroupOperationReduce) - emit_unary_func_op(result_type, id, ops[4], "spvSubgroupBallotBitCount"); - else if (operation == GroupOperationInclusiveScan) + switch (operation) + { + case GroupOperationReduce: + emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_size_id, "spvSubgroupBallotBitCount"); + break; + case GroupOperationInclusiveScan: emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id, "spvSubgroupBallotInclusiveBitCount"); - else if (operation == 
GroupOperationExclusiveScan) + break; + case GroupOperationExclusiveScan: emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id, "spvSubgroupBallotExclusiveBitCount"); - else + break; + default: SPIRV_CROSS_THROW("Invalid BitCount operation."); + } break; } case OpGroupNonUniformShuffle: - emit_binary_func_op(result_type, id, ops[3], ops[4], msl_options.is_ios() ? "quad_shuffle" : "simd_shuffle"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffle"); break; case OpGroupNonUniformShuffleXor: - emit_binary_func_op(result_type, id, ops[3], ops[4], - msl_options.is_ios() ? "quad_shuffle_xor" : "simd_shuffle_xor"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleXor"); break; case OpGroupNonUniformShuffleUp: - emit_binary_func_op(result_type, id, ops[3], ops[4], - msl_options.is_ios() ? "quad_shuffle_up" : "simd_shuffle_up"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleUp"); break; case OpGroupNonUniformShuffleDown: - emit_binary_func_op(result_type, id, ops[3], ops[4], - msl_options.is_ios() ? "quad_shuffle_down" : "simd_shuffle_down"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleDown"); break; case OpGroupNonUniformAll: - emit_unary_func_op(result_type, id, ops[3], "simd_all"); + if (msl_options.use_quadgroup_operation()) + emit_unary_func_op(result_type, id, ops[3], "quad_all"); + else + emit_unary_func_op(result_type, id, ops[3], "simd_all"); break; case OpGroupNonUniformAny: - emit_unary_func_op(result_type, id, ops[3], "simd_any"); + if (msl_options.use_quadgroup_operation()) + emit_unary_func_op(result_type, id, ops[3], "quad_any"); + else + emit_unary_func_op(result_type, id, ops[3], "simd_any"); break; case OpGroupNonUniformAllEqual: @@ -7954,7 +15205,7 @@ case OpGroupNonUniform##op: \ else if (operation == GroupOperationClusteredReduce) \ { \ /* Only cluster sizes of 4 are supported. 
*/ \ - uint32_t cluster_size = get(ops[5]).scalar(); \ + uint32_t cluster_size = evaluate_constant_u32(ops[5]); \ if (cluster_size != 4) \ SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \ @@ -7969,6 +15220,7 @@ case OpGroupNonUniform##op: \ MSL_GROUP_OP(IMul, product) #undef MSL_GROUP_OP // The others, unfortunately, don't support InclusiveScan or ExclusiveScan. + #define MSL_GROUP_OP(op, msl_op) \ case OpGroupNonUniform##op: \ { \ @@ -7982,7 +15234,7 @@ case OpGroupNonUniform##op: \ else if (operation == GroupOperationClusteredReduce) \ { \ /* Only cluster sizes of 4 are supported. */ \ - uint32_t cluster_size = get(ops[5]).scalar(); \ + uint32_t cluster_size = evaluate_constant_u32(ops[5]); \ if (cluster_size != 4) \ SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \ @@ -7991,12 +15243,36 @@ case OpGroupNonUniform##op: \ SPIRV_CROSS_THROW("Invalid group operation."); \ break; \ } + +#define MSL_GROUP_OP_CAST(op, msl_op, type) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "simd_" #msl_op, type, type); \ + else if (operation == GroupOperationInclusiveScan) \ + SPIRV_CROSS_THROW("Metal doesn't support InclusiveScan for OpGroupNonUniform" #op "."); \ + else if (operation == GroupOperationExclusiveScan) \ + SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \ + else if (operation == GroupOperationClusteredReduce) \ + { \ + /* Only cluster sizes of 4 are supported. 
*/ \ + uint32_t cluster_size = evaluate_constant_u32(ops[5]); \ + if (cluster_size != 4) \ + SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ + emit_unary_func_op_cast(result_type, id, ops[4], "quad_" #msl_op, type, type); \ + } \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + MSL_GROUP_OP(FMin, min) MSL_GROUP_OP(FMax, max) - MSL_GROUP_OP(SMin, min) - MSL_GROUP_OP(SMax, max) - MSL_GROUP_OP(UMin, min) - MSL_GROUP_OP(UMax, max) + MSL_GROUP_OP_CAST(SMin, min, int_type) + MSL_GROUP_OP_CAST(SMax, max, int_type) + MSL_GROUP_OP_CAST(UMin, min, uint_type) + MSL_GROUP_OP_CAST(UMax, max, uint_type) MSL_GROUP_OP(BitwiseAnd, and) MSL_GROUP_OP(BitwiseOr, or) MSL_GROUP_OP(BitwiseXor, xor) @@ -8004,28 +15280,15 @@ case OpGroupNonUniform##op: \ MSL_GROUP_OP(LogicalOr, or) MSL_GROUP_OP(LogicalXor, xor) // clang-format on +#undef MSL_GROUP_OP +#undef MSL_GROUP_OP_CAST case OpGroupNonUniformQuadSwap: - { - // We can implement this easily based on the following table giving - // the target lane ID from the direction and current lane ID: - // Direction - // | 0 | 1 | 2 | - // ---+---+---+---+ - // L 0 | 1 2 3 - // a 1 | 0 3 2 - // n 2 | 3 0 1 - // e 3 | 2 1 0 - // Notice that target = source ^ (direction + 1). 
- uint32_t mask = get(ops[4]).scalar() + 1; - uint32_t mask_id = ir.increase_bound_by(1); - set(mask_id, expression_type_id(ops[4]), mask, false); - emit_binary_func_op(result_type, id, ops[3], mask_id, "quad_shuffle_xor"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadSwap"); break; - } case OpGroupNonUniformQuadBroadcast: - emit_binary_func_op(result_type, id, ops[3], ops[4], "quad_broadcast"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadBroadcast"); break; default: @@ -8043,19 +15306,24 @@ string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in assert(out_type.basetype != SPIRType::Boolean); assert(in_type.basetype != SPIRType::Boolean); - bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); - bool same_size_cast = out_type.width == in_type.width; + bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type) && (out_type.vecsize == in_type.vecsize); + bool same_size_cast = (out_type.width * out_type.vecsize) == (in_type.width * in_type.vecsize); - if (integral_cast && same_size_cast) - { - // Trivial bitcast case, casts between integers. - return type_to_glsl(out_type); - } - else - { - // Fall back to the catch-all bitcast in MSL. + // Bitcasting can only be used between types of the same overall size. + // And always formally cast between integers, because it's trivial, and also + // because Metal can internally cast the results of some integer ops to a larger + // size (eg. short shift right becomes int), which means chaining integer ops + // together may introduce size variations that SPIR-V doesn't know about. + if (same_size_cast && !integral_cast) return "as_type<" + type_to_glsl(out_type) + ">"; - } + else + return type_to_glsl(out_type); +} + +bool CompilerMSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t) +{ + // This is handled from the outside where we deal with PtrToU/UToPtr and friends. 
+ return false; } // Returns an MSL string identifying the name of a SPIR-V builtin. @@ -8064,20 +15332,114 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) { switch (builtin) { - + // Handle HLSL-style 0-based vertex/instance index. // Override GLSL compiler strictness case BuiltInVertexId: - return "gl_VertexID"; + ensure_builtin(StorageClassInput, BuiltInVertexId); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_vertex_arg != TriState::No) + needs_base_vertex_arg = TriState::Yes; + return "gl_VertexID"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseVertex); + return "(gl_VertexID - gl_BaseVertex)"; + } + } + else + { + return "gl_VertexID"; + } case BuiltInInstanceId: - return "gl_InstanceID"; + ensure_builtin(StorageClassInput, BuiltInInstanceId); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_instance_arg != TriState::No) + needs_base_instance_arg = TriState::Yes; + return "gl_InstanceID"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseInstance); + return "(gl_InstanceID - gl_BaseInstance)"; + } + } + else + { + return "gl_InstanceID"; + } case BuiltInVertexIndex: - return "gl_VertexIndex"; + ensure_builtin(StorageClassInput, BuiltInVertexIndex); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_vertex_arg != TriState::No) + needs_base_vertex_arg = TriState::Yes; + return "gl_VertexIndex"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseVertex); + return "(gl_VertexIndex - gl_BaseVertex)"; + } + } + 
else + { + return "gl_VertexIndex"; + } case BuiltInInstanceIndex: - return "gl_InstanceIndex"; + ensure_builtin(StorageClassInput, BuiltInInstanceIndex); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_instance_arg != TriState::No) + needs_base_instance_arg = TriState::Yes; + return "gl_InstanceIndex"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseInstance); + return "(gl_InstanceIndex - gl_BaseInstance)"; + } + } + else + { + return "gl_InstanceIndex"; + } case BuiltInBaseVertex: - return "gl_BaseVertex"; + if (msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + needs_base_vertex_arg = TriState::No; + return "gl_BaseVertex"; + } + else + { + SPIRV_CROSS_THROW("BaseVertex requires Metal 1.1 and Mac or Apple A9+ hardware."); + } case BuiltInBaseInstance: - return "gl_BaseInstance"; + if (msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + needs_base_instance_arg = TriState::No; + return "gl_BaseInstance"; + } + else + { + SPIRV_CROSS_THROW("BaseInstance requires Metal 1.1 and Mac or Apple A9+ hardware."); + } case BuiltInDrawIndex: SPIRV_CROSS_THROW("DrawIndex is not supported in MSL."); @@ -8088,55 +15450,76 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) if (!msl_options.supports_msl_version(2, 0)) SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0."); /* fallthrough */ + case BuiltInFragDepth: + case BuiltInFragStencilRefEXT: + if ((builtin == BuiltInFragDepth && !msl_options.enable_frag_depth_builtin) || + (builtin == BuiltInFragStencilRefEXT && !msl_options.enable_frag_stencil_ref_builtin)) + break; + /* fallthrough */ case BuiltInPosition: case BuiltInPointSize: case BuiltInClipDistance: case 
BuiltInCullDistance: case BuiltInLayer: - case BuiltInFragDepth: - case BuiltInFragStencilRefEXT: - case BuiltInSampleMask: - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader()) break; - if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) && + !is_stage_output_builtin_masked(builtin)) return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); + break; + case BuiltInSampleMask: + if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point) && + (has_additional_fixed_sample_mask() || needs_sample_id)) + { + string samp_mask_in; + samp_mask_in += "(" + CompilerGLSL::builtin_to_glsl(builtin, storage); + if (has_additional_fixed_sample_mask()) + samp_mask_in += " & " + additional_fixed_sample_mask_str(); + if (needs_sample_id) + samp_mask_in += " & (1 << gl_SampleID)"; + samp_mask_in += ")"; + return samp_mask_in; + } + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) && + !is_stage_output_builtin_masked(builtin)) + return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); break; - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) return stage_in_var_name + "." 
+ CompilerGLSL::builtin_to_glsl(builtin, storage); break; case BuiltInTessLevelOuter: - if (get_execution_model() == ExecutionModelTessellationEvaluation) + if (is_tesc_shader() && storage != StorageClassInput && current_function && + (current_function->self == ir.default_entry_point)) { - if (storage != StorageClassOutput && !get_entry_point().flags.get(ExecutionModeTriangles) && - current_function && (current_function->self == ir.default_entry_point)) - return join(patch_stage_in_var_name, ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); - else - break; - } - if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "].edgeTessellationFactor"); + } break; case BuiltInTessLevelInner: - if (get_execution_model() == ExecutionModelTessellationEvaluation) + if (is_tesc_shader() && storage != StorageClassInput && current_function && + (current_function->self == ir.default_entry_point)) { - if (storage != StorageClassOutput && !get_entry_point().flags.get(ExecutionModeTriangles) && - current_function && (current_function->self == ir.default_entry_point)) - return join(patch_stage_in_var_name, ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); - else - break; - } - if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "].insideTessellationFactor"); + } break; + case BuiltInHelperInvocation: + if (needs_manual_helper_invocation_updates()) + break; + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.3 on iOS."); + else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS."); + // In SPIR-V 1.6 with Volatile 
HelperInvocation, we cannot emit a fixup early. + return "simd_is_helper_thread()"; + default: break; } @@ -8190,6 +15573,11 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) // Tess. control function in case BuiltInInvocationId: + if (msl_options.multi_patch_workgroup) + { + // Shouldn't be reached. + SPIRV_CROSS_THROW("InvocationId is computed manually with multi-patch workgroups in MSL."); + } return "thread_index_in_threadgroup"; case BuiltInPatchVertices: // Shouldn't be reached. @@ -8198,12 +15586,17 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) switch (execution.model) { case ExecutionModelTessellationControl: + if (msl_options.multi_patch_workgroup) + { + // Shouldn't be reached. + SPIRV_CROSS_THROW("PrimitiveId is computed manually with multi-patch workgroups in MSL."); + } return "threadgroup_position_in_grid"; case ExecutionModelTessellationEvaluation: return "patch_id"; case ExecutionModelFragment: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("PrimitiveId is not supported in fragment on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("PrimitiveId on iOS requires MSL 2.3."); else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("PrimitiveId on macOS requires MSL 2.2."); return "primitive_id"; @@ -8271,6 +15664,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) return "thread_index_in_threadgroup"; case BuiltInSubgroupSize: + if (msl_options.emulate_subgroups || msl_options.fixed_subgroup_size != 0) + // Shouldn't be reached. + SPIRV_CROSS_THROW("Emitting threads_per_simdgroup attribute with fixed subgroup size??"); if (execution.model == ExecutionModelFragment) { if (!msl_options.supports_msl_version(2, 2)) @@ -8285,28 +15681,42 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) } case BuiltInNumSubgroups: + if (msl_options.emulate_subgroups) + // Shouldn't be reached. 
+ SPIRV_CROSS_THROW("NumSubgroups is handled specially with emulation."); if (!msl_options.supports_msl_version(2)) SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); - return msl_options.is_ios() ? "quadgroups_per_threadgroup" : "simdgroups_per_threadgroup"; + return msl_options.use_quadgroup_operation() ? "quadgroups_per_threadgroup" : "simdgroups_per_threadgroup"; case BuiltInSubgroupId: + if (msl_options.emulate_subgroups) + // Shouldn't be reached. + SPIRV_CROSS_THROW("SubgroupId is handled specially with emulation."); if (!msl_options.supports_msl_version(2)) SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); - return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup"; + return msl_options.use_quadgroup_operation() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup"; case BuiltInSubgroupLocalInvocationId: + if (msl_options.emulate_subgroups) + // Shouldn't be reached. + SPIRV_CROSS_THROW("SubgroupLocalInvocationId is handled specially with emulation."); if (execution.model == ExecutionModelFragment) { if (!msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("thread_index_in_simdgroup requires Metal 2.2 in fragment shaders."); return "thread_index_in_simdgroup"; } - else + else if (execution.model == ExecutionModelKernel || execution.model == ExecutionModelGLCompute || + execution.model == ExecutionModelTessellationControl || + (execution.model == ExecutionModelVertex && msl_options.vertex_for_tessellation)) { + // We are generating a Metal kernel function. if (!msl_options.supports_msl_version(2)) - SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); - return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup"; + SPIRV_CROSS_THROW("Subgroup builtins in kernel functions require Metal 2.0."); + return msl_options.use_quadgroup_operation() ? 
"thread_index_in_quadgroup" : "thread_index_in_simdgroup"; } + else + SPIRV_CROSS_THROW("Subgroup builtins are not available in this type of function."); case BuiltInSubgroupEqMask: case BuiltInSubgroupGeMask: @@ -8316,18 +15726,16 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) // Shouldn't be reached. SPIRV_CROSS_THROW("Subgroup ballot masks are handled specially in MSL."); - case BuiltInBaryCoordNV: - // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Barycentrics not supported on iOS."); + case BuiltInBaryCoordKHR: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); else if (!msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); return "barycentric_coord, center_perspective"; - case BuiltInBaryCoordNoPerspNV: - // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. 
- if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Barycentrics not supported on iOS."); + case BuiltInBaryCoordNoPerspKHR: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); else if (!msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); return "barycentric_coord, center_no_perspective"; @@ -8340,7 +15748,6 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) // Returns an MSL string type declaration for a SPIR-V builtin string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) { - const SPIREntryPoint &execution = get_entry_point(); switch (builtin) { // Vertex function in @@ -8361,6 +15768,7 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) // Vertex function out case BuiltInClipDistance: + case BuiltInCullDistance: return "float"; case BuiltInPointSize: return "float"; @@ -8383,17 +15791,17 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) // Tess. control function out case BuiltInTessLevelInner: - if (execution.model == ExecutionModelTessellationEvaluation) - return !execution.flags.get(ExecutionModeTriangles) ? "float2" : "float"; + if (is_tese_shader()) + return (msl_options.raw_buffer_tese_input || is_tessellating_triangles()) ? "float" : "float2"; return "half"; case BuiltInTessLevelOuter: - if (execution.model == ExecutionModelTessellationEvaluation) - return !execution.flags.get(ExecutionModeTriangles) ? "float4" : "float"; + if (is_tese_shader()) + return (msl_options.raw_buffer_tese_input || is_tessellating_triangles()) ? "float" : "float4"; return "half"; // Tess. evaluation function in case BuiltInTessCoord: - return execution.flags.get(ExecutionModeTriangles) ? 
"float3" : "float2"; + return "float3"; // Fragment function in case BuiltInFrontFacing: @@ -8411,6 +15819,14 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) case BuiltInViewIndex: return "uint"; + case BuiltInHelperInvocation: + return "bool"; + + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: + // Use the type as declared, can be 1, 2 or 3 components. + return type_to_glsl(get_variable_data_type(get(id))); + // Fragment function out case BuiltInFragDepth: return "float"; @@ -8437,13 +15853,8 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) case BuiltInSubgroupLtMask: return "uint4"; - case BuiltInHelperInvocation: - return "bool"; - - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: - // Use the type as declared, can be 1, 2 or 3 components. - return type_to_glsl(get_variable_data_type(get(id))); + case BuiltInDeviceIndex: + return "int"; default: return "unsupported-built-in-type"; @@ -8457,17 +15868,153 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma) if (prefix_comma) bi_arg += ", "; + // Handle HLSL-style 0-based vertex/instance index. 
+ builtin_declaration = true; bi_arg += builtin_type_decl(builtin); bi_arg += " " + builtin_to_glsl(builtin, StorageClassInput); bi_arg += " [[" + builtin_qualifier(builtin) + "]]"; + builtin_declaration = false; return bi_arg; } +const SPIRType &CompilerMSL::get_physical_member_type(const SPIRType &type, uint32_t index) const +{ + if (member_is_remapped_physical_type(type, index)) + return get(get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID)); + else + return get(type.member_types[index]); +} + +SPIRType CompilerMSL::get_presumed_input_type(const SPIRType &ib_type, uint32_t index) const +{ + SPIRType type = get_physical_member_type(ib_type, index); + uint32_t loc = get_member_decoration(ib_type.self, index, DecorationLocation); + uint32_t cmp = get_member_decoration(ib_type.self, index, DecorationComponent); + auto p_va = inputs_by_location.find({loc, cmp}); + if (p_va != end(inputs_by_location) && p_va->second.vecsize > type.vecsize) + type.vecsize = p_va->second.vecsize; + + return type; +} + +uint32_t CompilerMSL::get_declared_type_array_stride_msl(const SPIRType &type, bool is_packed, bool row_major) const +{ + // Array stride in MSL is always size * array_size. sizeof(float3) == 16, + // unlike GLSL and HLSL where array stride would be 16 and size 12. + + // We could use parent type here and recurse, but that makes creating physical type remappings + // far more complicated. We'd rather just create the final type, and ignore having to create the entire type + // hierarchy in order to compute this value, so make a temporary type on the stack. + + auto basic_type = type; + basic_type.array.clear(); + basic_type.array_size_literal.clear(); + uint32_t value_size = get_declared_type_size_msl(basic_type, is_packed, row_major); + + uint32_t dimensions = uint32_t(type.array.size()); + assert(dimensions > 0); + dimensions--; + + // Multiply together every dimension, except the last one. 
+ for (uint32_t dim = 0; dim < dimensions; dim++) + { + uint32_t array_size = to_array_size_literal(type, dim); + value_size *= max(array_size, 1u); + } + + return value_size; +} + +uint32_t CompilerMSL::get_declared_struct_member_array_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_array_stride_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_input_array_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_array_stride_msl(get_presumed_input_type(type, index), false, + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const +{ + // For packed matrices, we just use the size of the vector type. + // Otherwise, MatrixStride == alignment, which is the size of the underlying vector type. + if (packed) + return (type.width / 8) * ((row_major && type.columns > 1) ? 
type.columns : type.vecsize); + else + return get_declared_type_alignment_msl(type, false, row_major); +} + +uint32_t CompilerMSL::get_declared_struct_member_matrix_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_matrix_stride_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_input_matrix_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_matrix_stride_msl(get_presumed_input_type(type, index), false, + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment, + bool ignore_padding) const +{ + // If we have a target size, that is the declared size as well. + if (!ignore_padding && has_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget)) + return get_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget); + + if (struct_type.member_types.empty()) + return 0; + + uint32_t mbr_cnt = uint32_t(struct_type.member_types.size()); + + // In MSL, a struct's alignment is equal to the maximum alignment of any of its members. + uint32_t alignment = 1; + + if (!ignore_alignment) + { + for (uint32_t i = 0; i < mbr_cnt; i++) + { + uint32_t mbr_alignment = get_declared_struct_member_alignment_msl(struct_type, i); + alignment = max(alignment, mbr_alignment); + } + } + + // Last member will always be matched to the final Offset decoration, but size of struct in MSL now depends + // on physical size in MSL, and the size of the struct itself is then aligned to struct alignment. 
+ uint32_t spirv_offset = type_struct_member_offset(struct_type, mbr_cnt - 1); + uint32_t msl_size = spirv_offset + get_declared_struct_member_size_msl(struct_type, mbr_cnt - 1); + msl_size = (msl_size + alignment - 1) & ~(alignment - 1); + return msl_size; +} + // Returns the byte size of a struct member. -size_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const +uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_packed, bool row_major) const { - auto &type = get(struct_type.member_types[index]); + // Pointers take 8 bytes each + if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + { + uint32_t type_size = 8 * (type.vecsize == 3 ? 4 : type.vecsize); + + // Work our way through potentially layered arrays, + // stopping when we hit a pointer that is not also an array. + int32_t dim_idx = (int32_t)type.array.size() - 1; + auto *p_type = &type; + while (!type_is_pointer(*p_type) && dim_idx >= 0) + { + type_size *= to_array_size_literal(*p_type, dim_idx); + p_type = &get(p_type->parent_type); + dim_idx--; + } + + return type_size; + } switch (type.basetype) { @@ -8481,39 +16028,56 @@ size_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &struct_t default: { - // For arrays, we can use ArrayStride to get an easy check. - // Runtime arrays will have zero size so force to min of one. if (!type.array.empty()) { uint32_t array_size = to_array_size_literal(type); - return type_struct_member_array_stride(struct_type, index) * max(array_size, 1u); + return get_declared_type_array_stride_msl(type, is_packed, row_major) * max(array_size, 1u); } if (type.basetype == SPIRType::Struct) + return get_declared_struct_size_msl(type); + + if (is_packed) { - // The size of a struct in Metal is aligned up to its natural alignment. 
- auto size = get_declared_struct_size(type); - auto alignment = get_declared_struct_member_alignment(struct_type, index); - return (size + alignment - 1) & ~(alignment - 1); + return type.vecsize * type.columns * (type.width / 8); } + else + { + // An unpacked 3-element vector or matrix column is the same memory size as a 4-element. + uint32_t vecsize = type.vecsize; + uint32_t columns = type.columns; - uint32_t component_size = type.width / 8; - uint32_t vecsize = type.vecsize; - uint32_t columns = type.columns; + if (row_major && columns > 1) + swap(vecsize, columns); - // An unpacked 3-element vector or matrix column is the same memory size as a 4-element. - if (vecsize == 3 && !has_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPacked)) - vecsize = 4; + if (vecsize == 3) + vecsize = 4; - return component_size * vecsize * columns; + return vecsize * columns * (type.width / 8); + } } } } -// Returns the byte alignment of a struct member. -size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const +uint32_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_size_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_input_size_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_size_msl(get_presumed_input_type(type, index), false, + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +// Returns the byte alignment of a type. +uint32_t CompilerMSL::get_declared_type_alignment_msl(const SPIRType &type, bool is_packed, bool row_major) const { - auto &type = get(struct_type.member_types[index]); + // Pointers aligns on multiples of 8 bytes + if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + return 8 * (type.vecsize == 3 ? 
4 : type.vecsize); switch (type.basetype) { @@ -8525,10 +16089,6 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_ case SPIRType::Sampler: SPIRV_CROSS_THROW("Querying alignment of opaque object."); - case SPIRType::Int64: - SPIRV_CROSS_THROW("long types are not supported in buffers in MSL."); - case SPIRType::UInt64: - SPIRV_CROSS_THROW("ulong types are not supported in buffers in MSL."); case SPIRType::Double: SPIRV_CROSS_THROW("double types are not supported in buffers in MSL."); @@ -8537,40 +16097,47 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_ // In MSL, a struct's alignment is equal to the maximum alignment of any of its members. uint32_t alignment = 1; for (uint32_t i = 0; i < type.member_types.size(); i++) - alignment = max(alignment, uint32_t(get_declared_struct_member_alignment(type, i))); + alignment = max(alignment, uint32_t(get_declared_struct_member_alignment_msl(type, i))); return alignment; } default: { + if (type.basetype == SPIRType::Int64 && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("long types in buffers are only supported in MSL 2.3 and above."); + if (type.basetype == SPIRType::UInt64 && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("ulong types in buffers are only supported in MSL 2.3 and above."); // Alignment of packed type is the same as the underlying component or column size. // Alignment of unpacked type is the same as the vector size. // Alignment of 3-elements vector is the same as 4-elements (including packed using column). - if (member_is_packed_type(struct_type, index)) - { - // This is getting pretty complicated. - // The special case of array of float/float2 needs to be handled here. - uint32_t packed_type_id = - get_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPackedType); - const SPIRType *packed_type = packed_type_id != 0 ? 
&get(packed_type_id) : nullptr; - if (packed_type && is_array(*packed_type) && !is_matrix(*packed_type) && - packed_type->basetype != SPIRType::Struct) - { - uint32_t stride = type_struct_member_array_stride(struct_type, index); - if (stride == (packed_type->width / 8) * 4) - return stride; - else - return packed_type->width / 8; - } - else - return type.width / 8; + if (is_packed) + { + // If we have packed_T and friends, the alignment is always scalar. + return type.width / 8; } else - return (type.width / 8) * (type.vecsize == 3 ? 4 : type.vecsize); + { + // This is the general rule for MSL. Size == alignment. + uint32_t vecsize = (row_major && type.columns > 1) ? type.columns : type.vecsize; + return (type.width / 8) * (vecsize == 3 ? 4 : vecsize); + } } } } +uint32_t CompilerMSL::get_declared_struct_member_alignment_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_alignment_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_input_alignment_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_alignment_msl(get_presumed_input_type(type, index), false, + has_member_decoration(type.self, index, DecorationRowMajor)); +} + bool CompilerMSL::skip_argument(uint32_t) const { return false; @@ -8626,6 +16193,17 @@ bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *ar return true; } +// If a needed custom function wasn't added before, add it and force a recompile. 
+void CompilerMSL::add_spv_func_and_recompile(SPVFuncImpl spv_func) +{ + if (spv_function_implementations.count(spv_func) == 0) + { + spv_function_implementations.insert(spv_func); + suppress_missing_prototypes = true; + force_recompile(); + } +} + bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length) { // Since MSL exists in a single execution scope, function prototype declarations are not @@ -8648,14 +16226,27 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui suppress_missing_prototypes = true; break; + case OpDemoteToHelperInvocationEXT: + uses_discard = true; + break; + + // Emulate texture2D atomic operations + case OpImageTexelPointer: + { + auto *var = compiler.maybe_get_backing_variable(args[2]); + image_pointers[args[1]] = var ? var->self : ID(0); + break; + } + case OpImageWrite: - uses_resource_write = true; + uses_image_write = true; break; case OpStore: check_resource_write(args[0]); break; + // Emulate texture2D atomic operations case OpAtomicExchange: case OpAtomicCompareExchange: case OpAtomicCompareExchangeWeak: @@ -8670,20 +16261,57 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui case OpAtomicAnd: case OpAtomicOr: case OpAtomicXor: + { + uses_atomics = true; + auto it = image_pointers.find(args[2]); + if (it != image_pointers.end()) + { + uses_image_write = true; + compiler.atomic_image_vars.insert(it->second); + } + else + check_resource_write(args[2]); + break; + } + + case OpAtomicStore: + { uses_atomics = true; - check_resource_write(args[2]); + auto it = image_pointers.find(args[0]); + if (it != image_pointers.end()) + { + compiler.atomic_image_vars.insert(it->second); + uses_image_write = true; + } + else + check_resource_write(args[0]); break; + } case OpAtomicLoad: + { uses_atomics = true; + auto it = image_pointers.find(args[2]); + if (it != image_pointers.end()) + { + compiler.atomic_image_vars.insert(it->second); + } break; + } 
case OpGroupNonUniformInverseBallot: needs_subgroup_invocation_id = true; break; + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + needs_subgroup_size = true; + break; + case OpGroupNonUniformBallotBitCount: - if (args[3] != GroupOperationReduce) + if (args[3] == GroupOperationReduce) + needs_subgroup_size = true; + else needs_subgroup_invocation_id = true; break; @@ -8703,11 +16331,66 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui uint32_t result_type = args[0]; uint32_t id = args[1]; uint32_t ptr = args[2]; + compiler.set(id, "", result_type, true); compiler.register_read(id, ptr, true); compiler.ir.ids[id].set_allow_type_rewrite(); break; - } + } + + case OpExtInst: + { + uint32_t extension_set = args[2]; + if (compiler.get(extension_set).ext == SPIRExtension::GLSL) + { + auto op_450 = static_cast(args[3]); + switch (op_450) + { + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + { + if (!compiler.msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Pull-model interpolation requires MSL 2.3."); + // Fragment varyings used with pull-model interpolation need special handling, + // due to the way pull-model interpolation works in Metal. + auto *var = compiler.maybe_get_backing_variable(args[4]); + if (var) + { + compiler.pull_model_inputs.insert(var->self); + auto &var_type = compiler.get_variable_element_type(*var); + // In addition, if this variable has a 'Sample' decoration, we need the sample ID + // in order to do default interpolation. + if (compiler.has_decoration(var->self, DecorationSample)) + { + needs_sample_id = true; + } + else if (var_type.basetype == SPIRType::Struct) + { + // Now we need to check each member and see if it has this decoration. 
+ for (uint32_t i = 0; i < var_type.member_types.size(); ++i) + { + if (compiler.has_member_decoration(var_type.self, i, DecorationSample)) + { + needs_sample_id = true; + break; + } + } + } + } + break; + } + default: + break; + } + } + break; + } + + case OpIsHelperInvocationEXT: + if (compiler.needs_manual_helper_invocation_updates()) + needs_helper_invocation = true; + break; default: break; @@ -8727,7 +16410,7 @@ void CompilerMSL::OpCodePreprocessor::check_resource_write(uint32_t var_id) auto *p_var = compiler.maybe_get_backing_variable(var_id); StorageClass sc = p_var ? p_var->storage : StorageClassMax; if (sc == StorageClassUniform || sc == StorageClassStorageBuffer) - uses_resource_write = true; + uses_buffer_write = true; } // Returns an enumeration of a SPIR-V function that needs to be output for certain Op codes. @@ -8738,60 +16421,61 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpFMod: return SPVFuncImplMod; - case OpFunctionCall: - { - auto &return_type = compiler.get(args[0]); - if (return_type.array.size() > 1) + case OpFAdd: + case OpFSub: + if (compiler.msl_options.invariant_float_math || + compiler.has_decoration(args[1], DecorationNoContraction)) { - if (return_type.array.size() > SPVFuncImplArrayCopyMultidimMax) - SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); - return static_cast(SPVFuncImplArrayCopyMultidimBase + return_type.array.size()); + return opcode == OpFAdd ? SPVFuncImplFAdd : SPVFuncImplFSub; } - else if (return_type.array.size() > 0) - return SPVFuncImplArrayCopy; - break; - } - - case OpStore: - { - // Get the result type of the RHS. Since this is run as a pre-processing stage, - // we must extract the result type directly from the Instruction, rather than the ID. - uint32_t id_lhs = args[0]; - uint32_t id_rhs = args[1]; - const SPIRType *type = nullptr; - if (compiler.ir.ids[id_rhs].get_type() != TypeNone) - { - // Could be a constant, or similar. 
- type = &compiler.expression_type(id_rhs); - } - else + case OpFMul: + case OpOuterProduct: + case OpMatrixTimesVector: + case OpVectorTimesMatrix: + case OpMatrixTimesMatrix: + if (compiler.msl_options.invariant_float_math || + compiler.has_decoration(args[1], DecorationNoContraction)) { - // Or ... an expression. - uint32_t tid = result_types[id_rhs]; - if (tid) - type = &compiler.get(tid); + return SPVFuncImplFMul; } + break; - auto *var = compiler.maybe_get(id_lhs); + case OpQuantizeToF16: + return SPVFuncImplQuantizeToF16; + + case OpTypeArray: + { + // Allow Metal to use the array template to make arrays a value type + return SPVFuncImplUnsafeArray; + } - // Are we simply assigning to a statically assigned variable which takes a constant? - // Don't bother emitting this function. - bool static_expression_lhs = - var && var->storage == StorageClassFunction && var->statically_assigned && var->remapped_variable; - if (type && compiler.is_array(*type) && !static_expression_lhs) + // Emulate texture2D atomic operations + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicCompareExchangeWeak: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + case OpAtomicLoad: + case OpAtomicStore: + { + auto it = image_pointers.find(args[opcode == OpAtomicStore ? 
0 : 2]); + if (it != image_pointers.end()) { - if (type->array.size() > 1) - { - if (type->array.size() > SPVFuncImplArrayCopyMultidimMax) - SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); - return static_cast(SPVFuncImplArrayCopyMultidimBase + type->array.size()); - } - else - return SPVFuncImplArrayCopy; + uint32_t tid = compiler.get(it->second).basetype; + if (tid && compiler.get(tid).image.dim == Dim2D) + return SPVFuncImplImage2DAtomicCoords; } - break; } @@ -8803,27 +16487,9 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o uint32_t tid = result_types[args[opcode == OpImageWrite ? 0 : 2]]; if (tid && compiler.get(tid).image.dim == DimBuffer && !compiler.msl_options.texture_buffer_native) return SPVFuncImplTexelBufferCoords; - - if (opcode == OpImageFetch && compiler.msl_options.swizzle_texture_samples) - return SPVFuncImplTextureSwizzle; - break; } - case OpImageSampleExplicitLod: - case OpImageSampleProjExplicitLod: - case OpImageSampleDrefExplicitLod: - case OpImageSampleProjDrefExplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageSampleDrefImplicitLod: - case OpImageSampleProjDrefImplicitLod: - case OpImageGather: - case OpImageDrefGather: - if (compiler.msl_options.swizzle_texture_samples) - return SPVFuncImplTextureSwizzle; - break; - case OpExtInst: { uint32_t extension_set = args[2]; @@ -8849,16 +16515,21 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o auto &type = compiler.get(args[0]); if (type.vecsize == 1) return SPVFuncImplReflectScalar; - else - return SPVFuncImplNone; + break; } case GLSLstd450Refract: { auto &type = compiler.get(args[0]); if (type.vecsize == 1) return SPVFuncImplRefractScalar; - else - return SPVFuncImplNone; + break; + } + case GLSLstd450FaceForward: + { + auto &type = compiler.get(args[0]); + if (type.vecsize == 1) + return SPVFuncImplFaceForwardScalar; + break; } case 
GLSLstd450MatrixInverse: { @@ -8883,6 +16554,12 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o break; } + case OpGroupNonUniformBroadcast: + return SPVFuncImplSubgroupBroadcast; + + case OpGroupNonUniformBroadcastFirst: + return SPVFuncImplSubgroupBroadcastFirst; + case OpGroupNonUniformBallot: return SPVFuncImplSubgroupBallot; @@ -8902,6 +16579,24 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpGroupNonUniformAllEqual: return SPVFuncImplSubgroupAllEqual; + case OpGroupNonUniformShuffle: + return SPVFuncImplSubgroupShuffle; + + case OpGroupNonUniformShuffleXor: + return SPVFuncImplSubgroupShuffleXor; + + case OpGroupNonUniformShuffleUp: + return SPVFuncImplSubgroupShuffleUp; + + case OpGroupNonUniformShuffleDown: + return SPVFuncImplSubgroupShuffleDown; + + case OpGroupNonUniformQuadBroadcast: + return SPVFuncImplQuadBroadcast; + + case OpGroupNonUniformQuadSwap: + return SPVFuncImplQuadSwap; + default: break; } @@ -8916,8 +16611,27 @@ void CompilerMSL::MemberSorter::sort() // the members should be reordered, based on builtin and sorting aspect meta info. size_t mbr_cnt = type.member_types.size(); SmallVector mbr_idxs(mbr_cnt); - iota(mbr_idxs.begin(), mbr_idxs.end(), 0); // Fill with consecutive indices - std::sort(mbr_idxs.begin(), mbr_idxs.end(), *this); // Sort member indices based on sorting aspect + std::iota(mbr_idxs.begin(), mbr_idxs.end(), 0); // Fill with consecutive indices + std::stable_sort(mbr_idxs.begin(), mbr_idxs.end(), *this); // Sort member indices based on sorting aspect + + bool sort_is_identity = true; + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + if (mbr_idx != mbr_idxs[mbr_idx]) + { + sort_is_identity = false; + break; + } + } + + if (sort_is_identity) + return; + + if (meta.members.size() < type.member_types.size()) + { + // This should never trigger in normal circumstances, but to be safe. 
+ meta.members.resize(type.member_types.size()); + } // Move type and meta member info to the order defined by the sorted member indices. // This is done by creating temporary copies of both member types and meta, and then @@ -8929,32 +16643,36 @@ void CompilerMSL::MemberSorter::sort() type.member_types[mbr_idx] = mbr_types_cpy[mbr_idxs[mbr_idx]]; meta.members[mbr_idx] = mbr_meta_cpy[mbr_idxs[mbr_idx]]; } + + // If we're sorting by Offset, this might affect user code which accesses a buffer block. + // We will need to redirect member indices from defined index to sorted index using reverse lookup. + if (sort_aspect == SortAspect::Offset) + { + type.member_type_index_redirection.resize(mbr_cnt); + for (uint32_t map_idx = 0; map_idx < mbr_cnt; map_idx++) + type.member_type_index_redirection[mbr_idxs[map_idx]] = map_idx; + } } -// Sort first by builtin status (put builtins at end), then by the sorting aspect. bool CompilerMSL::MemberSorter::operator()(uint32_t mbr_idx1, uint32_t mbr_idx2) { auto &mbr_meta1 = meta.members[mbr_idx1]; auto &mbr_meta2 = meta.members[mbr_idx2]; - if (mbr_meta1.builtin != mbr_meta2.builtin) - return mbr_meta2.builtin; - else - switch (sort_aspect) - { - case Location: + + if (sort_aspect == LocationThenBuiltInType) + { + // Sort first by builtin status (put builtins at end), then by the sorting aspect. 
+ if (mbr_meta1.builtin != mbr_meta2.builtin) + return mbr_meta2.builtin; + else if (mbr_meta1.builtin) + return mbr_meta1.builtin_type < mbr_meta2.builtin_type; + else if (mbr_meta1.location == mbr_meta2.location) + return mbr_meta1.component < mbr_meta2.component; + else return mbr_meta1.location < mbr_meta2.location; - case LocationReverse: - return mbr_meta1.location > mbr_meta2.location; - case Offset: - return mbr_meta1.offset < mbr_meta2.offset; - case OffsetThenLocationReverse: - return (mbr_meta1.offset < mbr_meta2.offset) || - ((mbr_meta1.offset == mbr_meta2.offset) && (mbr_meta1.location > mbr_meta2.location)); - case Alphabetical: - return mbr_meta1.alias < mbr_meta2.alias; - default: - return false; - } + } + else + return mbr_meta1.offset < mbr_meta2.offset; } CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa) @@ -8966,7 +16684,7 @@ CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa) meta.members.resize(max(type.member_types.size(), meta.members.size())); } -void CompilerMSL::remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler) +void CompilerMSL::remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler) { auto &type = get(get(id).basetype); if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler) @@ -8982,18 +16700,60 @@ void CompilerMSL::remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t constexpr_samplers_by_binding[{ desc_set, binding }] = sampler; } -void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) +void CompilerMSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) { + bool is_packed = has_extended_decoration(source_id, SPIRVCrossDecorationPhysicalTypePacked); + auto *source_expr = maybe_get(source_id); auto *var = maybe_get_backing_variable(source_id); + const SPIRType *var_type, *phys_type; + if (uint32_t phys_id = 
get_extended_decoration(source_id, SPIRVCrossDecorationPhysicalTypeID)) + phys_type = &get(phys_id); + else + phys_type = &expr_type; if (var) + { source_id = var->self; + var_type = &get_variable_data_type(*var); + } - // Only interested in standalone builtin variables. + // Type fixups for workgroup variables if they are booleans. + if (var && (var->storage == StorageClassWorkgroup || var_type->basetype == SPIRType::Struct) && + expr_type.basetype == SPIRType::Boolean) + expr = join(type_to_glsl(expr_type), "(", expr, ")"); + // Type fixups for workgroup variables if they are matrices. + // Don't do fixup for packed types; those are handled specially. + // FIXME: Maybe use a type like spvStorageMatrix for packed matrices? + if (!msl_options.supports_msl_version(3, 0) && var && + (var->storage == StorageClassWorkgroup || + (var_type->basetype == SPIRType::Struct && + has_extended_decoration(var_type->self, SPIRVCrossDecorationWorkgroupStruct) && !is_packed)) && + expr_type.columns > 1) + { + SPIRType matrix_type = *phys_type; + if (source_expr && source_expr->need_transpose) + swap(matrix_type.vecsize, matrix_type.columns); + matrix_type.array.clear(); + matrix_type.array_size_literal.clear(); + expr = join(type_to_glsl(matrix_type), "(", expr, ")"); + } + + // Only interested in standalone builtin variables in the switch below. if (!has_decoration(source_id, DecorationBuiltIn)) + { + // If the backing variable does not match our expected sign, we can fix it up here. + // See ensure_correct_input_type(). 
+ if (var && var->storage == StorageClassInput) + { + auto &base_type = get(var->basetype); + if (base_type.basetype != SPIRType::Struct && expr_type.basetype != base_type.basetype) + expr = join(type_to_glsl(expr_type), "(", expr, ")"); + } return; + } auto builtin = static_cast(get_decoration(source_id, DecorationBuiltIn)); auto expected_type = expr_type.basetype; + auto expected_width = expr_type.width; switch (builtin) { case BuiltInGlobalInvocationId: @@ -9009,13 +16769,21 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp case BuiltInSubgroupSize: case BuiltInSubgroupLocalInvocationId: case BuiltInViewIndex: + case BuiltInVertexIndex: + case BuiltInInstanceIndex: + case BuiltInBaseInstance: + case BuiltInBaseVertex: expected_type = SPIRType::UInt; + expected_width = 32; break; case BuiltInTessLevelInner: case BuiltInTessLevelOuter: - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader()) + { expected_type = SPIRType::Half; + expected_width = 16; + } break; default: @@ -9023,21 +16791,79 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp } if (expected_type != expr_type.basetype) - expr = bitcast_expression(expr_type, expected_type, expr); - - if (builtin == BuiltInTessCoord && get_entry_point().flags.get(ExecutionModeQuads) && expr_type.vecsize == 3) { - // In SPIR-V, this is always a vec3, even for quads. In Metal, though, it's a float2 for quads. - // The code is expecting a float3, so we need to widen this. - expr = join("float3(", expr, ", 0)"); + if (!expr_type.array.empty() && (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter)) + { + // Triggers when loading TessLevel directly as an array. + // Need explicit padding + cast. 
+ auto wrap_expr = join(type_to_glsl(expr_type), "({ "); + + uint32_t array_size = get_physical_tess_level_array_size(builtin); + for (uint32_t i = 0; i < array_size; i++) + { + if (array_size > 1) + wrap_expr += join("float(", expr, "[", i, "])"); + else + wrap_expr += join("float(", expr, ")"); + if (i + 1 < array_size) + wrap_expr += ", "; + } + + if (is_tessellating_triangles()) + wrap_expr += ", 0.0"; + + wrap_expr += " })"; + expr = std::move(wrap_expr); + } + else + { + // These are of different widths, so we cannot do a straight bitcast. + if (expected_width != expr_type.width) + expr = join(type_to_glsl(expr_type), "(", expr, ")"); + else + expr = bitcast_expression(expr_type, expected_type, expr); + } } } -void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) +void CompilerMSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) { + bool is_packed = has_extended_decoration(target_id, SPIRVCrossDecorationPhysicalTypePacked); + auto *target_expr = maybe_get(target_id); auto *var = maybe_get_backing_variable(target_id); + const SPIRType *var_type, *phys_type; + if (uint32_t phys_id = get_extended_decoration(target_id, SPIRVCrossDecorationPhysicalTypeID)) + phys_type = &get(phys_id); + else + phys_type = &expr_type; if (var) + { target_id = var->self; + var_type = &get_variable_data_type(*var); + } + + // Type fixups for workgroup variables if they are booleans. + if (var && (var->storage == StorageClassWorkgroup || var_type->basetype == SPIRType::Struct) && + expr_type.basetype == SPIRType::Boolean) + { + auto short_type = expr_type; + short_type.basetype = SPIRType::Short; + expr = join(type_to_glsl(short_type), "(", expr, ")"); + } + // Type fixups for workgroup variables if they are matrices. + // Don't do fixup for packed types; those are handled specially. + // FIXME: Maybe use a type like spvStorageMatrix for packed matrices? 
+ if (!msl_options.supports_msl_version(3, 0) && var && + (var->storage == StorageClassWorkgroup || + (var_type->basetype == SPIRType::Struct && + has_extended_decoration(var_type->self, SPIRVCrossDecorationWorkgroupStruct) && !is_packed)) && + expr_type.columns > 1) + { + SPIRType matrix_type = *phys_type; + if (target_expr && target_expr->need_transpose) + swap(matrix_type.vecsize, matrix_type.columns); + expr = join("spvStorage_", type_to_glsl(matrix_type), "(", expr, ")"); + } // Only interested in standalone builtin variables. if (!has_decoration(target_id, DecorationBuiltIn)) @@ -9045,6 +16871,7 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr auto builtin = static_cast(get_decoration(target_id, DecorationBuiltIn)); auto expected_type = expr_type.basetype; + auto expected_width = expr_type.width; switch (builtin) { case BuiltInLayer: @@ -9053,11 +16880,13 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr case BuiltInPrimitiveId: case BuiltInViewIndex: expected_type = SPIRType::UInt; + expected_width = 32; break; case BuiltInTessLevelInner: case BuiltInTessLevelOuter: expected_type = SPIRType::Half; + expected_width = 16; break; default: @@ -9066,10 +16895,13 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr if (expected_type != expr_type.basetype) { - if (expected_type == SPIRType::Half && expr_type.basetype == SPIRType::Float) + if (expected_width != expr_type.width) { // These are of different widths, so we cannot do a straight bitcast. 
- expr = join("half(", expr, ")"); + auto type = expr_type; + type.basetype = expected_type; + type.width = expected_width; + expr = join(type_to_glsl(type), "(", expr, ")"); } else { @@ -9080,17 +16912,29 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr } } -std::string CompilerMSL::to_initializer_expression(const SPIRVariable &var) +string CompilerMSL::to_initializer_expression(const SPIRVariable &var) { // We risk getting an array initializer here with MSL. If we have an array. // FIXME: We cannot handle non-constant arrays being initialized. // We will need to inject spvArrayCopy here somehow ... auto &type = get(var.basetype); + string expr; if (ir.ids[var.initializer].get_type() == TypeConstant && (!type.array.empty() || type.basetype == SPIRType::Struct)) - return constant_expression(get(var.initializer)); + expr = constant_expression(get(var.initializer)); else - return CompilerGLSL::to_initializer_expression(var); + expr = CompilerGLSL::to_initializer_expression(var); + // If the initializer has more vector components than the variable, add a swizzle. + // FIXME: This can't handle arrays or structs. + auto &init_type = expression_type(var.initializer); + if (type.array.empty() && type.basetype != SPIRType::Struct && init_type.vecsize > type.vecsize) + expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0)); + return expr; +} + +string CompilerMSL::to_zero_initialized_expression(uint32_t) +{ + return "{}"; } bool CompilerMSL::descriptor_set_is_argument_buffer(uint32_t desc_set) const @@ -9103,6 +16947,18 @@ bool CompilerMSL::descriptor_set_is_argument_buffer(uint32_t desc_set) const return (argument_buffer_discrete_mask & (1u << desc_set)) == 0; } +bool CompilerMSL::is_supported_argument_buffer_type(const SPIRType &type) const +{ + // iOS Tier 1 argument buffers do not support writable images. 
+ // When the argument buffer is encoded, we don't know whether this image will have a + // NonWritable decoration, so just use discrete arguments for all storage images on iOS. + bool is_supported_type = !(type.basetype == SPIRType::Image && + type.image.sampled == 2 && + msl_options.is_ios() && + msl_options.argument_buffers_tier <= Options::ArgumentBuffersTier::Tier1); + return is_supported_type && !type_is_msl_framebuffer_fetch(type); +} + void CompilerMSL::analyze_argument_buffers() { // Gather all used resources and sort them out into argument buffers. @@ -9119,11 +16975,14 @@ void CompilerMSL::analyze_argument_buffers() struct Resource { SPIRVariable *var; + SPIRVariable *descriptor_alias; string name; SPIRType::BaseType basetype; uint32_t index; + uint32_t plane; }; SmallVector resources_in_set[kMaxArgumentBuffers]; + SmallVector inline_block_vars; bool set_needs_swizzle_buffer[kMaxArgumentBuffers] = {}; bool set_needs_buffer_sizes[kMaxArgumentBuffers] = {}; @@ -9156,33 +17015,85 @@ void CompilerMSL::analyze_argument_buffers() } } + // Handle descriptor aliasing as well as we can. + // We can handle aliasing of buffers by casting pointers, but not for typed resources. + // Inline UBOs cannot be handled since it's not a pointer, but inline data. + SPIRVariable *descriptor_alias = nullptr; + if (var.storage == StorageClassUniform || var.storage == StorageClassStorageBuffer) + { + for (auto &resource : resources_in_set[desc_set]) + { + if (get_decoration(resource.var->self, DecorationBinding) == + get_decoration(var_id, DecorationBinding) && + resource.basetype == SPIRType::Struct && type.basetype == SPIRType::Struct && + (resource.var->storage == StorageClassUniform || + resource.var->storage == StorageClassStorageBuffer)) + { + descriptor_alias = resource.var; + // Self-reference marks that we should declare the resource, + // and it's being used as an alias (so we can emit void* instead). 
+ resource.descriptor_alias = resource.var; + // Need to promote interlocked usage so that the primary declaration is correct. + if (interlocked_resources.count(var_id)) + interlocked_resources.insert(resource.var->self); + break; + } + } + } + + uint32_t binding = get_decoration(var_id, DecorationBinding); if (type.basetype == SPIRType::SampledImage) { add_resource_name(var_id); - uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image); - uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler); + uint32_t plane_count = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + plane_count = constexpr_sampler->planes; - resources_in_set[desc_set].push_back({ &var, to_name(var_id), SPIRType::Image, image_resource_index }); + for (uint32_t i = 0; i < plane_count; i++) + { + uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image, i); + resources_in_set[desc_set].push_back( + { &var, descriptor_alias, to_name(var_id), SPIRType::Image, image_resource_index, i }); + } if (type.image.dim != DimBuffer && !constexpr_sampler) { + uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler); resources_in_set[desc_set].push_back( - { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index }); + { &var, descriptor_alias, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 0 }); } } - else if (!constexpr_sampler) + else if (inline_uniform_blocks.count(SetBindingPair{ desc_set, binding })) + { + inline_block_vars.push_back(var_id); + } + else if (!constexpr_sampler && is_supported_argument_buffer_type(type)) { // constexpr samplers are not declared as resources. + // Inline uniform blocks are always emitted at the end. 
add_resource_name(var_id); + + uint32_t resource_index = ~0u; + if (!descriptor_alias) + resource_index = get_metal_resource_index(var, type.basetype); + resources_in_set[desc_set].push_back( - { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) }); + { &var, descriptor_alias, to_name(var_id), type.basetype, resource_index, 0 }); + + // Emulate texture2D atomic operations + if (atomic_image_vars.count(var.self)) + { + uint32_t buffer_resource_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0); + resources_in_set[desc_set].push_back( + { &var, descriptor_alias, to_name(var_id) + "_atomic", SPIRType::Struct, buffer_resource_index, 0 }); + } } // Check if this descriptor set needs a swizzle buffer. if (needs_swizzle_buffer_def && is_sampled_image_type(type)) set_needs_swizzle_buffer[desc_set] = true; - else if (buffers_requiring_array_length.count(var_id) != 0) + else if (buffer_requires_array_length(var_id)) { set_needs_buffer_sizes[desc_set] = true; needs_buffer_sizes = true; @@ -9202,20 +17113,13 @@ void CompilerMSL::analyze_argument_buffers() if (uint_ptr_type_id == 0) { - uint32_t offset = ir.increase_bound_by(2); - uint32_t type_id = offset; - uint_ptr_type_id = offset + 1; + uint_ptr_type_id = ir.increase_bound_by(1); // Create a buffer to hold extra data, including the swizzle constants. 
- SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); - - SPIRType uint_type_pointer = uint_type; + SPIRType uint_type_pointer = get_uint_type(); uint_type_pointer.pointer = true; - uint_type_pointer.pointer_depth = 1; - uint_type_pointer.parent_type = type_id; + uint_type_pointer.pointer_depth++; + uint_type_pointer.parent_type = get_uint_type_id(); uint_type_pointer.storage = StorageClassUniform; set(uint_ptr_type_id, uint_type_pointer); set_decoration(uint_ptr_type_id, DecorationArrayStride, 4); @@ -9229,7 +17133,7 @@ void CompilerMSL::analyze_argument_buffers() set_decoration(var_id, DecorationDescriptorSet, desc_set); set_decoration(var_id, DecorationBinding, kSwizzleBufferBinding); resources_in_set[desc_set].push_back( - { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt) }); + { &var, nullptr, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 }); } if (set_needs_buffer_sizes[desc_set]) @@ -9240,11 +17144,21 @@ void CompilerMSL::analyze_argument_buffers() set_decoration(var_id, DecorationDescriptorSet, desc_set); set_decoration(var_id, DecorationBinding, kBufferSizeBufferBinding); resources_in_set[desc_set].push_back( - { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt) }); + { &var, nullptr, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 }); } } } + // Now add inline uniform blocks. 
+ for (uint32_t var_id : inline_block_vars) + { + auto &var = get(var_id); + uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); + add_resource_name(var_id); + resources_in_set[desc_set].push_back( + { &var, nullptr, to_name(var_id), SPIRType::Struct, get_metal_resource_index(var, SPIRType::Struct), 0 }); + } + for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++) { auto &resources = resources_in_set[desc_set]; @@ -9259,14 +17173,26 @@ void CompilerMSL::analyze_argument_buffers() argument_buffer_ids[desc_set] = next_id; auto &buffer_type = set(type_id); - buffer_type.storage = StorageClassUniform; + buffer_type.basetype = SPIRType::Struct; + + if ((argument_buffer_device_storage_mask & (1u << desc_set)) != 0) + { + buffer_type.storage = StorageClassStorageBuffer; + // Make sure the argument buffer gets marked as const device. + set_decoration(next_id, DecorationNonWritable); + // Need to mark the type as a Block to enable this. + set_decoration(type_id, DecorationBlock); + } + else + buffer_type.storage = StorageClassUniform; + set_name(type_id, join("spvDescriptorSetBuffer", desc_set)); auto &ptr_type = set(ptr_type_id); ptr_type = buffer_type; ptr_type.pointer = true; - ptr_type.pointer_depth = 1; + ptr_type.pointer_depth++; ptr_type.parent_type = type_id; uint32_t buffer_variable_id = next_id; @@ -9274,16 +17200,71 @@ void CompilerMSL::analyze_argument_buffers() set_name(buffer_variable_id, join("spvDescriptorSet", desc_set)); // Ids must be emitted in ID order. 
- sort(begin(resources), end(resources), [&](const Resource &lhs, const Resource &rhs) -> bool { + stable_sort(begin(resources), end(resources), [&](const Resource &lhs, const Resource &rhs) -> bool { return tie(lhs.index, lhs.basetype) < tie(rhs.index, rhs.basetype); }); uint32_t member_index = 0; + uint32_t next_arg_buff_index = 0; for (auto &resource : resources) { auto &var = *resource.var; auto &type = get_variable_data_type(var); + + // If needed, synthesize and add padding members. + // member_index and next_arg_buff_index are incremented when padding members are added. + if (msl_options.pad_argument_buffer_resources) + { + while (resource.index > next_arg_buff_index) + { + auto &rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index); + switch (rez_bind.basetype) + { + case SPIRType::Void: + case SPIRType::Boolean: + case SPIRType::SByte: + case SPIRType::UByte: + case SPIRType::Short: + case SPIRType::UShort: + case SPIRType::Int: + case SPIRType::UInt: + case SPIRType::Int64: + case SPIRType::UInt64: + case SPIRType::AtomicCounter: + case SPIRType::Half: + case SPIRType::Float: + case SPIRType::Double: + add_argument_buffer_padding_buffer_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + case SPIRType::Image: + add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + case SPIRType::Sampler: + add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + case SPIRType::SampledImage: + if (next_arg_buff_index == rez_bind.msl_sampler) + add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + else + add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + default: + break; + } + } + + // Adjust the number of slots consumed by current member itself. + // If actual member is an array, allow runtime array resolution as well. 
+ uint32_t elem_cnt = type.array.empty() ? 1 : to_array_size_literal(type); + if (elem_cnt == 0) + elem_cnt = get_resource_array_size(var.self); + + next_arg_buff_index += elem_cnt; + } + string mbr_name = ensure_valid_name(resource.name, "m"); + if (resource.plane > 0) + mbr_name += join(plane_name_suffix, resource.plane); set_member_name(buffer_type.self, member_index, mbr_name); if (resource.basetype == SPIRType::Sampler && type.basetype != SPIRType::Sampler) @@ -9311,18 +17292,68 @@ void CompilerMSL::analyze_argument_buffers() } else { + uint32_t binding = get_decoration(var.self, DecorationBinding); + SetBindingPair pair = { desc_set, binding }; + if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler || resource.basetype == SPIRType::SampledImage) { // Drop pointer information when we emit the resources into a struct. buffer_type.member_types.push_back(get_variable_data_type_id(var)); + if (resource.plane == 0) + set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); + } + else if (buffers_requiring_dynamic_offset.count(pair)) + { + if (resource.descriptor_alias) + SPIRV_CROSS_THROW("Descriptor aliasing is currently not supported with dynamic offsets."); + + // Don't set the qualified name here; we'll define a variable holding the corrected buffer address later. + buffer_type.member_types.push_back(var.basetype); + buffers_requiring_dynamic_offset[pair].second = var.self; + } + else if (inline_uniform_blocks.count(pair)) + { + if (resource.descriptor_alias) + SPIRV_CROSS_THROW("Descriptor aliasing is currently not supported with inline UBOs."); + + // Put the buffer block itself into the argument buffer. + buffer_type.member_types.push_back(get_variable_data_type_id(var)); set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); } + else if (atomic_image_vars.count(var.self)) + { + // Emulate texture2D atomic operations. 
+ // Don't set the qualified name: it's already set for this variable, + // and the code that references the buffer manually appends "_atomic" + // to the name. + uint32_t offset = ir.increase_bound_by(2); + uint32_t atomic_type_id = offset; + uint32_t type_ptr_id = offset + 1; + + SPIRType atomic_type; + atomic_type.basetype = SPIRType::AtomicCounter; + atomic_type.width = 32; + atomic_type.vecsize = 1; + set(atomic_type_id, atomic_type); + + atomic_type.pointer = true; + atomic_type.pointer_depth++; + atomic_type.parent_type = atomic_type_id; + atomic_type.storage = StorageClassStorageBuffer; + auto &atomic_ptr_type = set(type_ptr_id, atomic_type); + atomic_ptr_type.self = atomic_type_id; + + buffer_type.member_types.push_back(type_ptr_id); + } else { - // Resources will be declared as pointers not references, so automatically dereference as appropriate. - buffer_type.member_types.push_back(var.basetype); - if (type.array.empty()) + if (!resource.descriptor_alias || resource.descriptor_alias == resource.var) + buffer_type.member_types.push_back(var.basetype); + + if (resource.descriptor_alias && resource.descriptor_alias != resource.var) + buffer_aliases_argument.push_back({ var.self, resource.descriptor_alias->self }); + else if (type.array.empty()) set_qualified_name(var.self, join("(*", to_name(buffer_variable_id), ".", mbr_name, ")")); else set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); @@ -9338,29 +17369,174 @@ void CompilerMSL::analyze_argument_buffers() } } -bool CompilerMSL::SetBindingPair::operator==(const SetBindingPair &other) const +// Return the resource type of the app-provided resources for the descriptor set, +// that matches the resource index of the argument buffer index. +// This is a two-step lookup, first lookup the resource binding number from the argument buffer index, +// then lookup the resource binding using the binding number. 
+MSLResourceBinding &CompilerMSL::get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx) +{ + auto stage = get_entry_point().model; + StageSetBinding arg_idx_tuple = { stage, desc_set, arg_idx }; + auto arg_itr = resource_arg_buff_idx_to_binding_number.find(arg_idx_tuple); + if (arg_itr != end(resource_arg_buff_idx_to_binding_number)) + { + StageSetBinding bind_tuple = { stage, desc_set, arg_itr->second }; + auto bind_itr = resource_bindings.find(bind_tuple); + if (bind_itr != end(resource_bindings)) + return bind_itr->second.first; + } + SPIRV_CROSS_THROW("Argument buffer resource base type could not be determined. When padding argument buffer " + "elements, all descriptor set resources must be supplied with a base type by the app."); +} + +// Adds an argument buffer padding argument buffer type as one or more members of the struct type at the member index. +// Metal does not support arrays of buffers, so these are emitted as multiple struct members. +void CompilerMSL::add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, + uint32_t &arg_buff_index, MSLResourceBinding &rez_bind) +{ + if (!argument_buffer_padding_buffer_type_id) + { + uint32_t buff_type_id = ir.increase_bound_by(2); + auto &buff_type = set(buff_type_id); + buff_type.basetype = rez_bind.basetype; + buff_type.storage = StorageClassUniformConstant; + + uint32_t ptr_type_id = buff_type_id + 1; + auto &ptr_type = set(ptr_type_id); + ptr_type = buff_type; + ptr_type.pointer = true; + ptr_type.pointer_depth++; + ptr_type.parent_type = buff_type_id; + + argument_buffer_padding_buffer_type_id = ptr_type_id; + } + + for (uint32_t rez_idx = 0; rez_idx < rez_bind.count; rez_idx++) + add_argument_buffer_padding_type(argument_buffer_padding_buffer_type_id, struct_type, mbr_idx, arg_buff_index, 1); +} + +// Adds an argument buffer padding argument image type as a member of the struct type at the member index. 
+void CompilerMSL::add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, + uint32_t &arg_buff_index, MSLResourceBinding &rez_bind) +{ + if (!argument_buffer_padding_image_type_id) + { + uint32_t base_type_id = ir.increase_bound_by(2); + auto &base_type = set(base_type_id); + base_type.basetype = SPIRType::Float; + base_type.width = 32; + + uint32_t img_type_id = base_type_id + 1; + auto &img_type = set(img_type_id); + img_type.basetype = SPIRType::Image; + img_type.storage = StorageClassUniformConstant; + + img_type.image.type = base_type_id; + img_type.image.dim = Dim2D; + img_type.image.depth = false; + img_type.image.arrayed = false; + img_type.image.ms = false; + img_type.image.sampled = 1; + img_type.image.format = ImageFormatUnknown; + img_type.image.access = AccessQualifierMax; + + argument_buffer_padding_image_type_id = img_type_id; + } + + add_argument_buffer_padding_type(argument_buffer_padding_image_type_id, struct_type, mbr_idx, arg_buff_index, rez_bind.count); +} + +// Adds an argument buffer padding argument sampler type as a member of the struct type at the member index. +void CompilerMSL::add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, + uint32_t &arg_buff_index, MSLResourceBinding &rez_bind) +{ + if (!argument_buffer_padding_sampler_type_id) + { + uint32_t samp_type_id = ir.increase_bound_by(1); + auto &samp_type = set(samp_type_id); + samp_type.basetype = SPIRType::Sampler; + samp_type.storage = StorageClassUniformConstant; + + argument_buffer_padding_sampler_type_id = samp_type_id; + } + + add_argument_buffer_padding_type(argument_buffer_padding_sampler_type_id, struct_type, mbr_idx, arg_buff_index, rez_bind.count); +} + +// Adds the argument buffer padding argument type as a member of the struct type at the member index. +// Advances both arg_buff_index and mbr_idx to next argument slots. 
+void CompilerMSL::add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx, + uint32_t &arg_buff_index, uint32_t count) +{ + uint32_t type_id = mbr_type_id; + if (count > 1) + { + uint32_t ary_type_id = ir.increase_bound_by(1); + auto &ary_type = set(ary_type_id); + ary_type = get(type_id); + ary_type.array.push_back(count); + ary_type.array_size_literal.push_back(true); + ary_type.parent_type = type_id; + type_id = ary_type_id; + } + + set_member_name(struct_type.self, mbr_idx, join("_m", arg_buff_index, "_pad")); + set_extended_member_decoration(struct_type.self, mbr_idx, SPIRVCrossDecorationResourceIndexPrimary, arg_buff_index); + struct_type.member_types.push_back(type_id); + + arg_buff_index += count; + mbr_idx++; +} + +void CompilerMSL::activate_argument_buffer_resources() +{ + // For ABI compatibility, force-enable all resources which are part of argument buffers. + ir.for_each_typed_id([&](uint32_t self, const SPIRVariable &) { + if (!has_decoration(self, DecorationDescriptorSet)) + return; + + uint32_t desc_set = get_decoration(self, DecorationDescriptorSet); + if (descriptor_set_is_argument_buffer(desc_set)) + add_active_interface_variable(self); + }); +} + +bool CompilerMSL::using_builtin_array() const +{ + return msl_options.force_native_arrays || is_using_builtin_array; +} + +void CompilerMSL::set_combined_sampler_suffix(const char *suffix) { - return desc_set == other.desc_set && binding == other.binding; + sampler_name_suffix = suffix; } -bool CompilerMSL::StageSetBinding::operator==(const StageSetBinding &other) const +const char *CompilerMSL::get_combined_sampler_suffix() const { - return model == other.model && desc_set == other.desc_set && binding == other.binding; + return sampler_name_suffix.c_str(); } -size_t CompilerMSL::InternalHasher::operator()(const SetBindingPair &value) const +void CompilerMSL::emit_block_hints(const SPIRBlock &) { - // Quality of hash doesn't really matter here. 
- auto hash_set = std::hash()(value.desc_set); - auto hash_binding = std::hash()(value.binding); - return (hash_set * 0x10001b31) ^ hash_binding; } -size_t CompilerMSL::InternalHasher::operator()(const StageSetBinding &value) const +string CompilerMSL::additional_fixed_sample_mask_str() const { - // Quality of hash doesn't really matter here. - auto hash_model = std::hash()(value.model); - auto hash_set = std::hash()(value.desc_set); - auto tmp_hash = (hash_model * 0x10001b31) ^ hash_set; - return (tmp_hash * 0x10001b31) ^ value.binding; + char print_buffer[32]; +#ifdef _MSC_VER + // snprintf does not exist or is buggy on older MSVC versions, some of + // them being used by MinGW. Use sprintf instead and disable + // corresponding warning. +#pragma warning(push) +#pragma warning(disable : 4996) +#endif +#if _WIN32 + sprintf(print_buffer, "0x%x", msl_options.additional_fixed_sample_mask); +#else + snprintf(print_buffer, sizeof(print_buffer), "0x%x", msl_options.additional_fixed_sample_mask); +#endif +#ifdef _MSC_VER +#pragma warning(pop) +#endif + return print_buffer; } diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 9f2bab49d4a..737575d49b8 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2016-2019 The Brenwill Workshop Ltd. + * Copyright 2016-2021 The Brenwill Workshop Ltd. + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_MSL_HPP #define SPIRV_CROSS_MSL_HPP @@ -27,43 +34,81 @@ namespace SPIRV_CROSS_NAMESPACE { -// Indicates the format of the vertex attribute. 
Currently limited to specifying -// if the attribute is an 8-bit unsigned integer, 16-bit unsigned integer, or +// Indicates the format of a shader interface variable. Currently limited to specifying +// if the input is an 8-bit unsigned integer, 16-bit unsigned integer, or // some other format. -enum MSLVertexFormat +enum MSLShaderVariableFormat +{ + MSL_SHADER_VARIABLE_FORMAT_OTHER = 0, + MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1, + MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2, + MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3, + MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4, + + // Deprecated aliases. + MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER, + MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8, + MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16, + MSL_SHADER_INPUT_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER, + MSL_SHADER_INPUT_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8, + MSL_SHADER_INPUT_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16, + MSL_SHADER_INPUT_FORMAT_ANY16 = MSL_SHADER_VARIABLE_FORMAT_ANY16, + MSL_SHADER_INPUT_FORMAT_ANY32 = MSL_SHADER_VARIABLE_FORMAT_ANY32, + + MSL_SHADER_VARIABLE_FORMAT_INT_MAX = 0x7fffffff +}; + +// Indicates the rate at which a variable changes value, one of: per-vertex, +// per-primitive, or per-patch. +enum MSLShaderVariableRate { - MSL_VERTEX_FORMAT_OTHER = 0, - MSL_VERTEX_FORMAT_UINT8 = 1, - MSL_VERTEX_FORMAT_UINT16 = 2, - MSL_VERTEX_FORMAT_INT_MAX = 0x7fffffff + MSL_SHADER_VARIABLE_RATE_PER_VERTEX = 0, + MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE = 1, + MSL_SHADER_VARIABLE_RATE_PER_PATCH = 2, + + MSL_SHADER_VARIABLE_RATE_INT_MAX = 0x7fffffff, }; -// Defines MSL characteristics of a vertex attribute at a particular location. +// Defines MSL characteristics of a shader interface variable at a particular location. // After compilation, it is possible to query whether or not this location was used. 
-struct MSLVertexAttr +// If vecsize is nonzero, it must be greater than or equal to the vecsize declared in the shader, +// or behavior is undefined. +struct MSLShaderInterfaceVariable { uint32_t location = 0; - uint32_t msl_buffer = 0; - uint32_t msl_offset = 0; - uint32_t msl_stride = 0; - bool per_instance = false; - MSLVertexFormat format = MSL_VERTEX_FORMAT_OTHER; + uint32_t component = 0; + MSLShaderVariableFormat format = MSL_SHADER_VARIABLE_FORMAT_OTHER; spv::BuiltIn builtin = spv::BuiltInMax; + uint32_t vecsize = 0; + MSLShaderVariableRate rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX; }; // Matches the binding index of a MSL resource for a binding within a descriptor set. // Taken together, the stage, desc_set and binding combine to form a reference to a resource -// descriptor used in a particular shading stage. -// If using MSL 2.0 argument buffers, and the descriptor set is not marked as a discrete descriptor set, -// the binding reference we remap to will become an [[id(N)]] attribute within -// the "descriptor set" argument buffer structure. -// For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will become a -// [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used. +// descriptor used in a particular shading stage. The count field indicates the number of +// resources consumed by this binding, if the binding represents an array of resources. +// If the resource array is a run-time-sized array, which are legal in GLSL or SPIR-V, this value +// will be used to declare the array size in MSL, which does not support run-time-sized arrays. +// If pad_argument_buffer_resources is enabled, the base_type and count values are used to +// specify the base type and array size of the resource in the argument buffer, if that resource +// is not defined and used by the shader. 
With pad_argument_buffer_resources enabled, this +// information will be used to pad the argument buffer structure, in order to align that +// structure consistently for all uses, across all shaders, of the descriptor set represented +// by the arugment buffer. If pad_argument_buffer_resources is disabled, base_type does not +// need to be populated, and if the resource is also not a run-time sized array, the count +// field does not need to be populated. +// If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set, +// and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we +// remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure. +// For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will +// become a [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used. struct MSLResourceBinding { spv::ExecutionModel stage = spv::ExecutionModelMax; + SPIRType::BaseType basetype = SPIRType::Unknown; uint32_t desc_set = 0; uint32_t binding = 0; + uint32_t count = 0; uint32_t msl_buffer = 0; uint32_t msl_texture = 0; uint32_t msl_sampler = 0; @@ -122,6 +167,50 @@ enum MSLSamplerBorderColor MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff }; +enum MSLFormatResolution +{ + MSL_FORMAT_RESOLUTION_444 = 0, + MSL_FORMAT_RESOLUTION_422, + MSL_FORMAT_RESOLUTION_420, + MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff +}; + +enum MSLChromaLocation +{ + MSL_CHROMA_LOCATION_COSITED_EVEN = 0, + MSL_CHROMA_LOCATION_MIDPOINT, + MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff +}; + +enum MSLComponentSwizzle +{ + MSL_COMPONENT_SWIZZLE_IDENTITY = 0, + MSL_COMPONENT_SWIZZLE_ZERO, + MSL_COMPONENT_SWIZZLE_ONE, + MSL_COMPONENT_SWIZZLE_R, + MSL_COMPONENT_SWIZZLE_G, + MSL_COMPONENT_SWIZZLE_B, + MSL_COMPONENT_SWIZZLE_A, + MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerYCbCrModelConversion +{ + 
MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerYCbCrRange +{ + MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, + MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, + MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff +}; + struct MSLConstexprSampler { MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED; @@ -137,21 +226,50 @@ struct MSLConstexprSampler float lod_clamp_max = 1000.0f; int max_anisotropy = 1; + // Sampler Y'CbCr conversion parameters + uint32_t planes = 0; + MSLFormatResolution resolution = MSL_FORMAT_RESOLUTION_444; + MSLSamplerFilter chroma_filter = MSL_SAMPLER_FILTER_NEAREST; + MSLChromaLocation x_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; + MSLChromaLocation y_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; + MSLComponentSwizzle swizzle[4]; // IDENTITY, IDENTITY, IDENTITY, IDENTITY + MSLSamplerYCbCrModelConversion ycbcr_model = MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY; + MSLSamplerYCbCrRange ycbcr_range = MSL_SAMPLER_YCBCR_RANGE_ITU_FULL; + uint32_t bpc = 8; + bool compare_enable = false; bool lod_clamp_enable = false; bool anisotropy_enable = false; -}; + bool ycbcr_conversion_enable = false; -// Tracks the type ID and member index of a struct member -using MSLStructMemberKey = uint64_t; + MSLConstexprSampler() + { + for (uint32_t i = 0; i < 4; i++) + swizzle[i] = MSL_COMPONENT_SWIZZLE_IDENTITY; + } + bool swizzle_is_identity() const + { + return (swizzle[0] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[1] == MSL_COMPONENT_SWIZZLE_IDENTITY && + swizzle[2] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[3] == MSL_COMPONENT_SWIZZLE_IDENTITY); + } + bool swizzle_has_one_or_zero() const + { + return (swizzle[0] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[0] == 
MSL_COMPONENT_SWIZZLE_ONE || + swizzle[1] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[1] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[2] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[2] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[3] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[3] == MSL_COMPONENT_SWIZZLE_ONE); + } +}; // Special constant used in a MSLResourceBinding desc_set // element to indicate the bindings for the push constants. -static const uint32_t kPushConstDescSet = ~(0u); +// Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. +static const uint32_t kPushConstDescSet = ResourceBindingPushConstantDescriptorSet; // Special constant used in a MSLResourceBinding binding // element to indicate the bindings for the push constants. -static const uint32_t kPushConstBinding = 0; +// Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. +static const uint32_t kPushConstBinding = ResourceBindingPushConstantBinding; // Special constant used in a MSLResourceBinding binding // element to indicate the buffer binding for swizzle buffers. @@ -169,6 +287,9 @@ static const uint32_t kArgumentBufferBinding = ~(3u); static const uint32_t kMaxArgumentBuffers = 8; +// The arbitrary maximum for the nesting of array of array copies. 
+static const uint32_t kArrayCopyMultidimMax = 6; + // Decompiles SPIR-V to Metal Shading Language class CompilerMSL : public CompilerGLSL { @@ -185,6 +306,8 @@ class CompilerMSL : public CompilerGLSL Platform platform = macOS; uint32_t msl_version = make_msl_version(1, 2); uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers + uint32_t r32ui_linear_texture_alignment = 4; + uint32_t r32ui_alignment_constant_id = 65535; uint32_t swizzle_buffer_index = 30; uint32_t indirect_params_buffer_index = 29; uint32_t shader_output_buffer_index = 28; @@ -192,41 +315,199 @@ class CompilerMSL : public CompilerGLSL uint32_t shader_tess_factor_buffer_index = 26; uint32_t buffer_size_buffer_index = 25; uint32_t view_mask_buffer_index = 24; + uint32_t dynamic_offsets_buffer_index = 23; + uint32_t shader_input_buffer_index = 22; + uint32_t shader_index_buffer_index = 21; + uint32_t shader_patch_input_buffer_index = 20; uint32_t shader_input_wg_index = 0; + uint32_t device_index = 0; + uint32_t enable_frag_output_mask = 0xffffffff; + // Metal doesn't allow setting a fixed sample mask directly in the pipeline. + // We can evade this restriction by ANDing the internal sample_mask output + // of the shader with the additional fixed sample mask. + uint32_t additional_fixed_sample_mask = 0xffffffff; bool enable_point_size_builtin = true; + bool enable_frag_depth_builtin = true; + bool enable_frag_stencil_ref_builtin = true; bool disable_rasterization = false; bool capture_output_to_buffer = false; bool swizzle_texture_samples = false; bool tess_domain_origin_lower_left = false; bool multiview = false; + bool multiview_layered_rendering = true; + bool view_index_from_device_index = false; + bool dispatch_base = false; + bool texture_1D_as_2D = false; - // Enable use of MSL 2.0 indirect argument buffers. + // Enable use of Metal argument buffers. // MSL 2.0 must also be enabled. 
bool argument_buffers = false; + // Defines Metal argument buffer tier levels. + // Uses same values as Metal MTLArgumentBuffersTier enumeration. + enum class ArgumentBuffersTier + { + Tier1 = 0, + Tier2 = 1, + }; + + // When using Metal argument buffers, indicates the Metal argument buffer tier level supported by the Metal platform. + // Ignored when Options::argument_buffers is disabled. + // - Tier1 supports writable images on macOS, but not on iOS. + // - Tier2 supports writable images on macOS and iOS, and higher resource count limits. + // Tier capabilities based on recommendations from Apple engineering. + ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1; + + // Ensures vertex and instance indices start at zero. This reflects the behavior of HLSL with SV_VertexID and SV_InstanceID. + bool enable_base_index_zero = false; + // Fragment output in MSL must have at least as many components as the render pass. // Add support to explicit pad out components. bool pad_fragment_output_components = false; + // Specifies whether the iOS target version supports the [[base_vertex]] and [[base_instance]] attributes. + bool ios_support_base_vertex_instance = false; + + // Use Metal's native frame-buffer fetch API for subpass inputs. + bool use_framebuffer_fetch_subpasses = false; + + // Enables use of "fma" intrinsic for invariant float math + bool invariant_float_math = false; + + // Emulate texturecube_array with texture2d_array for iOS where this type is not available + bool emulate_cube_array = false; + + // Allow user to enable decoration binding + bool enable_decoration_binding = false; + // Requires MSL 2.1, use the native support for texel buffers. bool texture_buffer_native = false; - bool is_ios() + // Forces all resources which are part of an argument buffer to be considered active. + // This ensures ABI compatibility between shaders where some resources might be unused, + // and would otherwise declare a different IAB. 
+ bool force_active_argument_buffer_resources = false; + + // Aligns each resource in an argument buffer to its assigned index value, id(N), + // by adding synthetic padding members in the argument buffer struct for any resources + // in the argument buffer that are not defined and used by the shader. This allows + // the shader to index into the correct argument in a descriptor set argument buffer + // that is shared across shaders, where not all resources in the argument buffer are + // defined in each shader. For this to work, an MSLResourceBinding must be provided for + // all descriptors in any descriptor set held in an argument buffer in the shader, and + // that MSLResourceBinding must have the basetype and count members populated correctly. + // The implementation here assumes any inline blocks in the argument buffer is provided + // in a Metal buffer, and doesn't take into consideration inline blocks that are + // optionally embedded directly into the argument buffer via add_inline_uniform_block(). + bool pad_argument_buffer_resources = false; + + // Forces the use of plain arrays, which works around certain driver bugs on certain versions + // of Intel Macbooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210. + // May reduce performance in scenarios where arrays are copied around as value-types. + bool force_native_arrays = false; + + // If a shader writes clip distance, also emit user varyings which + // can be read in subsequent stages. + bool enable_clip_distance_user_varying = true; + + // In a tessellation control shader, assume that more than one patch can be processed in a + // single workgroup. This requires changes to the way the InvocationId and PrimitiveId + // builtins are processed, but should result in more efficient usage of the GPU. + bool multi_patch_workgroup = false; + + // Use storage buffers instead of vertex-style attributes for tessellation evaluation + // input. 
This may require conversion of inputs in the generated post-tessellation + // vertex shader, but allows the use of nested arrays. + bool raw_buffer_tese_input = false; + + // If set, a vertex shader will be compiled as part of a tessellation pipeline. + // It will be translated as a compute kernel, so it can use the global invocation ID + // to index the output buffer. + bool vertex_for_tessellation = false; + + // Assume that SubpassData images have multiple layers. Layered input attachments + // are addressed relative to the Layer output from the vertex pipeline. This option + // has no effect with multiview, since all input attachments are assumed to be layered + // and will be addressed using the current ViewIndex. + bool arrayed_subpass_input = false; + + // Whether to use SIMD-group or quadgroup functions to implement group non-uniform + // operations. Some GPUs on iOS do not support the SIMD-group functions, only the + // quadgroup functions. + bool ios_use_simdgroup_functions = false; + + // If set, the subgroup size will be assumed to be one, and subgroup-related + // builtins and operations will be emitted accordingly. This mode is intended to + // be used by MoltenVK on hardware/software configurations which do not provide + // sufficient support for subgroups. + bool emulate_subgroups = false; + + // If nonzero, a fixed subgroup size to assume. Metal, similarly to VK_EXT_subgroup_size_control, + // allows the SIMD-group size (aka thread execution width) to vary depending on + // register usage and requirements. In certain circumstances--for example, a pipeline + // in MoltenVK without VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT-- + // this is undesirable. This fixes the value of the SubgroupSize builtin, instead of + // mapping it to the Metal builtin [[thread_execution_width]]. If the thread + // execution width is reduced, the extra invocations will appear to be inactive. 
+ // If zero, the SubgroupSize will be allowed to vary, and the builtin will be mapped + // to the Metal [[thread_execution_width]] builtin. + uint32_t fixed_subgroup_size = 0; + + enum class IndexType + { + None = 0, + UInt16 = 1, + UInt32 = 2 + }; + + // The type of index in the index buffer, if present. For a compute shader, Metal + // requires specifying the indexing at pipeline creation, rather than at draw time + // as with graphics pipelines. This means we must create three different pipelines, + // for no indexing, 16-bit indices, and 32-bit indices. Each requires different + // handling for the gl_VertexIndex builtin. We may as well, then, create three + // different shaders for these three scenarios. + IndexType vertex_index_type = IndexType::None; + + // If set, a dummy [[sample_id]] input is added to a fragment shader if none is present. + // This will force the shader to run at sample rate, assuming Metal does not optimize + // the extra threads away. + bool force_sample_rate_shading = false; + + // If set, gl_HelperInvocation will be set manually whenever a fragment is discarded. + // Some Metal devices have a bug where simd_is_helper_thread() does not return true + // after a fragment has been discarded. This is a workaround that is only expected to be needed + // until the bug is fixed in Metal; it is provided as an option to allow disabling it when that occurs. + bool manual_helper_invocation_updates = true; + + // If set, extra checks will be emitted in fragment shaders to prevent writes + // from discarded fragments. Some Metal devices have a bug where writes to storage resources + // from discarded fragment threads continue to occur, despite the fragment being + // discarded. This is a workaround that is only expected to be needed until the + // bug is fixed in Metal; it is provided as an option so it can be enabled + // only when the bug is present. 
+ bool check_discarded_frag_stores = false; + + bool is_ios() const { return platform == iOS; } - bool is_macos() + bool is_macos() const { return platform == macOS; } + bool use_quadgroup_operation() const + { + return is_ios() && !ios_use_simdgroup_functions; + } + void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) { msl_version = make_msl_version(major, minor, patch); } - bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) + bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const { return msl_version >= make_msl_version(major, minor, patch); } @@ -270,32 +551,44 @@ class CompilerMSL : public CompilerGLSL return !buffers_requiring_array_length.empty(); } + bool buffer_requires_array_length(VariableID id) const + { + return buffers_requiring_array_length.count(id) != 0; + } + // Provide feedback to calling API to allow it to pass a buffer // containing the view mask for the current multiview subpass. bool needs_view_mask_buffer() const { - return msl_options.multiview; + return msl_options.multiview && !msl_options.view_index_from_device_index; + } + + // Provide feedback to calling API to allow it to pass a buffer + // containing the dispatch base workgroup ID. + bool needs_dispatch_base_buffer() const + { + return msl_options.dispatch_base && !msl_options.supports_msl_version(1, 2); } // Provide feedback to calling API to allow it to pass an output // buffer if the shader needs it. bool needs_output_buffer() const { - return capture_output_to_buffer && stage_out_var_id != 0; + return capture_output_to_buffer && stage_out_var_id != ID(0); } // Provide feedback to calling API to allow it to pass a patch output // buffer if the shader needs it. 
bool needs_patch_output_buffer() const { - return capture_output_to_buffer && patch_stage_out_var_id != 0; + return capture_output_to_buffer && patch_stage_out_var_id != ID(0); } // Provide feedback to calling API to allow it to pass an input threadgroup // buffer if the shader needs it. bool needs_input_threadgroup_mem() const { - return capture_output_to_buffer && stage_in_var_id != 0; + return capture_output_to_buffer && stage_in_var_id != ID(0); } explicit CompilerMSL(std::vector spirv); @@ -303,11 +596,15 @@ class CompilerMSL : public CompilerGLSL explicit CompilerMSL(const ParsedIR &ir); explicit CompilerMSL(ParsedIR &&ir); - // attr is a vertex attribute binding used to match - // vertex content locations to MSL attributes. If vertex attributes are provided, - // is_msl_vertex_attribute_used() will return true after calling ::compile() if - // the location was used by the MSL code. - void add_msl_vertex_attribute(const MSLVertexAttr &attr); + // input is a shader interface variable description used to fix up shader input variables. + // If shader inputs are provided, is_msl_shader_input_used() will return true after + // calling ::compile() if the location were used by the MSL code. + void add_msl_shader_input(const MSLShaderInterfaceVariable &input); + + // output is a shader interface variable description used to fix up shader output variables. + // If shader outputs are provided, is_msl_shader_output_used() will return true after + // calling ::compile() if the location were used by the MSL code. + void add_msl_shader_output(const MSLShaderInterfaceVariable &output); // resource is a resource binding to indicate the MSL buffer, // texture or sampler index to use for a particular SPIR-V description set @@ -316,18 +613,54 @@ class CompilerMSL : public CompilerGLSL // the set/binding combination was used by the MSL code. 
void add_msl_resource_binding(const MSLResourceBinding &resource); + // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource + // in this shader. index is the index within the dynamic offset buffer to use. This + // function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC + // or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers + // are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with + // an offset taken from the dynamic offset buffer. + void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index); + + // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource + // in this shader. This function marks that resource as an inline uniform block + // (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT). This function only has any effect if argument buffers + // are enabled. If so, the buffer block will be directly embedded into the argument + // buffer, instead of being referenced indirectly via pointer. + void add_inline_uniform_block(uint32_t desc_set, uint32_t binding); + // When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets. // This corresponds to VK_KHR_push_descriptor in Vulkan. void add_discrete_descriptor_set(uint32_t desc_set); - // Query after compilation is done. This allows you to check if a location or set/binding combination was used by the shader. - bool is_msl_vertex_attribute_used(uint32_t location); + // If an argument buffer is large enough, it may need to be in the device storage space rather than + // constant. Opt-in to this behavior here on a per set basis. + void set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage); + + // Query after compilation is done. This allows you to check if an input location was used by the shader. 
+ bool is_msl_shader_input_used(uint32_t location); + + // Query after compilation is done. This allows you to check if an output location were used by the shader. + bool is_msl_shader_output_used(uint32_t location); + + // If not using add_msl_shader_input, it's possible + // that certain builtin attributes need to be automatically assigned locations. + // This is typical for tessellation builtin inputs such as tess levels, gl_Position, etc. + // This returns k_unknown_location if the location was explicitly assigned with + // add_msl_shader_input or the builtin is not used, otherwise returns N in [[attribute(N)]]. + uint32_t get_automatic_builtin_input_location(spv::BuiltIn builtin) const; + + // If not using add_msl_shader_output, it's possible + // that certain builtin attributes need to be automatically assigned locations. + // This is typical for tessellation builtin outputs such as tess levels, gl_Position, etc. + // This returns k_unknown_location if the location were explicitly assigned with + // add_msl_shader_output or the builtin were not used, otherwise returns N in [[attribute(N)]]. + uint32_t get_automatic_builtin_output_location(spv::BuiltIn builtin) const; // NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here. // Constexpr samplers are always assumed to be emitted. // No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped // by remap_constexpr_sampler(_by_binding). - bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding); + bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding) const; // This must only be called after a successful call to CompilerMSL::compile(). // For a variable resource ID obtained through reflection API, report the automatically assigned resource index. 
@@ -340,8 +673,17 @@ class CompilerMSL : public CompilerGLSL // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers, in which case the // sampler's binding is returned instead. For any other resource type, -1 is returned. + // Secondary bindings are also used for the auxillary image atomic buffer. uint32_t get_automatic_msl_resource_binding_secondary(uint32_t id) const; + // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for multiplanar images, + // in which case the second plane's binding is returned instead. For any other resource type, -1 is returned. + uint32_t get_automatic_msl_resource_binding_tertiary(uint32_t id) const; + + // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for triplanar images, + // in which case the third plane's binding is returned instead. For any other resource type, -1 is returned. + uint32_t get_automatic_msl_resource_binding_quaternary(uint32_t id) const; + // Compiles the SPIR-V code into Metal Shading Language. std::string compile() override; @@ -352,7 +694,7 @@ class CompilerMSL : public CompilerGLSL // This can be used on both combined image/samplers (sampler2D) or standalone samplers. // The remapped sampler must not be an array of samplers. // Prefer remap_constexpr_sampler_by_binding unless you're also doing reflection anyways. - void remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler); + void remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler); // Same as remap_constexpr_sampler, except you provide set/binding, rather than variable ID. // Remaps based on ID take priority over set/binding remaps. @@ -362,10 +704,13 @@ class CompilerMSL : public CompilerGLSL // to use for a particular location. The default is 4 if number of components is not overridden. 
void set_fragment_output_components(uint32_t location, uint32_t components); + void set_combined_sampler_suffix(const char *suffix); + const char *get_combined_sampler_suffix() const; + protected: // An enum of SPIR-V functions that are implemented in additional // source code that is added to the shader if necessary. - enum SPVFuncImpl + enum SPVFuncImpl : uint8_t { SPVFuncImplNone, SPVFuncImplMod, @@ -385,73 +730,151 @@ class CompilerMSL : public CompilerGLSL SPVFuncImplArrayOfArrayCopy5Dim = SPVFuncImplArrayCopyMultidimBase + 5, SPVFuncImplArrayOfArrayCopy6Dim = SPVFuncImplArrayCopyMultidimBase + 6, SPVFuncImplTexelBufferCoords, + SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations + SPVFuncImplFMul, + SPVFuncImplFAdd, + SPVFuncImplFSub, + SPVFuncImplQuantizeToF16, + SPVFuncImplCubemapTo2DArrayFace, + SPVFuncImplUnsafeArray, // Allow Metal to use the array template to make arrays a value type + SPVFuncImplStorageMatrix, // Allow threadgroup construction of matrices SPVFuncImplInverse4x4, SPVFuncImplInverse3x3, SPVFuncImplInverse2x2, - SPVFuncImplRowMajor2x3, - SPVFuncImplRowMajor2x4, - SPVFuncImplRowMajor3x2, - SPVFuncImplRowMajor3x4, - SPVFuncImplRowMajor4x2, - SPVFuncImplRowMajor4x3, + // It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's + // emitted before them. + SPVFuncImplForwardArgs, + // Likewise, this must come before *Swizzle. 
+ SPVFuncImplGetSwizzle, SPVFuncImplTextureSwizzle, + SPVFuncImplGatherSwizzle, + SPVFuncImplGatherCompareSwizzle, + SPVFuncImplSubgroupBroadcast, + SPVFuncImplSubgroupBroadcastFirst, SPVFuncImplSubgroupBallot, SPVFuncImplSubgroupBallotBitExtract, SPVFuncImplSubgroupBallotFindLSB, SPVFuncImplSubgroupBallotFindMSB, SPVFuncImplSubgroupBallotBitCount, SPVFuncImplSubgroupAllEqual, + SPVFuncImplSubgroupShuffle, + SPVFuncImplSubgroupShuffleXor, + SPVFuncImplSubgroupShuffleUp, + SPVFuncImplSubgroupShuffleDown, + SPVFuncImplQuadBroadcast, + SPVFuncImplQuadSwap, SPVFuncImplReflectScalar, SPVFuncImplRefractScalar, - SPVFuncImplArrayCopyMultidimMax = 6 + SPVFuncImplFaceForwardScalar, + SPVFuncImplChromaReconstructNearest2Plane, + SPVFuncImplChromaReconstructNearest3Plane, + SPVFuncImplChromaReconstructLinear422CositedEven2Plane, + SPVFuncImplChromaReconstructLinear422CositedEven3Plane, + SPVFuncImplChromaReconstructLinear422Midpoint2Plane, + SPVFuncImplChromaReconstructLinear422Midpoint3Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane, + SPVFuncImplExpandITUFullRange, + SPVFuncImplExpandITUNarrowRange, + SPVFuncImplConvertYCbCrBT709, + SPVFuncImplConvertYCbCrBT601, + SPVFuncImplConvertYCbCrBT2020, + SPVFuncImplDynamicImageSampler, }; + // If the underlying resource has been used for comparison then duplicate loads of that resource must be too + // Use Metal's native frame-buffer fetch API for subpass inputs. 
+ void emit_texture_op(const Instruction &i, bool sparse) override; + void emit_binary_ptr_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + std::string to_ptr_expression(uint32_t id, bool register_expression_read = true); void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); void emit_instruction(const Instruction &instr) override; void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count) override; + void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op, + const uint32_t *args, uint32_t count) override; void emit_header() override; void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; void emit_subgroup_op(const Instruction &i) override; + std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, + SmallVector &inherited_expressions) override; void emit_fixup() override; std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = ""); void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = "", uint32_t base_offset = 0) override; + void emit_struct_padding_target(const SPIRType &type) override; + std::string type_to_glsl(const SPIRType &type, uint32_t id, bool member); std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; + void emit_block_hints(const SPIRBlock &block) override; + + // Allow Metal to use the array template to make arrays a value type + std::string type_to_array_glsl(const SPIRType &type) override; + std::string constant_op_expression(const SPIRConstantOp &cop) override; + + // Threadgroup arrays can't have a wrapper type + std::string 
variable_decl(const SPIRVariable &variable) override; + + bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override; + + // GCC workaround of lambdas calling protected functions (for older GCC versions) + std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override; + std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override; - std::string sampler_type(const SPIRType &type); + std::string sampler_type(const SPIRType &type, uint32_t id); std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; - size_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const; - std::string to_func_call_arg(uint32_t id) override; + std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; std::string to_name(uint32_t id, bool allow_alias = true) const override; - std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, - bool has_array_offsets, bool has_offset, bool has_grad, bool has_dref, uint32_t lod, - uint32_t minlod) override; - std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, - uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, - uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, - uint32_t comp, uint32_t sample, uint32_t minlod, bool *p_forward) override; + std::string to_function_name(const TextureFunctionNameArguments &args) override; + std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward) override; std::string to_initializer_expression(const SPIRVariable &var) override; - std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id) override; + std::string to_zero_initialized_expression(uint32_t type_id) override; + + std::string 
unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, + bool is_packed, bool row_major) override; + + // Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but [[sample_mask]] is a scalar in Metal. + bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override; + std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; + bool emit_complex_bitcast(uint32_t result_id, uint32_t id, uint32_t op0) override; bool skip_argument(uint32_t id) const override; - std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) override; + std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved) override; std::string to_qualifiers_glsl(uint32_t id) override; void replace_illegal_names() override; - void declare_undefined_values() override; void declare_constant_arrays(); + + void replace_illegal_entry_point_names(); + void sync_entry_point_aliases_and_names(); + + static const std::unordered_set &get_reserved_keyword_set(); + static const std::unordered_set &get_illegal_func_names(); + + // Constant arrays of non-primitive types (i.e. 
matrices) won't link properly into Metal libraries + void declare_complex_constant_arrays(); + bool is_patch_block(const SPIRType &type); bool is_non_native_row_major_matrix(uint32_t id) override; bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override; - std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed) override; + std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, + bool is_packed) override; + + bool is_tesc_shader() const; + bool is_tese_shader() const; void preprocess_op_codes(); void localize_global_variables(); void extract_global_variables_from_functions(); void mark_packable_structs(); void mark_as_packable(SPIRType &type); + void mark_as_workgroup_struct(SPIRType &type); std::unordered_map> function_global_vars; void extract_global_variables_from_function(uint32_t func_id, std::set &added_arg_ids, @@ -460,33 +883,66 @@ class CompilerMSL : public CompilerGLSL uint32_t add_interface_block(spv::StorageClass storage, bool patch = false); uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage); + struct InterfaceBlockMeta + { + struct LocationMeta + { + uint32_t base_type_id = 0; + uint32_t num_components = 0; + bool flat = false; + bool noperspective = false; + bool centroid = false; + bool sample = false; + }; + std::unordered_map location_meta; + bool strip_array = false; + bool allow_local_declaration = false; + }; + + std::string to_tesc_invocation_id(); + void emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array); void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type, - SPIRVariable &var, bool strip_array); + SPIRVariable &var, InterfaceBlockMeta &meta); void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, bool 
strip_array); + SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); void add_plain_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, bool strip_array); - void add_plain_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, uint32_t index, - bool strip_array); - void add_composite_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, uint32_t index, - bool strip_array); - uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array); + SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); + bool add_component_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, + SPIRVariable &var, const SPIRType &type, + InterfaceBlockMeta &meta); + void add_plain_member_variable_to_interface_block(spv::StorageClass storage, + const std::string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var, SPIRType &var_type, + uint32_t mbr_idx, InterfaceBlockMeta &meta, + const std::string &mbr_name_qual, + const std::string &var_chain_qual, + uint32_t &location, uint32_t &var_mbr_idx); + void add_composite_member_variable_to_interface_block(spv::StorageClass storage, + const std::string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var, SPIRType &var_type, + uint32_t mbr_idx, InterfaceBlockMeta &meta, + const std::string &mbr_name_qual, + const std::string &var_chain_qual, + uint32_t &location, uint32_t &var_mbr_idx); void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var); + void add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var); void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id); - void 
mark_location_as_used_by_shader(uint32_t location, spv::StorageClass storage); + void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, + spv::StorageClass storage, bool fallback = false); uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin); - uint32_t ensure_correct_attribute_type(uint32_t type_id, uint32_t location); + uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t component, + uint32_t num_components, bool strip_array); + void emit_custom_templates(); void emit_custom_functions(); void emit_resources(); void emit_specialization_constants_and_structs(); void emit_interface_block(uint32_t ib_var_id); bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs); - void add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows); + uint32_t get_resource_array_size(uint32_t id) const; + void fix_up_shader_inputs_outputs(); std::string func_type_decl(SPIRType &type); @@ -495,45 +951,89 @@ class CompilerMSL : public CompilerGLSL std::string entry_point_arg_stage_in(); void entry_point_args_builtin(std::string &args); void entry_point_args_discrete_descriptors(std::string &args); - std::string to_qualified_member_name(const SPIRType &type, uint32_t index); + std::string append_member_name(const std::string &qualifier, const SPIRType &type, uint32_t index); std::string ensure_valid_name(std::string name, std::string pfx); std::string to_sampler_expression(uint32_t id); std::string to_swizzle_expression(uint32_t id); std::string to_buffer_size_expression(uint32_t id); + bool is_sample_rate() const; + bool is_intersection_query() const; + bool is_direct_input_builtin(spv::BuiltIn builtin); std::string builtin_qualifier(spv::BuiltIn builtin); std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0); std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma); std::string member_attribute_qualifier(const SPIRType &type, uint32_t index); + std::string 
member_location_attribute_qualifier(const SPIRType &type, uint32_t index); std::string argument_decl(const SPIRFunction::Parameter &arg); + const char *descriptor_address_space(uint32_t id, spv::StorageClass storage, const char *plain_address_space) const; std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp); - uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype); - uint32_t get_ordered_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); - size_t get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0); + uint32_t get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr) const; + uint32_t get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin, + uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); + uint32_t get_or_allocate_builtin_output_member_location(spv::BuiltIn builtin, + uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); + + uint32_t get_physical_tess_level_array_size(spv::BuiltIn builtin) const; + + // MSL packing rules. These compute the effective packing rules as observed by the MSL compiler in the MSL output. + // These values can change depending on various extended decorations which control packing rules. + // We need to make these rules match up with SPIR-V declared rules. 
+ uint32_t get_declared_type_size_msl(const SPIRType &type, bool packed, bool row_major) const; + uint32_t get_declared_type_array_stride_msl(const SPIRType &type, bool packed, bool row_major) const; + uint32_t get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const; + uint32_t get_declared_type_alignment_msl(const SPIRType &type, bool packed, bool row_major) const; + + uint32_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_struct_member_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_struct_member_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_struct_member_alignment_msl(const SPIRType &struct_type, uint32_t index) const; + + uint32_t get_declared_input_size_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_alignment_msl(const SPIRType &struct_type, uint32_t index) const; + + const SPIRType &get_physical_member_type(const SPIRType &struct_type, uint32_t index) const; + SPIRType get_presumed_input_type(const SPIRType &struct_type, uint32_t index) const; + + uint32_t get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment = false, + bool ignore_padding = false) const; + std::string to_component_argument(uint32_t id); - void align_struct(SPIRType &ib_type); - bool is_member_packable(SPIRType &ib_type, uint32_t index, uint32_t base_offset = 0); - uint32_t get_member_packed_type(SPIRType &ib_type, uint32_t index); - MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index); + void align_struct(SPIRType &ib_type, std::unordered_set &aligned_structs); + void mark_scalar_layout_structs(const SPIRType 
&ib_type); + void mark_struct_members_packed(const SPIRType &type); + void ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index); + bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const; std::string get_argument_address_space(const SPIRVariable &argument); - std::string get_type_address_space(const SPIRType &type, uint32_t id); + std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false); + const char *to_restrict(uint32_t id, bool space); SPIRType &get_stage_in_struct_type(); SPIRType &get_stage_out_struct_type(); SPIRType &get_patch_stage_in_struct_type(); SPIRType &get_patch_stage_out_struct_type(); std::string get_tess_factor_struct_name(); - void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1, - uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, + SPIRType &get_uint_type(); + uint32_t get_uint_type_id(); + void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, spv::Op opcode, + uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0); const char *get_memory_order(uint32_t spv_mem_sem); void add_pragma_line(const std::string &line); void add_typedef_line(const std::string &line); void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem); - void emit_array_copy(const std::string &lhs, uint32_t rhs_id) override; + void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id, + spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override; void build_implicit_builtins(); uint32_t build_constant_uint_array_pointer(); void emit_entry_point_declarations() override; + bool uses_explicit_early_fragment_test(); + uint32_t builtin_frag_coord_id = 0; uint32_t builtin_sample_id_id = 0; + uint32_t builtin_sample_mask_id = 0; + 
uint32_t builtin_helper_invocation_id = 0; uint32_t builtin_vertex_idx_id = 0; uint32_t builtin_base_vertex_id = 0; uint32_t builtin_instance_idx_id = 0; @@ -544,56 +1044,64 @@ class CompilerMSL : public CompilerGLSL uint32_t builtin_primitive_id_id = 0; uint32_t builtin_subgroup_invocation_id_id = 0; uint32_t builtin_subgroup_size_id = 0; + uint32_t builtin_dispatch_base_id = 0; + uint32_t builtin_stage_input_size_id = 0; + uint32_t builtin_local_invocation_index_id = 0; + uint32_t builtin_workgroup_size_id = 0; uint32_t swizzle_buffer_id = 0; uint32_t buffer_size_buffer_id = 0; uint32_t view_mask_buffer_id = 0; + uint32_t dynamic_offsets_buffer_id = 0; + uint32_t uint_type_id = 0; + uint32_t argument_buffer_padding_buffer_type_id = 0; + uint32_t argument_buffer_padding_image_type_id = 0; + uint32_t argument_buffer_padding_sampler_type_id = 0; + + bool does_shader_write_sample_mask = false; + bool frag_shader_needs_discard_checks = false; - void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; - void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; + void cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; + void cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override; void analyze_sampled_image_usage(); + bool access_chain_needs_stage_io_builtin_translation(uint32_t base) override; + void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, + bool &is_packed) override; + void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length); + void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override; + bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length); + bool 
emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr); bool is_out_of_bounds_tessellation_level(uint32_t id_lhs); + void ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin); + void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id); std::string convert_to_f32(const std::string &expr, uint32_t components); Options msl_options; std::set spv_function_implementations; - std::unordered_map vtx_attrs_by_location; - std::unordered_map vtx_attrs_by_builtin; - std::unordered_set vtx_attrs_in_use; + // Must be ordered to ensure declarations are in a specific order. + std::map inputs_by_location; + std::unordered_map inputs_by_builtin; + std::map outputs_by_location; + std::unordered_map outputs_by_builtin; + std::unordered_set location_inputs_in_use; + std::unordered_set location_inputs_in_use_fallback; + std::unordered_set location_outputs_in_use; + std::unordered_set location_outputs_in_use_fallback; std::unordered_map fragment_output_components; - std::unordered_map struct_member_padding; + std::unordered_map builtin_to_automatic_input_location; + std::unordered_map builtin_to_automatic_output_location; std::set pragma_lines; std::set typedef_lines; SmallVector vars_needing_early_declaration; - struct SetBindingPair - { - uint32_t desc_set; - uint32_t binding; - bool operator==(const SetBindingPair &other) const; - }; - - struct StageSetBinding - { - spv::ExecutionModel model; - uint32_t desc_set; - uint32_t binding; - bool operator==(const StageSetBinding &other) const; - }; - - struct InternalHasher - { - size_t operator()(const SetBindingPair &value) const; - size_t operator()(const StageSetBinding &value) const; - }; - std::unordered_map, InternalHasher> resource_bindings; + std::unordered_map resource_arg_buff_idx_to_binding_number; uint32_t next_metal_resource_index_buffer = 0; uint32_t next_metal_resource_index_texture = 0; @@ -601,21 +1109,41 @@ class CompilerMSL : public CompilerGLSL // 
Intentionally uninitialized, works around MSVC 2013 bug. uint32_t next_metal_resource_ids[kMaxArgumentBuffers]; - uint32_t stage_in_var_id = 0; - uint32_t stage_out_var_id = 0; - uint32_t patch_stage_in_var_id = 0; - uint32_t patch_stage_out_var_id = 0; - uint32_t stage_in_ptr_var_id = 0; - uint32_t stage_out_ptr_var_id = 0; + VariableID stage_in_var_id = 0; + VariableID stage_out_var_id = 0; + VariableID patch_stage_in_var_id = 0; + VariableID patch_stage_out_var_id = 0; + VariableID stage_in_ptr_var_id = 0; + VariableID stage_out_ptr_var_id = 0; + VariableID tess_level_inner_var_id = 0; + VariableID tess_level_outer_var_id = 0; + VariableID stage_out_masked_builtin_type_id = 0; + + // Handle HLSL-style 0-based vertex/instance index. + enum class TriState + { + Neutral, + No, + Yes + }; + TriState needs_base_vertex_arg = TriState::Neutral; + TriState needs_base_instance_arg = TriState::Neutral; + bool has_sampled_images = false; - bool needs_vertex_idx_arg = false; - bool needs_instance_idx_arg = false; + bool builtin_declaration = false; // Handle HLSL-style 0-based vertex/instance index. + + bool is_using_builtin_array = false; // Force the use of C style array declaration. 
+ bool using_builtin_array() const; + bool is_rasterization_disabled = false; bool capture_output_to_buffer = false; bool needs_swizzle_buffer_def = false; bool used_swizzle_buffer = false; bool added_builtin_tess_level = false; bool needs_subgroup_invocation_id = false; + bool needs_subgroup_size = false; + bool needs_sample_id = false; + bool needs_helper_invocation = false; std::string qual_pos_var_name; std::string stage_in_var_name = "in"; std::string stage_out_var_name = "out"; @@ -624,10 +1152,14 @@ class CompilerMSL : public CompilerGLSL std::string sampler_name_suffix = "Smplr"; std::string swizzle_name_suffix = "Swzl"; std::string buffer_size_name_suffix = "BufferSize"; + std::string plane_name_suffix = "Plane"; std::string input_wg_var_name = "gl_in"; + std::string input_buffer_var_name = "spvIn"; std::string output_buffer_var_name = "spvOut"; + std::string patch_input_buffer_var_name = "spvPatchIn"; std::string patch_output_buffer_var_name = "spvPatchOut"; std::string tess_factor_buffer_var_name = "spvTessLevel"; + std::string index_buffer_var_name = "spvIndices"; spv::Op previous_instruction_opcode = spv::OpNop; // Must be ordered since declaration is in a specific order. @@ -636,18 +1168,62 @@ class CompilerMSL : public CompilerGLSL const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const; std::unordered_set buffers_requiring_array_length; - SmallVector buffer_arrays; + SmallVector buffer_arrays_discrete; + SmallVector> buffer_aliases_argument; + SmallVector buffer_aliases_discrete; + std::unordered_set atomic_image_vars; // Emulate texture2D atomic operations + std::unordered_set pull_model_inputs; + + // Must be ordered since array is in a specific order. 
+ std::map> buffers_requiring_dynamic_offset; + + SmallVector disabled_frag_outputs; + + std::unordered_set inline_uniform_blocks; uint32_t argument_buffer_ids[kMaxArgumentBuffers]; uint32_t argument_buffer_discrete_mask = 0; + uint32_t argument_buffer_device_storage_mask = 0; + void analyze_argument_buffers(); bool descriptor_set_is_argument_buffer(uint32_t desc_set) const; + MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx); + void add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); + void add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); + void add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); + void add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, uint32_t count); uint32_t get_target_components_for_fragment_location(uint32_t location) const; - uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components); + uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components, + SPIRType::BaseType basetype = SPIRType::Unknown); + uint32_t build_msl_interpolant_type(uint32_t type_id, bool is_noperspective); bool suppress_missing_prototypes = false; + void add_spv_func_and_recompile(SPVFuncImpl spv_func); + + void activate_argument_buffer_resources(); + + bool type_is_msl_framebuffer_fetch(const SPIRType &type) const; + bool type_is_pointer(const SPIRType &type) const; + bool type_is_pointer_to_pointer(const SPIRType &type) const; + bool is_supported_argument_buffer_type(const SPIRType &type) const; + + bool variable_storage_requires_stage_io(spv::StorageClass storage) const; + + bool needs_manual_helper_invocation_updates() const + { + return 
msl_options.manual_helper_invocation_updates && msl_options.supports_msl_version(2, 3); + } + bool needs_frag_discard_checks() const + { + return get_execution_model() == spv::ExecutionModelFragment && msl_options.supports_msl_version(2, 3) && + msl_options.check_discarded_frag_stores && frag_shader_needs_discard_checks; + } + + bool has_additional_fixed_sample_mask() const { return msl_options.additional_fixed_sample_mask != 0xffffffff; } + std::string additional_fixed_sample_mask_str() const; + // OpcodeHandler that handles several MSL preprocessing operations. struct OpCodePreprocessor : OpcodeHandler { @@ -662,10 +1238,16 @@ class CompilerMSL : public CompilerGLSL CompilerMSL &compiler; std::unordered_map result_types; + std::unordered_map image_pointers; // Emulate texture2D atomic operations bool suppress_missing_prototypes = false; bool uses_atomics = false; - bool uses_resource_write = false; + bool uses_image_write = false; + bool uses_buffer_write = false; + bool uses_discard = false; bool needs_subgroup_invocation_id = false; + bool needs_subgroup_size = false; + bool needs_sample_id = false; + bool needs_helper_invocation = false; }; // OpcodeHandler that scans for uses of sampled images @@ -688,11 +1270,8 @@ class CompilerMSL : public CompilerGLSL { enum SortAspect { - Location, - LocationReverse, - Offset, - OffsetThenLocationReverse, - Alphabetical + LocationThenBuiltInType, + Offset }; void sort(); diff --git a/spirv_parser.cpp b/spirv_parser.cpp index d5a16337d1c..01c2e381241 100644 --- a/spirv_parser.cpp +++ b/spirv_parser.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2018-2019 Arm Limited + * Copyright 2018-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. 
The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_parser.hpp" #include @@ -24,7 +31,7 @@ namespace SPIRV_CROSS_NAMESPACE { Parser::Parser(vector spirv) { - ir.spirv = move(spirv); + ir.spirv = std::move(spirv); } Parser::Parser(const uint32_t *spirv_data, size_t word_count) @@ -60,6 +67,8 @@ static bool is_valid_spirv_version(uint32_t version) case 0x10200: // SPIR-V 1.2 case 0x10300: // SPIR-V 1.3 case 0x10400: // SPIR-V 1.4 + case 0x10500: // SPIR-V 1.5 + case 0x10600: // SPIR-V 1.6 return true; default: @@ -85,6 +94,11 @@ void Parser::parse() SPIRV_CROSS_THROW("Invalid SPIRV format."); uint32_t bound = s[3]; + + const uint32_t MaximumNumberOfIDs = 0x3fffff; + if (bound > MaximumNumberOfIDs) + SPIRV_CROSS_THROW("ID bound exceeds limit of 0x3fffff.\n"); + ir.set_id_bounds(bound); uint32_t offset = 5; @@ -113,10 +127,22 @@ void Parser::parse() for (auto &i : instructions) parse(i); + for (auto &fixup : forward_pointer_fixups) + { + auto &target = get(fixup.first); + auto &source = get(fixup.second); + target.member_types = source.member_types; + target.basetype = source.basetype; + target.self = source.self; + } + forward_pointer_fixups.clear(); + if (current_function) SPIRV_CROSS_THROW("Function was not terminated."); if (current_block) SPIRV_CROSS_THROW("Block was not terminated."); + if (ir.default_entry_point == 0) + SPIRV_CROSS_THROW("There is no entry point in the SPIR-V module."); } const uint32_t *Parser::stream(const Instruction &instr) const @@ -157,6 +183,15 @@ void Parser::parse(const Instruction &instruction) auto op = static_cast(instruction.op); uint32_t length = instruction.length; + // HACK for glslang that might emit OpEmitMeshTasksEXT followed by return / branch. + // Instead of failing hard, just ignore it. 
+ if (ignore_trailing_block_opcodes) + { + ignore_trailing_block_opcodes = false; + if (op == OpReturn || op == OpBranch || op == OpUnreachable) + return; + } + switch (op) { case OpSourceContinued: @@ -233,29 +268,37 @@ void Parser::parse(const Instruction &instruction) case OpExtension: { auto ext = extract_string(ir.spirv, instruction.offset); - ir.declared_extensions.push_back(move(ext)); + ir.declared_extensions.push_back(std::move(ext)); break; } case OpExtInstImport: { uint32_t id = ops[0]; + + SPIRExtension::Extension spirv_ext = SPIRExtension::Unsupported; + auto ext = extract_string(ir.spirv, instruction.offset + 1); if (ext == "GLSL.std.450") - set(id, SPIRExtension::GLSL); + spirv_ext = SPIRExtension::GLSL; else if (ext == "DebugInfo") - set(id, SPIRExtension::SPV_debug_info); + spirv_ext = SPIRExtension::SPV_debug_info; else if (ext == "SPV_AMD_shader_ballot") - set(id, SPIRExtension::SPV_AMD_shader_ballot); + spirv_ext = SPIRExtension::SPV_AMD_shader_ballot; else if (ext == "SPV_AMD_shader_explicit_vertex_parameter") - set(id, SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter); + spirv_ext = SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter; else if (ext == "SPV_AMD_shader_trinary_minmax") - set(id, SPIRExtension::SPV_AMD_shader_trinary_minmax); + spirv_ext = SPIRExtension::SPV_AMD_shader_trinary_minmax; else if (ext == "SPV_AMD_gcn_shader") - set(id, SPIRExtension::SPV_AMD_gcn_shader); - else - set(id, SPIRExtension::Unsupported); - + spirv_ext = SPIRExtension::SPV_AMD_gcn_shader; + else if (ext == "NonSemantic.DebugPrintf") + spirv_ext = SPIRExtension::NonSemanticDebugPrintf; + else if (ext == "NonSemantic.Shader.DebugInfo.100") + spirv_ext = SPIRExtension::NonSemanticShaderDebugInfo; + else if (ext.find("NonSemantic.") == 0) + spirv_ext = SPIRExtension::NonSemanticGeneric; + + set(id, spirv_ext); // Other SPIR-V extensions which have ExtInstrs are currently not supported. 
break; @@ -265,7 +308,15 @@ void Parser::parse(const Instruction &instruction) { // The SPIR-V debug information extended instructions might come at global scope. if (current_block) + { current_block->ops.push_back(instruction); + if (length >= 2) + { + const auto *type = maybe_get(ops[0]); + if (type) + ir.load_type_width.insert({ ops[1], type->width }); + } + } break; } @@ -278,7 +329,9 @@ void Parser::parse(const Instruction &instruction) // Strings need nul-terminator and consume the whole word. uint32_t strlen_words = uint32_t((e.name.size() + 1 + 3) >> 2); - e.interface_variables.insert(end(e.interface_variables), ops + strlen_words + 2, ops + instruction.length); + + for (uint32_t i = strlen_words + 2; i < instruction.length; i++) + e.interface_variables.push_back(ops[i]); // Set the name of the entry point in case OpName is not provided later. ir.set_name(ops[1], e.name); @@ -311,12 +364,32 @@ void Parser::parse(const Instruction &instruction) execution.output_vertices = ops[2]; break; + case ExecutionModeOutputPrimitivesEXT: + execution.output_primitives = ops[2]; + break; + default: break; } break; } + case OpExecutionModeId: + { + auto &execution = ir.entry_points[ops[0]]; + auto mode = static_cast(ops[1]); + execution.flags.set(mode); + + if (mode == ExecutionModeLocalSizeId) + { + execution.workgroup_size.id_x = ops[2]; + execution.workgroup_size.id_y = ops[3]; + execution.workgroup_size.id_z = ops[4]; + } + + break; + } + case OpName: { uint32_t id = ops[0]; @@ -535,6 +608,11 @@ void Parser::parse(const Instruction &instruction) auto *c = maybe_get(cid); bool literal = c && !c->specialization; + // We're copying type information into Array types, so we'll need a fixup for any physical pointer + // references. + if (base.forward_pointer) + forward_pointer_fixups.push_back({ id, tid }); + arraybase.array_size_literal.push_back(literal); arraybase.array.push_back(literal ? c->scalar() : cid); // Do NOT set arraybase.self! 
@@ -548,6 +626,11 @@ void Parser::parse(const Instruction &instruction) auto &base = get(ops[1]); auto &arraybase = set(id); + // We're copying type information into Array types, so we'll need a fixup for any physical pointer + // references. + if (base.forward_pointer) + forward_pointer_fixups.push_back({ id, ops[1] }); + arraybase = base; arraybase.array.push_back(0); arraybase.array_size_literal.push_back(true); @@ -595,10 +678,15 @@ void Parser::parse(const Instruction &instruction) { uint32_t id = ops[0]; - auto &base = get(ops[2]); + // Very rarely, we might receive a FunctionPrototype here. + // We won't be able to compile it, but we shouldn't crash when parsing. + // We should be able to reflect. + auto *base = maybe_get(ops[2]); auto &ptrbase = set(id); - ptrbase = base; + if (base) + ptrbase = *base; + ptrbase.pointer = true; ptrbase.pointer_depth++; ptrbase.storage = static_cast(ops[1]); @@ -606,6 +694,9 @@ void Parser::parse(const Instruction &instruction) if (ptrbase.storage == StorageClassAtomicCounter) ptrbase.basetype = SPIRType::AtomicCounter; + if (base && base->forward_pointer) + forward_pointer_fixups.push_back({ id, ops[2] }); + ptrbase.parent_type = ops[2]; // Do NOT set ptrbase.self! 
@@ -619,6 +710,7 @@ void Parser::parse(const Instruction &instruction) ptrbase.pointer = true; ptrbase.pointer_depth++; ptrbase.storage = static_cast(ops[1]); + ptrbase.forward_pointer = true; if (ptrbase.storage == StorageClassAtomicCounter) ptrbase.basetype = SPIRType::AtomicCounter; @@ -658,7 +750,7 @@ void Parser::parse(const Instruction &instruction) } } - if (type.type_alias == 0) + if (type.type_alias == TypeID(0)) global_struct_cache.push_back(id); } break; @@ -675,11 +767,19 @@ void Parser::parse(const Instruction &instruction) break; } - case OpTypeAccelerationStructureNV: + case OpTypeAccelerationStructureKHR: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::AccelerationStructure; + break; + } + + case OpTypeRayQueryKHR: { uint32_t id = ops[0]; auto &type = set(id); - type.basetype = SPIRType::AccelerationStructureNV; + type.basetype = SPIRType::RayQuery; break; } @@ -700,15 +800,6 @@ void Parser::parse(const Instruction &instruction) } set(id, type, storage, initializer); - - // hlsl based shaders don't have those decorations. force them and then reset when reading/writing images - auto &ttype = get(type); - if (ttype.basetype == SPIRType::BaseType::Image) - { - ir.set_decoration(id, DecorationNonWritable); - ir.set_decoration(id, DecorationNonReadable); - } - break; } @@ -772,7 +863,7 @@ void Parser::parse(const Instruction &instruction) { uint32_t id = ops[1]; uint32_t type = ops[0]; - make_constant_null(id, type); + ir.make_constant_null(id, type, true); break; } @@ -916,6 +1007,58 @@ void Parser::parse(const Instruction &instruction) current_block->false_block = ops[2]; current_block->terminator = SPIRBlock::Select; + + if (current_block->true_block == current_block->false_block) + { + // Bogus conditional, translate to a direct branch. + // Avoids some ugly edge cases later when analyzing CFGs. 
+ + // There are some super jank cases where the merge block is different from the true/false, + // and later branches can "break" out of the selection construct this way. + // This is complete nonsense, but CTS hits this case. + // In this scenario, we should see the selection construct as more of a Switch with one default case. + // The problem here is that this breaks any attempt to break out of outer switch statements, + // but it's theoretically solvable if this ever comes up using the ladder breaking system ... + + if (current_block->true_block != current_block->next_block && + current_block->merge == SPIRBlock::MergeSelection) + { + uint32_t ids = ir.increase_bound_by(2); + + SPIRType type; + type.basetype = SPIRType::Int; + type.width = 32; + set(ids, type); + auto &c = set(ids + 1, ids); + + current_block->condition = c.self; + current_block->default_block = current_block->true_block; + current_block->terminator = SPIRBlock::MultiSelect; + ir.block_meta[current_block->next_block] &= ~ParsedIR::BLOCK_META_SELECTION_MERGE_BIT; + ir.block_meta[current_block->next_block] |= ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT; + } + else + { + // Collapse loops if we have to. 
+ bool collapsed_loop = current_block->true_block == current_block->merge_block && + current_block->merge == SPIRBlock::MergeLoop; + + if (collapsed_loop) + { + ir.block_meta[current_block->merge_block] &= ~ParsedIR::BLOCK_META_LOOP_MERGE_BIT; + ir.block_meta[current_block->continue_block] &= ~ParsedIR::BLOCK_META_CONTINUE_BIT; + } + + current_block->next_block = current_block->true_block; + current_block->condition = 0; + current_block->true_block = 0; + current_block->false_block = 0; + current_block->merge_block = 0; + current_block->merge = SPIRBlock::MergeNone; + current_block->terminator = SPIRBlock::Direct; + } + } + current_block = nullptr; break; } @@ -930,8 +1073,21 @@ void Parser::parse(const Instruction &instruction) current_block->condition = ops[0]; current_block->default_block = ops[1]; - for (uint32_t i = 2; i + 2 <= length; i += 2) - current_block->cases.push_back({ ops[i], ops[i + 1] }); + uint32_t remaining_ops = length - 2; + if ((remaining_ops % 2) == 0) + { + for (uint32_t i = 2; i + 2 <= length; i += 2) + current_block->cases_32bit.push_back({ ops[i], ops[i + 1] }); + } + + if ((remaining_ops % 3) == 0) + { + for (uint32_t i = 2; i + 3 <= length; i += 3) + { + uint64_t value = (static_cast(ops[i + 1]) << 32) | ops[i]; + current_block->cases_64bit.push_back({ value, ops[i + 2] }); + } + } // If we jump to next block, make it break instead since we're inside a switch case block at that point. ir.block_meta[current_block->next_block] |= ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT; @@ -941,6 +1097,7 @@ void Parser::parse(const Instruction &instruction) } case OpKill: + case OpTerminateInvocation: { if (!current_block) SPIRV_CROSS_THROW("Trying to end a non-existing block."); @@ -949,6 +1106,34 @@ void Parser::parse(const Instruction &instruction) break; } + case OpTerminateRayKHR: + // NV variant is not a terminator. 
+ if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::TerminateRay; + current_block = nullptr; + break; + + case OpIgnoreIntersectionKHR: + // NV variant is not a terminator. + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::IgnoreIntersection; + current_block = nullptr; + break; + + case OpEmitMeshTasksEXT: + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::EmitMeshTasks; + for (uint32_t i = 0; i < 3; i++) + current_block->mesh.groups[i] = ops[i]; + current_block->mesh.payload = length >= 4 ? ops[3] : 0; + current_block = nullptr; + // Currently glslang is bugged and does not treat EmitMeshTasksEXT as a terminator. + ignore_trailing_block_opcodes = true; + break; + case OpReturn: { if (!current_block) @@ -1008,12 +1193,12 @@ void Parser::parse(const Instruction &instruction) ir.block_meta[current_block->self] |= ParsedIR::BLOCK_META_LOOP_HEADER_BIT; ir.block_meta[current_block->merge_block] |= ParsedIR::BLOCK_META_LOOP_MERGE_BIT; - ir.continue_block_to_loop_header[current_block->continue_block] = current_block->self; + ir.continue_block_to_loop_header[current_block->continue_block] = BlockID(current_block->self); // Don't add loop headers to continue blocks, // which would make it impossible branch into the loop header since // they are treated as continues. - if (current_block->continue_block != current_block->self) + if (current_block->continue_block != BlockID(current_block->self)) ir.block_meta[current_block->continue_block] |= ParsedIR::BLOCK_META_CONTINUE_BIT; if (length >= 3) @@ -1073,6 +1258,13 @@ void Parser::parse(const Instruction &instruction) // Actual opcodes. 
default: { + if (length >= 2) + { + const auto *type = maybe_get(ops[0]); + if (type) + ir.load_type_width.insert({ ops[1], type->width }); + } + if (!current_block) SPIRV_CROSS_THROW("Currently no block to insert opcode."); @@ -1137,46 +1329,4 @@ bool Parser::variable_storage_is_aliased(const SPIRVariable &v) const return !is_restrict && (ssbo || image || counter); } - -void Parser::make_constant_null(uint32_t id, uint32_t type) -{ - auto &constant_type = get(type); - - if (constant_type.pointer) - { - auto &constant = set(id, type); - constant.make_null(constant_type); - } - else if (!constant_type.array.empty()) - { - assert(constant_type.parent_type); - uint32_t parent_id = ir.increase_bound_by(1); - make_constant_null(parent_id, constant_type.parent_type); - - if (!constant_type.array_size_literal.back()) - SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal."); - - SmallVector elements(constant_type.array.back()); - for (uint32_t i = 0; i < constant_type.array.back(); i++) - elements[i] = parent_id; - set(id, type, elements.data(), uint32_t(elements.size()), false); - } - else if (!constant_type.member_types.empty()) - { - uint32_t member_ids = ir.increase_bound_by(uint32_t(constant_type.member_types.size())); - SmallVector elements(constant_type.member_types.size()); - for (uint32_t i = 0; i < constant_type.member_types.size(); i++) - { - make_constant_null(member_ids + i, constant_type.member_types[i]); - elements[i] = member_ids + i; - } - set(id, type, elements.data(), uint32_t(elements.size()), false); - } - else - { - auto &constant = set(id, type); - constant.make_null(constant_type); - } -} - } // namespace SPIRV_CROSS_NAMESPACE diff --git a/spirv_parser.hpp b/spirv_parser.hpp index ef2c1b9869e..dabc0e22446 100644 --- a/spirv_parser.hpp +++ b/spirv_parser.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2018-2019 Arm Limited + * Copyright 2018-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, 
Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_PARSER_HPP #define SPIRV_CROSS_PARSER_HPP @@ -39,6 +46,8 @@ class Parser ParsedIR ir; SPIRFunction *current_function = nullptr; SPIRBlock *current_block = nullptr; + // For workarounds. + bool ignore_trailing_block_opcodes = false; void parse(const Instruction &instr); const uint32_t *stream(const Instruction &instr) const; @@ -84,10 +93,10 @@ class Parser // This must be an ordered data structure so we always pick the same type aliases. SmallVector global_struct_cache; + SmallVector> forward_pointer_fixups; bool types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const; bool variable_storage_is_aliased(const SPIRVariable &v) const; - void make_constant_null(uint32_t id, uint32_t type); }; } // namespace SPIRV_CROSS_NAMESPACE diff --git a/spirv_reflect.cpp b/spirv_reflect.cpp index b187a7fa611..0bd224e6c2b 100644 --- a/spirv_reflect.cpp +++ b/spirv_reflect.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2018-2019 Bradley Austin Davis + * Copyright 2018-2021 Bradley Austin Davis + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #include "spirv_reflect.hpp" #include "spirv_glsl.hpp" #include @@ -61,6 +68,7 @@ class Stream void end_json_array(); void emit_json_array_value(const std::string &value); void emit_json_array_value(uint32_t value); + void emit_json_array_value(bool value); std::string str() const { @@ -158,6 +166,16 @@ void Stream::emit_json_array_value(uint32_t value) stack.top().second = true; } +void Stream::emit_json_array_value(bool value) +{ + if (stack.empty() || stack.top().first != Type::Array) + SPIRV_CROSS_THROW("Invalid JSON state"); + if (stack.top().second) + statement_inner(",\n"); + statement_no_return(value ? "true" : "false"); + stack.top().second = true; +} + void Stream::begin_json_object() { if (!stack.empty() && stack.top().second) @@ -256,7 +274,6 @@ string CompilerReflection::compile() json_stream = std::make_shared(); json_stream->set_current_locale_radix_character(current_locale_radix_character); json_stream->begin_json_object(); - fixup_type_alias(); reorder_type_alias(); emit_entry_points(); emit_types(); @@ -266,53 +283,97 @@ string CompilerReflection::compile() return json_stream->str(); } +static bool naturally_emit_type(const SPIRType &type) +{ + return type.basetype == SPIRType::Struct && !type.pointer && type.array.empty(); +} + +bool CompilerReflection::type_is_reference(const SPIRType &type) const +{ + // Physical pointers and arrays of physical pointers need to refer to the pointee's type. 
+ return type_is_top_level_physical_pointer(type) || + (!type.array.empty() && type_is_top_level_physical_pointer(get(type.parent_type))); +} + void CompilerReflection::emit_types() { bool emitted_open_tag = false; - ir.for_each_typed_id([&](uint32_t, SPIRType &type) { - if (type.basetype == SPIRType::Struct && !type.pointer && type.array.empty()) - emit_type(type, emitted_open_tag); + SmallVector physical_pointee_types; + + // If we have physical pointers or arrays of physical pointers, it's also helpful to emit the pointee type + // and chain the type hierarchy. For POD, arrays can emit the entire type in-place. + ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { + if (naturally_emit_type(type)) + { + emit_type(self, emitted_open_tag); + } + else if (type_is_reference(type)) + { + if (!naturally_emit_type(this->get(type.parent_type)) && + find(physical_pointee_types.begin(), physical_pointee_types.end(), type.parent_type) == + physical_pointee_types.end()) + { + physical_pointee_types.push_back(type.parent_type); + } + } }); + for (uint32_t pointee_type : physical_pointee_types) + emit_type(pointee_type, emitted_open_tag); + if (emitted_open_tag) { json_stream->end_json_object(); } } -void CompilerReflection::emit_type(const SPIRType &type, bool &emitted_open_tag) +void CompilerReflection::emit_type(uint32_t type_id, bool &emitted_open_tag) { + auto &type = get(type_id); auto name = type_to_glsl(type); - if (type.type_alias != 0) - return; - if (!emitted_open_tag) { json_stream->emit_json_key_object("types"); emitted_open_tag = true; } - json_stream->emit_json_key_object("_" + std::to_string(type.self)); + json_stream->emit_json_key_object("_" + std::to_string(type_id)); json_stream->emit_json_key_value("name", name); - json_stream->emit_json_key_array("members"); - // FIXME ideally we'd like to emit the size of a structure as a - // convenience to people parsing the reflected JSON. The problem - // is that there's no implicit size for a type. 
It's final size - // will be determined by the top level declaration in which it's - // included. So there might be one size for the struct if it's - // included in a std140 uniform block and another if it's included - // in a std430 uniform block. - // The solution is to include *all* potential sizes as a map of - // layout type name to integer, but that will probably require - // some additional logic being written in this class, or in the - // parent CompilerGLSL class. - auto size = type.member_types.size(); - for (uint32_t i = 0; i < size; ++i) + + if (type_is_top_level_physical_pointer(type)) { - emit_type_member(type, i); + json_stream->emit_json_key_value("type", "_" + std::to_string(type.parent_type)); + json_stream->emit_json_key_value("physical_pointer", true); } - json_stream->end_json_array(); + else if (!type.array.empty()) + { + emit_type_array(type); + json_stream->emit_json_key_value("type", "_" + std::to_string(type.parent_type)); + json_stream->emit_json_key_value("array_stride", get_decoration(type_id, DecorationArrayStride)); + } + else + { + json_stream->emit_json_key_array("members"); + // FIXME ideally we'd like to emit the size of a structure as a + // convenience to people parsing the reflected JSON. The problem + // is that there's no implicit size for a type. It's final size + // will be determined by the top level declaration in which it's + // included. So there might be one size for the struct if it's + // included in a std140 uniform block and another if it's included + // in a std430 uniform block. + // The solution is to include *all* potential sizes as a map of + // layout type name to integer, but that will probably require + // some additional logic being written in this class, or in the + // parent CompilerGLSL class. 
+ auto size = type.member_types.size(); + for (uint32_t i = 0; i < size; ++i) + { + emit_type_member(type, i); + } + json_stream->end_json_array(); + } + json_stream->end_json_object(); } @@ -324,7 +385,12 @@ void CompilerReflection::emit_type_member(const SPIRType &type, uint32_t index) // FIXME we'd like to emit the offset of each member, but such offsets are // context dependent. See the comment above regarding structure sizes json_stream->emit_json_key_value("name", name); - if (membertype.basetype == SPIRType::Struct) + + if (type_is_reference(membertype)) + { + json_stream->emit_json_key_value("type", "_" + std::to_string(membertype.parent_type)); + } + else if (membertype.basetype == SPIRType::Struct) { json_stream->emit_json_key_value("type", "_" + std::to_string(membertype.self)); } @@ -338,7 +404,7 @@ void CompilerReflection::emit_type_member(const SPIRType &type, uint32_t index) void CompilerReflection::emit_type_array(const SPIRType &type) { - if (!type.array.empty()) + if (!type_is_top_level_physical_pointer(type) && !type.array.empty()) { json_stream->emit_json_key_array("array"); // Note that we emit the zeros here as a means of identifying @@ -347,15 +413,16 @@ void CompilerReflection::emit_type_array(const SPIRType &type) for (const auto &value : type.array) json_stream->emit_json_array_value(value); json_stream->end_json_array(); + + json_stream->emit_json_key_array("array_size_is_literal"); + for (const auto &value : type.array_size_literal) + json_stream->emit_json_array_value(value); + json_stream->end_json_array(); } } void CompilerReflection::emit_type_member_qualifiers(const SPIRType &type, uint32_t index) { - auto flags = combined_decoration_for_member(type, index); - if (flags.get(DecorationRowMajor)) - json_stream->emit_json_key_value("row_major", true); - auto &membertype = get(type.member_types[index]); emit_type_array(membertype); auto &memb = ir.meta[type.self].members; @@ -366,6 +433,19 @@ void 
CompilerReflection::emit_type_member_qualifiers(const SPIRType &type, uint3 json_stream->emit_json_key_value("location", dec.location); if (dec.decoration_flags.get(DecorationOffset)) json_stream->emit_json_key_value("offset", dec.offset); + + // Array stride is a property of the array type, not the struct. + if (has_decoration(type.member_types[index], DecorationArrayStride)) + json_stream->emit_json_key_value("array_stride", + get_decoration(type.member_types[index], DecorationArrayStride)); + + if (dec.decoration_flags.get(DecorationMatrixStride)) + json_stream->emit_json_key_value("matrix_stride", dec.matrix_stride); + if (dec.decoration_flags.get(DecorationRowMajor)) + json_stream->emit_json_key_value("row_major", true); + + if (type_is_top_level_physical_pointer(membertype)) + json_stream->emit_json_key_value("physical_pointer", true); } } @@ -424,6 +504,28 @@ void CompilerReflection::emit_entry_points() json_stream->begin_json_object(); json_stream->emit_json_key_value("name", e.name); json_stream->emit_json_key_value("mode", execution_model_to_str(e.execution_model)); + if (e.execution_model == ExecutionModelGLCompute) + { + const auto &spv_entry = get_entry_point(e.name, e.execution_model); + + SpecializationConstant spec_x, spec_y, spec_z; + get_work_group_size_specialization_constants(spec_x, spec_y, spec_z); + + json_stream->emit_json_key_array("workgroup_size"); + json_stream->emit_json_array_value(spec_x.id != ID(0) ? spec_x.constant_id : + spv_entry.workgroup_size.x); + json_stream->emit_json_array_value(spec_y.id != ID(0) ? spec_y.constant_id : + spv_entry.workgroup_size.y); + json_stream->emit_json_array_value(spec_z.id != ID(0) ? 
spec_z.constant_id : + spv_entry.workgroup_size.z); + json_stream->end_json_array(); + + json_stream->emit_json_key_array("workgroup_size_is_spec_constant_id"); + json_stream->emit_json_array_value(spec_x.id != ID(0)); + json_stream->emit_json_array_value(spec_y.id != ID(0)); + json_stream->emit_json_array_value(spec_z.id != ID(0)); + json_stream->end_json_array(); + } json_stream->end_json_object(); } json_stream->end_json_array(); @@ -468,7 +570,7 @@ void CompilerReflection::emit_resources(const char *tag, const SmallVectorbegin_json_object(); @@ -485,18 +587,18 @@ void CompilerReflection::emit_resources(const char *tag, const SmallVectoremit_json_key_value("writeonly", true); - if (buffer_flags.get(DecorationNonWritable)) - json_stream->emit_json_key_value("readonly", true); - if (buffer_flags.get(DecorationRestrict)) - json_stream->emit_json_key_value("restrict", true); - if (buffer_flags.get(DecorationCoherent)) - json_stream->emit_json_key_value("coherent", true); - } + Bitset qualifier_mask = ssbo_block ? 
get_buffer_block_flags(res.id) : mask; + + if (qualifier_mask.get(DecorationNonReadable)) + json_stream->emit_json_key_value("writeonly", true); + if (qualifier_mask.get(DecorationNonWritable)) + json_stream->emit_json_key_value("readonly", true); + if (qualifier_mask.get(DecorationRestrict)) + json_stream->emit_json_key_value("restrict", true); + if (qualifier_mask.get(DecorationCoherent)) + json_stream->emit_json_key_value("coherent", true); + if (qualifier_mask.get(DecorationVolatile)) + json_stream->emit_json_key_value("volatile", true); } emit_type_array(type); @@ -552,13 +654,15 @@ void CompilerReflection::emit_specialization_constants() return; json_stream->emit_json_key_array("specialization_constants"); - for (const auto spec_const : specialization_constants) + for (const auto &spec_const : specialization_constants) { auto &c = get(spec_const.id); auto type = get(c.constant_type); json_stream->begin_json_object(); + json_stream->emit_json_key_value("name", get_name(spec_const.id)); json_stream->emit_json_key_value("id", spec_const.constant_id); json_stream->emit_json_key_value("type", type_to_glsl(type)); + json_stream->emit_json_key_value("variable_id", spec_const.id); switch (type.basetype) { case SPIRType::UInt: diff --git a/spirv_reflect.hpp b/spirv_reflect.hpp index 5a228a68376..a129ba54da5 100644 --- a/spirv_reflect.hpp +++ b/spirv_reflect.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2018-2019 Bradley Austin Davis + * Copyright 2018-2021 Bradley Austin Davis + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #ifndef SPIRV_CROSS_REFLECT_HPP #define SPIRV_CROSS_REFLECT_HPP @@ -67,11 +74,12 @@ class CompilerReflection : public CompilerGLSL void emit_resources(); void emit_specialization_constants(); - void emit_type(const SPIRType &type, bool &emitted_open_tag); + void emit_type(uint32_t type_id, bool &emitted_open_tag); void emit_type_member(const SPIRType &type, uint32_t index); void emit_type_member_qualifiers(const SPIRType &type, uint32_t index); void emit_type_array(const SPIRType &type); void emit_resources(const char *tag, const SmallVector &resources); + bool type_is_reference(const SPIRType &type) const; std::string to_member_name(const SPIRType &type, uint32_t index) const; diff --git a/test_shaders.py b/test_shaders.py index d2f75e80150..cf329e5e4c1 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -1,5 +1,20 @@ #!/usr/bin/env python3 +# Copyright 2015-2021 Arm Limited +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys import os import os.path @@ -86,24 +101,25 @@ def get_shader_stats(shader): def print_msl_compiler_version(): try: subprocess.check_call(['xcrun', '--sdk', 'iphoneos', 'metal', '--version']) - print('...are the Metal compiler characteristics.\n') # display after so xcrun FNF is silent + print('... 
are the Metal compiler characteristics.\n') # display after so xcrun FNF is silent except OSError as e: if (e.errno != errno.ENOENT): # Ignore xcrun not found error raise + print('Metal SDK is not present.\n') except subprocess.CalledProcessError: pass -def msl_compiler_supports_22(): +def msl_compiler_supports_version(version): try: - subprocess.check_call(['xcrun', '--sdk', 'macosx', 'metal', '-x', 'metal', '-std=macos-metal2.2', '-'], + subprocess.check_call(['xcrun', '--sdk', 'macosx', 'metal', '-x', 'metal', '-std=macos-metal' + version, '-'], stdin = subprocess.DEVNULL, stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) - print('Current SDK supports MSL 2.2. Enabling validation for MSL 2.2 shaders.') + print('Current SDK supports MSL {0}. Enabling validation for MSL {0} shaders.'.format(version)) return True except OSError as e: - print('Failed to check if MSL 2.2 is not supported. It probably is not.') + print('Failed to check if MSL {} is not supported. It probably is not.'.format(version)) return False except subprocess.CalledProcessError: - print('Current SDK does NOT support MSL 2.2. Disabling validation for MSL 2.2 shaders.') + print('Current SDK does NOT support MSL {0}. Disabling validation for MSL {0} shaders.'.format(version)) return False def path_to_msl_standard(shader): @@ -114,6 +130,10 @@ def path_to_msl_standard(shader): return '-std=ios-metal2.1' elif '.msl22.' in shader: return '-std=ios-metal2.2' + elif '.msl23.' in shader: + return '-std=ios-metal2.3' + elif '.msl24.' in shader: + return '-std=ios-metal2.4' elif '.msl11.' in shader: return '-std=ios-metal1.1' elif '.msl10.' in shader: @@ -127,6 +147,10 @@ def path_to_msl_standard(shader): return '-std=macos-metal2.1' elif '.msl22.' in shader: return '-std=macos-metal2.2' + elif '.msl23.' in shader: + return '-std=macos-metal2.3' + elif '.msl24.' in shader: + return '-std=macos-metal2.4' elif '.msl11.' 
in shader: return '-std=macos-metal1.1' else: @@ -139,6 +163,10 @@ def path_to_msl_standard_cli(shader): return '20100' elif '.msl22.' in shader: return '20200' + elif '.msl23.' in shader: + return '20300' + elif '.msl24.' in shader: + return '20400' elif '.msl11.' in shader: return '10100' else: @@ -164,23 +192,42 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths): spirv_path = create_temporary() msl_path = create_temporary(os.path.basename(shader)) - spirv_cmd = [paths.spirv_as, '--target-env', 'vulkan1.1', '-o', spirv_path, shader] + spirv_16 = '.spv16.' in shader + spirv_14 = '.spv14.' in shader + + if spirv_16: + spirv_env = 'spv1.6' + glslang_env = 'spirv1.6' + elif spirv_14: + spirv_env = 'vulkan1.1spv1.4' + glslang_env = 'spirv1.4' + else: + spirv_env = 'vulkan1.1' + glslang_env = 'vulkan1.1' + + spirv_cmd = [paths.spirv_as, '--target-env', spirv_env, '-o', spirv_path, shader] if '.preserve.' in shader: spirv_cmd.append('--preserve-numeric-ids') if spirv: subprocess.check_call(spirv_cmd) else: - subprocess.check_call([paths.glslang, '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader]) + subprocess.check_call([paths.glslang, '--amb' ,'--target-env', glslang_env, '-V', '-o', spirv_path, shader]) - if opt: - subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path]) + if opt and (not shader_is_invalid_spirv(shader)): + if '.graphics-robust-access.' 
in shader: + subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '--graphics-robust-access', '-o', spirv_path, spirv_path]) + else: + subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path]) spirv_cross_path = paths.spirv_cross - msl_args = [spirv_cross_path, '--entry', 'main', '--output', msl_path, spirv_path, '--msl', '--iterations', str(iterations)] + msl_args = [spirv_cross_path, '--output', msl_path, spirv_path, '--msl', '--iterations', str(iterations)] msl_args.append('--msl-version') msl_args.append(path_to_msl_standard_cli(shader)) + if not '.nomain.' in shader: + msl_args.append('--entry') + msl_args.append('main') if '.swizzle.' in shader: msl_args.append('--msl-swizzle-texture-samples') if '.ios.' in shader: @@ -195,21 +242,134 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths): msl_args.append('--msl-argument-buffers') if '.texture-buffer-native.' in shader: msl_args.append('--msl-texture-buffer-native') + if '.framebuffer-fetch.' in shader: + msl_args.append('--msl-framebuffer-fetch') + if '.invariant-float-math.' in shader: + msl_args.append('--msl-invariant-float-math') + if '.emulate-cube-array.' in shader: + msl_args.append('--msl-emulate-cube-array') if '.discrete.' in shader: # Arbitrary for testing purposes. msl_args.append('--msl-discrete-descriptor-set') msl_args.append('2') msl_args.append('--msl-discrete-descriptor-set') msl_args.append('3') + if '.force-active.' in shader: + msl_args.append('--msl-force-active-argument-buffer-resources') if '.line.' in shader: msl_args.append('--emit-line-directives') if '.multiview.' in shader: msl_args.append('--msl-multiview') + if '.no-layered.' in shader: + msl_args.append('--msl-multiview-no-layered-rendering') + if '.viewfromdev.' in shader: + msl_args.append('--msl-view-index-from-device-index') + if '.dispatchbase.' in shader: + msl_args.append('--msl-dispatch-base') + if '.dynamic-buffer.' 
in shader: + # Arbitrary for testing purposes. + msl_args.append('--msl-dynamic-buffer') + msl_args.append('0') + msl_args.append('0') + msl_args.append('--msl-dynamic-buffer') + msl_args.append('1') + msl_args.append('2') + if '.inline-block.' in shader: + # Arbitrary for testing purposes. + msl_args.append('--msl-inline-uniform-block') + msl_args.append('0') + msl_args.append('0') + if '.device-argument-buffer.' in shader: + msl_args.append('--msl-device-argument-buffer') + msl_args.append('0') + msl_args.append('--msl-device-argument-buffer') + msl_args.append('1') + if '.force-native-array.' in shader: + msl_args.append('--msl-force-native-arrays') + if '.zero-initialize.' in shader: + msl_args.append('--force-zero-initialized-variables') + if '.frag-output.' in shader: + # Arbitrary for testing purposes. + msl_args.append('--msl-disable-frag-depth-builtin') + msl_args.append('--msl-disable-frag-stencil-ref-builtin') + msl_args.append('--msl-enable-frag-output-mask') + msl_args.append('0x000000ca') + if '.no-user-varying.' in shader: + msl_args.append('--msl-no-clip-distance-user-varying') + if '.shader-inputs.' in shader: + # Arbitrary for testing purposes. + msl_args.append('--msl-shader-input') + msl_args.append('0') + msl_args.append('u8') + msl_args.append('2') + msl_args.append('--msl-shader-input') + msl_args.append('1') + msl_args.append('u16') + msl_args.append('3') + msl_args.append('--msl-shader-input') + msl_args.append('6') + msl_args.append('other') + msl_args.append('4') + if '.multi-patch.' in shader: + msl_args.append('--msl-multi-patch-workgroup') + # Arbitrary for testing purposes. + msl_args.append('--msl-shader-input') + msl_args.append('0') + msl_args.append('any32') + msl_args.append('3') + msl_args.append('--msl-shader-input') + msl_args.append('1') + msl_args.append('any16') + msl_args.append('2') + if '.raw-tess-in.' in shader: + msl_args.append('--msl-raw-buffer-tese-input') + if '.for-tess.' 
in shader: + msl_args.append('--msl-vertex-for-tessellation') + if '.fixed-sample-mask.' in shader: + msl_args.append('--msl-additional-fixed-sample-mask') + msl_args.append('0x00000022') + if '.arrayed-subpass.' in shader: + msl_args.append('--msl-arrayed-subpass-input') + if '.1d-as-2d.' in shader: + msl_args.append('--msl-texture-1d-as-2d') + if '.simd.' in shader: + msl_args.append('--msl-ios-use-simdgroup-functions') + if '.emulate-subgroup.' in shader: + msl_args.append('--msl-emulate-subgroups') + if '.fixed-subgroup.' in shader: + # Arbitrary for testing purposes. + msl_args.append('--msl-fixed-subgroup-size') + msl_args.append('32') + if '.force-sample.' in shader: + msl_args.append('--msl-force-sample-rate-shading') + if '.discard-checks.' in shader: + msl_args.append('--msl-check-discarded-frag-stores') + if '.decoration-binding.' in shader: + msl_args.append('--msl-decoration-binding') + if '.mask-location-0.' in shader: + msl_args.append('--mask-stage-output-location') + msl_args.append('0') + msl_args.append('0') + if '.mask-location-1.' in shader: + msl_args.append('--mask-stage-output-location') + msl_args.append('1') + msl_args.append('0') + if '.mask-position.' in shader: + msl_args.append('--mask-stage-output-builtin') + msl_args.append('Position') + if '.mask-point-size.' in shader: + msl_args.append('--mask-stage-output-builtin') + msl_args.append('PointSize') + if '.mask-clip-distance.' in shader: + msl_args.append('--mask-stage-output-builtin') + msl_args.append('ClipDistance') + if '.relax-nan.' 
in shader: + msl_args.append('--relax-nan-checks') subprocess.check_call(msl_args) if not shader_is_invalid_spirv(msl_path): - subprocess.check_call([paths.spirv_val, '--target-env', 'vulkan1.1', spirv_path]) + subprocess.check_call([paths.spirv_val, '--allow-localsizeid', '--scalar-block-layout', '--target-env', spirv_env, spirv_path]) return (spirv_path, msl_path) @@ -226,6 +386,10 @@ def shader_model_hlsl(shader): return '-Tps_5_1' elif '.comp' in shader: return '-Tcs_5_1' + elif '.mesh' in shader: + return '-Tms_6_5' + elif '.task' in shader: + return '-Tas_6_5' else: return None @@ -245,9 +409,21 @@ def shader_to_win_path(shader): ignore_fxc = False def validate_shader_hlsl(shader, force_no_external_validation, paths): - if not '.nonuniformresource' in shader: - # glslang HLSL does not support this, so rely on fxc to test it. - subprocess.check_call([paths.glslang, '-e', 'main', '-D', '--target-env', 'vulkan1.1', '-V', shader]) + test_glslang = True + if '.nonuniformresource.' in shader: + test_glslang = False + if '.fxconly.' in shader: + test_glslang = False + if '.task' in shader or '.mesh' in shader: + test_glslang = False + + hlsl_args = [paths.glslang, '--amb', '-e', 'main', '-D', '--target-env', 'vulkan1.1', '-V', shader] + if '.sm30.' in shader: + hlsl_args.append('--hlsl-dx9-compatible') + + if test_glslang: + subprocess.check_call(hlsl_args) + is_no_fxc = '.nofxc.' in shader global ignore_fxc if (not ignore_fxc) and (not force_no_external_validation) and (not is_no_fxc): @@ -270,7 +446,9 @@ def validate_shader_hlsl(shader, force_no_external_validation, paths): raise RuntimeError('Failed compiling HLSL shader') def shader_to_sm(shader): - if '.sm60.' in shader: + if '.sm62.' in shader: + return '62' + elif '.sm60.' in shader: return '60' elif '.sm51.' 
in shader: return '51' @@ -283,16 +461,29 @@ def cross_compile_hlsl(shader, spirv, opt, force_no_external_validation, iterati spirv_path = create_temporary() hlsl_path = create_temporary(os.path.basename(shader)) - spirv_cmd = [paths.spirv_as, '--target-env', 'vulkan1.1', '-o', spirv_path, shader] + spirv_16 = '.spv16.' in shader + spirv_14 = '.spv14.' in shader + + if spirv_16: + spirv_env = 'spv1.6' + glslang_env = 'spirv1.6' + elif spirv_14: + spirv_env = 'vulkan1.1spv1.4' + glslang_env = 'spirv1.4' + else: + spirv_env = 'vulkan1.1' + glslang_env = 'vulkan1.1' + + spirv_cmd = [paths.spirv_as, '--target-env', spirv_env, '-o', spirv_path, shader] if '.preserve.' in shader: spirv_cmd.append('--preserve-numeric-ids') if spirv: subprocess.check_call(spirv_cmd) else: - subprocess.check_call([paths.glslang, '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader]) + subprocess.check_call([paths.glslang, '--amb', '--target-env', glslang_env, '-V', '-o', spirv_path, shader]) - if opt: + if opt and (not shader_is_invalid_spirv(hlsl_path)): subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path]) spirv_cross_path = paths.spirv_cross @@ -302,13 +493,28 @@ def cross_compile_hlsl(shader, spirv, opt, force_no_external_validation, iterati hlsl_args = [spirv_cross_path, '--entry', 'main', '--output', hlsl_path, spirv_path, '--hlsl-enable-compat', '--hlsl', '--shader-model', sm, '--iterations', str(iterations)] if '.line.' in shader: hlsl_args.append('--emit-line-directives') + if '.flatten.' in shader: + hlsl_args.append('--flatten-ubo') + if '.force-uav.' in shader: + hlsl_args.append('--hlsl-force-storage-buffer-as-uav') + if '.zero-initialize.' in shader: + hlsl_args.append('--force-zero-initialized-variables') + if '.nonwritable-uav-texture.' in shader: + hlsl_args.append('--hlsl-nonwritable-uav-texture-as-srv') + if '.native-16bit.' in shader: + hlsl_args.append('--hlsl-enable-16bit-types') + if '.flatten-matrix-vertex-input.' 
in shader: + hlsl_args.append('--hlsl-flatten-matrix-vertex-input-semantics') + if '.relax-nan.' in shader: + hlsl_args.append('--relax-nan-checks') + subprocess.check_call(hlsl_args) if not shader_is_invalid_spirv(hlsl_path): - subprocess.check_call([paths.spirv_val, '--target-env', 'vulkan1.1', spirv_path]) + subprocess.check_call([paths.spirv_val, '--allow-localsizeid', '--scalar-block-layout', '--target-env', spirv_env, spirv_path]) validate_shader_hlsl(hlsl_path, force_no_external_validation, paths) - + return (spirv_path, hlsl_path) def cross_compile_reflect(shader, spirv, opt, iterations, paths): @@ -322,9 +528,9 @@ def cross_compile_reflect(shader, spirv, opt, iterations, paths): if spirv: subprocess.check_call(spirv_cmd) else: - subprocess.check_call([paths.glslang, '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader]) + subprocess.check_call([paths.glslang, '--amb', '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader]) - if opt: + if opt and (not shader_is_invalid_spirv(reflect_path)): subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path]) spirv_cross_path = paths.spirv_cross @@ -335,37 +541,58 @@ def cross_compile_reflect(shader, spirv, opt, iterations, paths): def validate_shader(shader, vulkan, paths): if vulkan: - subprocess.check_call([paths.glslang, '--target-env', 'vulkan1.1', '-V', shader]) + spirv_14 = '.spv14.' 
in shader + glslang_env = 'spirv1.4' if spirv_14 else 'vulkan1.1' + subprocess.check_call([paths.glslang, '--amb', '--target-env', glslang_env, '-V', shader]) else: subprocess.check_call([paths.glslang, shader]) -def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, flatten_ubo, sso, flatten_dim, opt, push_ubo, iterations, paths): +def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, force_es, flatten_ubo, sso, flatten_dim, opt, push_ubo, iterations, paths): spirv_path = create_temporary() glsl_path = create_temporary(os.path.basename(shader)) + spirv_16 = '.spv16.' in shader + spirv_14 = '.spv14.' in shader + if spirv_16: + spirv_env = 'spv1.6' + glslang_env = 'spirv1.6' + elif spirv_14: + spirv_env = 'vulkan1.1spv1.4' + glslang_env = 'spirv1.4' + else: + spirv_env = 'vulkan1.1' + glslang_env = 'vulkan1.1' + if vulkan or spirv: vulkan_glsl_path = create_temporary('vk' + os.path.basename(shader)) - spirv_cmd = [paths.spirv_as, '--target-env', 'vulkan1.1', '-o', spirv_path, shader] + spirv_cmd = [paths.spirv_as, '--target-env', spirv_env, '-o', spirv_path, shader] if '.preserve.' in shader: spirv_cmd.append('--preserve-numeric-ids') if spirv: subprocess.check_call(spirv_cmd) else: - subprocess.check_call([paths.glslang, '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader]) + glslang_cmd = [paths.glslang, '--amb', '--target-env', glslang_env, '-V', '-o', spirv_path, shader] + if '.g.' in shader: + glslang_cmd.append('-g') + if '.gV.' 
in shader: + glslang_cmd.append('-gV') + subprocess.check_call(glslang_cmd) if opt and (not invalid_spirv): subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path]) if not invalid_spirv: - subprocess.check_call([paths.spirv_val, '--target-env', 'vulkan1.1', spirv_path]) + subprocess.check_call([paths.spirv_val, '--allow-localsizeid', '--scalar-block-layout', '--target-env', spirv_env, spirv_path]) extra_args = ['--iterations', str(iterations)] if eliminate: extra_args += ['--remove-unused-variables'] if is_legacy: extra_args += ['--version', '100', '--es'] + if force_es: + extra_args += ['--version', '310', '--es'] if flatten_ubo: extra_args += ['--flatten-ubo'] if sso: @@ -376,19 +603,37 @@ def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, fl extra_args += ['--glsl-emit-push-constant-as-ubo'] if '.line.' in shader: extra_args += ['--emit-line-directives'] + if '.no-samplerless.' in shader: + extra_args += ['--vulkan-glsl-disable-ext-samplerless-texture-functions'] + if '.no-qualifier-deduction.' in shader: + extra_args += ['--disable-storage-image-qualifier-deduction'] + if '.framebuffer-fetch.' in shader: + extra_args += ['--glsl-remap-ext-framebuffer-fetch', '0', '0'] + extra_args += ['--glsl-remap-ext-framebuffer-fetch', '1', '1'] + extra_args += ['--glsl-remap-ext-framebuffer-fetch', '2', '2'] + extra_args += ['--glsl-remap-ext-framebuffer-fetch', '3', '3'] + if '.framebuffer-fetch-noncoherent.' in shader: + extra_args += ['--glsl-ext-framebuffer-fetch-noncoherent'] + if '.zero-initialize.' in shader: + extra_args += ['--force-zero-initialized-variables'] + if '.force-flattened-io.' in shader: + extra_args += ['--glsl-force-flattened-io-blocks'] + if '.relax-nan.' in shader: + extra_args.append('--relax-nan-checks') spirv_cross_path = paths.spirv_cross # A shader might not be possible to make valid GLSL from, skip validation for this case. 
- if not ('nocompat' in glsl_path): + if (not ('nocompat' in glsl_path)) or (not vulkan): subprocess.check_call([spirv_cross_path, '--entry', 'main', '--output', glsl_path, spirv_path] + extra_args) - validate_shader(glsl_path, False, paths) + if not 'nocompat' in glsl_path: + validate_shader(glsl_path, False, paths) else: remove_file(glsl_path) glsl_path = None - if vulkan or spirv: - subprocess.check_call([spirv_cross_path, '--entry', 'main', '--vulkan-semantics', '--output', vulkan_glsl_path, spirv_path] + extra_args) + if (vulkan or spirv) and (not is_legacy): + subprocess.check_call([spirv_cross_path, '--entry', 'main', '-V', '--output', vulkan_glsl_path, spirv_path] + extra_args) validate_shader(vulkan_glsl_path, True, paths) # SPIR-V shaders might just want to validate Vulkan GLSL output, we don't always care about the output. if not vulkan: @@ -419,30 +664,14 @@ def reference_path(directory, relpath, opt): reference_dir = os.path.join(reference_dir, split_paths[1]) return os.path.join(reference_dir, relpath) -def json_ordered(obj): - if isinstance(obj, dict): - return sorted((k, json_ordered(v)) for k, v in obj.items()) - if isinstance(obj, list): - return sorted(json_ordered(x) for x in obj) - else: - return obj - -def json_compare(json_a, json_b): - return json_ordered(json_a) == json_ordered(json_b) - def regression_check_reflect(shader, json_file, args): reference = reference_path(shader[0], shader[1], args.opt) + '.json' joined_path = os.path.join(shader[0], shader[1]) print('Reference shader reflection path:', reference) if os.path.exists(reference): - actual = '' - expected = '' - with open(json_file) as f: - actual_json = f.read(); - actual = json.loads(actual_json) - with open(reference) as f: - expected = json.load(f) - if (json_compare(actual, expected) != True): + actual = md5_for_file(json_file) + expected = md5_for_file(reference) + if actual != expected: if args.update: print('Generated reflection json has changed for {}!'.format(reference)) 
# If we expect changes, update the reference file. @@ -452,13 +681,22 @@ def regression_check_reflect(shader, json_file, args): shutil.move(json_file, reference) else: print('Generated reflection json in {} does not match reference {}!'.format(json_file, reference)) - with open(json_file, 'r') as f: - print('') - print('Generated:') - print('======================') - print(f.read()) - print('======================') - print('') + if args.diff: + diff_path = generate_diff_file(reference, glsl) + with open(diff_path, 'r') as f: + print('') + print('Diff:') + print(f.read()) + print('') + remove_file(diff_path) + else: + with open(json_file, 'r') as f: + print('') + print('Generated:') + print('======================') + print(f.read()) + print('======================') + print('') # Otherwise, fail the test. Keep the shader file around so we can inspect. if not args.keep: @@ -471,7 +709,20 @@ def regression_check_reflect(shader, json_file, args): print('Found new shader {}. Placing generated source code in {}'.format(joined_path, reference)) make_reference_dir(reference) shutil.move(json_file, reference) - + +def generate_diff_file(origin, generated): + diff_destination = create_temporary() + with open(diff_destination, "w") as f: + try: + subprocess.check_call(["diff", origin, generated], stdout=f) + except subprocess.CalledProcessError as e: + # diff returns 1 when the files are different so we can safely + # ignore this case. 
+ if e.returncode != 1: + raise e + + return diff_destination + def regression_check(shader, glsl, args): reference = reference_path(shader[0], shader[1], args.opt) joined_path = os.path.join(shader[0], shader[1]) @@ -488,13 +739,22 @@ def regression_check(shader, glsl, args): shutil.move(glsl, reference) else: print('Generated source code in {} does not match reference {}!'.format(glsl, reference)) - with open(glsl, 'r') as f: - print('') - print('Generated:') - print('======================') - print(f.read()) - print('======================') - print('') + if args.diff: + diff_path = generate_diff_file(reference, glsl) + with open(diff_path, 'r') as f: + print('') + print('Diff:') + print(f.read()) + print('') + remove_file(diff_path) + else: + with open(glsl, 'r') as f: + print('') + print('Generated:') + print('======================') + print(f.read()) + print('======================') + print('') # Otherwise, fail the test. Keep the shader file around so we can inspect. if not args.keep: @@ -525,6 +785,9 @@ def shader_is_invalid_spirv(shader): def shader_is_legacy(shader): return '.legacy.' in shader +def shader_is_force_es(shader): + return '.es.' in shader + def shader_is_flatten_ubo(shader): return '.flatten.' 
in shader @@ -548,6 +811,7 @@ def test_shader(stats, shader, args, paths): is_spirv = shader_is_spirv(shader[1]) invalid_spirv = shader_is_invalid_spirv(shader[1]) is_legacy = shader_is_legacy(shader[1]) + force_es = shader_is_force_es(shader[1]) flatten_ubo = shader_is_flatten_ubo(shader[1]) sso = shader_is_sso(shader[1]) flatten_dim = shader_is_flatten_dimensions(shader[1]) @@ -555,7 +819,7 @@ def test_shader(stats, shader, args, paths): push_ubo = shader_is_push_ubo(shader[1]) print('Testing shader:', joined_path) - spirv, glsl, vulkan_glsl = cross_compile(joined_path, vulkan, is_spirv, invalid_spirv, eliminate, is_legacy, flatten_ubo, sso, flatten_dim, args.opt and (not noopt), push_ubo, args.iterations, paths) + spirv, glsl, vulkan_glsl = cross_compile(joined_path, vulkan, is_spirv, invalid_spirv, eliminate, is_legacy, force_es, flatten_ubo, sso, flatten_dim, args.opt and (not noopt), push_ubo, args.iterations, paths) # Only test GLSL stats if we have a shader following GL semantics. if stats and (not vulkan) and (not is_spirv) and (not desktop): @@ -596,7 +860,9 @@ def test_shader_msl(stats, shader, args, paths): # print('SPRIV shader: ' + spirv) shader_is_msl22 = 'msl22' in joined_path - skip_validation = shader_is_msl22 and (not args.msl22) + shader_is_msl23 = 'msl23' in joined_path + shader_is_msl24 = 'msl24' in joined_path + skip_validation = (shader_is_msl22 and (not args.msl22)) or (shader_is_msl23 and (not args.msl23)) or (shader_is_msl24 and (not args.msl24)) if '.invalid.' 
in joined_path: skip_validation = True @@ -647,23 +913,24 @@ def test_shaders_helper(stats, backend, args): relpath = os.path.relpath(path, args.folder) all_files.append(relpath) - # The child processes in parallel execution mode don't have the proper state for the global args variable, so + # The child processes in parallel execution mode don't have the proper state for the global args variable, so # at this point we need to switch to explicit arguments if args.parallel: - pool = multiprocessing.Pool(multiprocessing.cpu_count()) - - results = [] - for f in all_files: - results.append(pool.apply_async(test_shader_file, - args = (f, stats, args, backend))) - - for res in results: - error = res.get() - if error is not None: - pool.close() - pool.join() - print('Error:', error) - sys.exit(1) + with multiprocessing.Pool(multiprocessing.cpu_count()) as pool: + results = [] + for f in all_files: + results.append(pool.apply_async(test_shader_file, + args = (f, stats, args, backend))) + + pool.close() + pool.join() + results_completed = [res.get() for res in results] + + for error in results_completed: + if error is not None: + print('Error:', error) + sys.exit(1) + else: for i in all_files: e = test_shader_file(i, stats, args, backend) @@ -689,6 +956,9 @@ def main(): parser.add_argument('--keep', action = 'store_true', help = 'Leave failed GLSL shaders on disk if they fail regression. Useful for debugging.') + parser.add_argument('--diff', + action = 'store_true', + help = 'Displays a diff instead of the generated output on failure. 
Useful for debugging.') parser.add_argument('--malisc', action = 'store_true', help = 'Use malisc offline compiler to determine static cycle counts before and after spirv-cross.') @@ -732,7 +1002,7 @@ def main(): default = 1, type = int, help = 'Number of iterations to run SPIRV-Cross (benchmarking)') - + args = parser.parse_args() if not args.folder: sys.stderr.write('Need shader folder.\n') @@ -741,16 +1011,20 @@ def main(): if (args.parallel and (args.malisc or args.force_no_external_validation or args.update)): sys.stderr.write('Parallel execution is disabled when using the flags --update, --malisc or --force-no-external-validation\n') args.parallel = False - + args.msl22 = False + args.msl23 = False + args.msl24 = False if args.msl: print_msl_compiler_version() - args.msl22 = msl_compiler_supports_22() + args.msl22 = msl_compiler_supports_version('2.2') + args.msl23 = msl_compiler_supports_version('2.3') + args.msl24 = msl_compiler_supports_version('2.4') backend = 'glsl' - if (args.msl or args.metal): + if (args.msl or args.metal): backend = 'msl' - elif args.hlsl: + elif args.hlsl: backend = 'hlsl' elif args.reflect: backend = 'reflect' diff --git a/test_shaders.sh b/test_shaders.sh index 4498ac3f08c..c2ab23243ce 100755 --- a/test_shaders.sh +++ b/test_shaders.sh @@ -1,4 +1,8 @@ #!/bin/bash +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +OPTS=$@ if [ -z "$SPIRV_CROSS_PATH" ]; then echo "Building spirv-cross" @@ -11,14 +15,17 @@ echo "Using glslangValidation in: $(which glslangValidator)." echo "Using spirv-opt in: $(which spirv-opt)." echo "Using SPIRV-Cross in: \"$SPIRV_CROSS_PATH\"." 
-./test_shaders.py shaders --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-no-opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-msl --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-msl --msl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-msl-no-opt --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-hlsl --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-hlsl --hlsl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-hlsl-no-opt --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-reflection --reflect --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders ${OPTS} --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders ${OPTS} --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-no-opt ${OPTS} --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-msl ${OPTS} --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-msl ${OPTS} --msl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-msl-no-opt ${OPTS} --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-hlsl ${OPTS} --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-hlsl ${OPTS} --hlsl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-hlsl-no-opt ${OPTS} --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-reflection ${OPTS} --reflect --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-ue4 ${OPTS} --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-ue4 ${OPTS} --msl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-ue4-no-opt ${OPTS} --msl 
--spirv-cross "$SPIRV_CROSS_PATH" || exit 1 diff --git a/tests-other/hlsl_resource_binding.spv b/tests-other/hlsl_resource_binding.spv new file mode 100644 index 00000000000..c48dc49ea00 Binary files /dev/null and b/tests-other/hlsl_resource_binding.spv differ diff --git a/tests-other/hlsl_resource_bindings.cpp b/tests-other/hlsl_resource_bindings.cpp new file mode 100644 index 00000000000..1a938dac333 --- /dev/null +++ b/tests-other/hlsl_resource_bindings.cpp @@ -0,0 +1,89 @@ +// Testbench for HLSL resource binding APIs. +// It does not validate output at the moment, but it's useful for ad-hoc testing. + +#include +#include +#include +#include + +#define SPVC_CHECKED_CALL(x) do { \ + if ((x) != SPVC_SUCCESS) { \ + fprintf(stderr, "Failed at line %d.\n", __LINE__); \ + exit(1); \ + } \ +} while(0) + +static std::vector read_file(const char *path) +{ + long len; + FILE *file = fopen(path, "rb"); + + if (!file) + return {}; + + fseek(file, 0, SEEK_END); + len = ftell(file); + rewind(file); + + std::vector buffer(len / sizeof(SpvId)); + if (fread(buffer.data(), 1, len, file) != (size_t)len) + { + fclose(file); + return {}; + } + + fclose(file); + return buffer; +} + +int main(int argc, char **argv) +{ + if (argc != 2) + return EXIT_FAILURE; + + auto buffer = read_file(argv[1]); + if (buffer.empty()) + return EXIT_FAILURE; + + spvc_context ctx; + spvc_parsed_ir parsed_ir; + spvc_compiler compiler; + + SPVC_CHECKED_CALL(spvc_context_create(&ctx)); + SPVC_CHECKED_CALL(spvc_context_parse_spirv(ctx, buffer.data(), buffer.size(), &parsed_ir)); + SPVC_CHECKED_CALL(spvc_context_create_compiler(ctx, SPVC_BACKEND_HLSL, parsed_ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler)); + + spvc_compiler_options opts; + SPVC_CHECKED_CALL(spvc_compiler_create_compiler_options(compiler, &opts)); + SPVC_CHECKED_CALL(spvc_compiler_options_set_uint(opts, SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, 51)); + SPVC_CHECKED_CALL(spvc_compiler_install_compiler_options(compiler, opts)); + + 
spvc_hlsl_resource_binding binding; + spvc_hlsl_resource_binding_init(&binding); + binding.stage = SpvExecutionModelFragment; + binding.desc_set = 1; + binding.binding = 4; + binding.srv.register_space = 2; + binding.srv.register_binding = 3; + binding.sampler.register_space = 4; + binding.sampler.register_binding = 5; + SPVC_CHECKED_CALL(spvc_compiler_hlsl_add_resource_binding(compiler, &binding)); + + binding.desc_set = SPVC_HLSL_PUSH_CONSTANT_DESC_SET; + binding.binding = SPVC_HLSL_PUSH_CONSTANT_BINDING; + binding.cbv.register_space = 0; + binding.cbv.register_binding = 4; + SPVC_CHECKED_CALL(spvc_compiler_hlsl_add_resource_binding(compiler, &binding)); + + const char *str; + SPVC_CHECKED_CALL(spvc_compiler_compile(compiler, &str)); + + fprintf(stderr, "Output:\n%s\n", str); + + if (!spvc_compiler_hlsl_is_resource_used(compiler, SpvExecutionModelFragment, 1, 4)) + return EXIT_FAILURE; + + if (!spvc_compiler_hlsl_is_resource_used(compiler, SpvExecutionModelFragment, SPVC_HLSL_PUSH_CONSTANT_DESC_SET, SPVC_HLSL_PUSH_CONSTANT_BINDING)) + return EXIT_FAILURE; +} + diff --git a/tests-other/msl_ycbcr_conversion_test.cpp b/tests-other/msl_ycbcr_conversion_test.cpp new file mode 100644 index 00000000000..deab27bec20 --- /dev/null +++ b/tests-other/msl_ycbcr_conversion_test.cpp @@ -0,0 +1,103 @@ +// Testbench for MSL constexpr samplers, with Y'CbCr conversion. +// It does not validate output, but it's useful for ad-hoc testing. 
+ +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include +#include +#include + +#define SPVC_CHECKED_CALL(x) do { \ + if ((x) != SPVC_SUCCESS) { \ + fprintf(stderr, "Failed at line %d.\n", __LINE__); \ + exit(1); \ + } \ +} while(0) +#define SPVC_CHECKED_CALL_NEGATIVE(x) do { \ + g_fail_on_error = SPVC_FALSE; \ + if ((x) == SPVC_SUCCESS) { \ + fprintf(stderr, "Failed at line %d.\n", __LINE__); \ + exit(1); \ + } \ + g_fail_on_error = SPVC_TRUE; \ +} while(0) + +static std::vector read_file(const char *path) +{ + long len; + FILE *file = fopen(path, "rb"); + + if (!file) + return {}; + + fseek(file, 0, SEEK_END); + len = ftell(file); + rewind(file); + + std::vector buffer(len / sizeof(SpvId)); + if (fread(buffer.data(), 1, len, file) != (size_t)len) + { + fclose(file); + return {}; + } + + fclose(file); + return buffer; +} + +int main(int argc, char **argv) +{ + if (argc != 2) + return EXIT_FAILURE; + + auto buffer = read_file(argv[1]); + if (buffer.empty()) + return EXIT_FAILURE; + + spvc_context ctx; + spvc_parsed_ir parsed_ir; + spvc_compiler compiler; + spvc_compiler_options options; + + SPVC_CHECKED_CALL(spvc_context_create(&ctx)); + SPVC_CHECKED_CALL(spvc_context_parse_spirv(ctx, buffer.data(), buffer.size(), &parsed_ir)); + SPVC_CHECKED_CALL(spvc_context_create_compiler(ctx, SPVC_BACKEND_MSL, parsed_ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler)); + SPVC_CHECKED_CALL(spvc_compiler_create_compiler_options(compiler, &options)); + SPVC_CHECKED_CALL(spvc_compiler_options_set_uint(options, SPVC_COMPILER_OPTION_MSL_VERSION, SPVC_MAKE_MSL_VERSION(2, 0, 0))); + SPVC_CHECKED_CALL(spvc_compiler_install_compiler_options(compiler, options)); + + spvc_msl_resource_binding binding; + spvc_msl_resource_binding_init(&binding); + binding.desc_set = 1; + binding.binding = 2; + binding.stage = SpvExecutionModelFragment; + binding.msl_texture = 0; + binding.msl_sampler = 0; + SPVC_CHECKED_CALL(spvc_compiler_msl_add_resource_binding(compiler, 
&binding)); + + spvc_msl_constexpr_sampler samp; + spvc_msl_sampler_ycbcr_conversion conv; + spvc_msl_constexpr_sampler_init(&samp); + spvc_msl_sampler_ycbcr_conversion_init(&conv); + conv.planes = 3; + conv.resolution = SPVC_MSL_FORMAT_RESOLUTION_422; + conv.chroma_filter = SPVC_MSL_SAMPLER_FILTER_LINEAR; + conv.x_chroma_offset = SPVC_MSL_CHROMA_LOCATION_MIDPOINT; + conv.ycbcr_model = SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020; + conv.ycbcr_range = SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW; + conv.bpc = 8; + SPVC_CHECKED_CALL(spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(compiler, 1, 2, &samp, &conv)); + + const char *str; + SPVC_CHECKED_CALL(spvc_compiler_compile(compiler, &str)); + + // Should be marked, as a sanity check. + if (!spvc_compiler_msl_is_resource_used(compiler, SpvExecutionModelFragment, 1, 2)) + return EXIT_FAILURE; + + fprintf(stderr, "Output:\n%s\n", str); +} + diff --git a/tests-other/msl_ycbcr_conversion_test.spv b/tests-other/msl_ycbcr_conversion_test.spv new file mode 100644 index 00000000000..62372d5c652 Binary files /dev/null and b/tests-other/msl_ycbcr_conversion_test.spv differ diff --git a/tests-other/msl_ycbcr_conversion_test_2.spv b/tests-other/msl_ycbcr_conversion_test_2.spv new file mode 100644 index 00000000000..10fa7690d0d Binary files /dev/null and b/tests-other/msl_ycbcr_conversion_test_2.spv differ diff --git a/tests-other/small_vector.cpp b/tests-other/small_vector.cpp index 7b03d85c603..e9a3bb0bee4 100644 --- a/tests-other/small_vector.cpp +++ b/tests-other/small_vector.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2019 Hans-Kristian Arntzen + * Copyright 2019-2021 Hans-Kristian Arntzen * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/tests-other/typed_id_test.cpp b/tests-other/typed_id_test.cpp new file mode 100644 index 00000000000..e8ecb16cc6d --- /dev/null +++ b/tests-other/typed_id_test.cpp @@ -0,0 +1,49 @@ +#include "spirv_common.hpp" + +using namespace SPIRV_CROSS_NAMESPACE; + +int main() +{ + // Construct from uint32_t. + VariableID var_id = 10; + TypeID type_id = 20; + ConstantID constant_id = 30; + + // Assign from uint32_t. + var_id = 100; + type_id = 40; + constant_id = 60; + + // Construct generic ID. + ID generic_var_id = var_id; + ID generic_type_id = type_id; + ID generic_constant_id = constant_id; + + // Assign generic id. + generic_var_id = var_id; + generic_type_id = type_id; + generic_constant_id = constant_id; + + // Assign generic ID to typed ID + var_id = generic_var_id; + type_id = generic_type_id; + constant_id = generic_constant_id; + + // Implicit conversion to uint32_t. + uint32_t a; + a = var_id; + a = type_id; + a = constant_id; + a = generic_var_id; + a = generic_type_id; + a = generic_constant_id; + + // Copy assignment. + var_id = VariableID(10); + type_id = TypeID(10); + constant_id = ConstantID(10); + + // These operations are blocked, assign or construction from mismatched types. + //var_id = type_id; + //var_id = TypeID(100); +} \ No newline at end of file diff --git a/update_test_shaders.sh b/update_test_shaders.sh index c33afc5caaa..85f9f0b7a1d 100755 --- a/update_test_shaders.sh +++ b/update_test_shaders.sh @@ -1,25 +1,6 @@ #!/bin/bash +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 -if [ -z "$SPIRV_CROSS_PATH" ]; then - echo "Building spirv-cross" - make -j$(nproc) - SPIRV_CROSS_PATH="./spirv-cross" -fi - -export PATH="./external/glslang-build/output/bin:./external/spirv-tools-build/output/bin:.:$PATH" -echo "Using glslangValidation in: $(which glslangValidator)." -echo "Using spirv-opt in: $(which spirv-opt)." -echo "Using SPIRV-Cross in: \"$SPIRV_CROSS_PATH\"." 
- -./test_shaders.py shaders --update --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders --update --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-no-opt --update --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-msl --update --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-msl --update --msl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-msl-no-opt --update --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-hlsl --update --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-hlsl --update --hlsl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-hlsl-no-opt --update --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-reflection --reflect --update --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 - +./test_shaders.sh --update